Brian Stinson 2593d8
From a735699a8287c19e043b7d2fe9a387a3938e1e2f Mon Sep 17 00:00:00 2001
Brian Stinson 2593d8
From: =?UTF-8?q?Michal=20Sekleta=CC=81r?= <msekleta@redhat.com>
Brian Stinson 2593d8
Date: Mon, 18 Nov 2019 12:50:11 +0100
Brian Stinson 2593d8
Subject: [PATCH] core: introduce NUMAPolicy and NUMAMask options
Brian Stinson 2593d8
Brian Stinson 2593d8
Make it possible to set the NUMA allocation policy for the manager. Manager's
Brian Stinson 2593d8
policy is by default inherited by all forked off processes. However, it
Brian Stinson 2593d8
is possible to override the policy on a per-service basis. Currently we
Brian Stinson 2593d8
support these policies: default, preferred, bind, interleave, local.
Brian Stinson 2593d8
See man 2 set_mempolicy for details on each policy.
Brian Stinson 2593d8
Brian Stinson 2593d8
Overall NUMA policy actually consists of two parts. Policy itself and
Brian Stinson 2593d8
bitmask representing NUMA nodes where the policy is effective. Node mask can
Brian Stinson 2593d8
be specified using related option, NUMAMask. Default mask can be
Brian Stinson 2593d8
overwritten on per-service level.
Brian Stinson 2593d8
Brian Stinson 2593d8
(cherry-picked from commit fe9c54b2188e6cd23262a319f96b13215f2c5e9c)
Brian Stinson 2593d8
Brian Stinson 2593d8
Resolves: #1734787
Brian Stinson 2593d8
---
Brian Stinson 2593d8
 man/systemd-system.conf.xml           | 19 ++++++
Brian Stinson 2593d8
 man/systemd.exec.xml                  | 28 +++++++++
Brian Stinson 2593d8
 meson.build                           |  4 ++
Brian Stinson 2593d8
 src/basic/cpu-set-util.c              | 91 +++++++++++++++++++++++++++
Brian Stinson 2593d8
 src/basic/cpu-set-util.h              | 28 +++++++++
Brian Stinson 2593d8
 src/basic/exit-status.c               |  3 +
Brian Stinson 2593d8
 src/basic/exit-status.h               |  1 +
Brian Stinson 2593d8
 src/basic/missing_syscall.h           | 43 +++++++++++++
Brian Stinson 2593d8
 src/core/dbus-execute.c               | 65 ++++++++++++++++++-
Brian Stinson 2593d8
 src/core/execute.c                    | 20 ++++++
Brian Stinson 2593d8
 src/core/execute.h                    |  1 +
Brian Stinson 2593d8
 src/core/load-fragment-gperf.gperf.m4 |  2 +
Brian Stinson 2593d8
 src/core/load-fragment.c              | 28 +++++++++
Brian Stinson 2593d8
 src/core/load-fragment.h              |  2 +
Brian Stinson 2593d8
 src/core/main.c                       | 27 ++++++++
Brian Stinson 2593d8
 src/core/system.conf.in               |  2 +
Brian Stinson 2593d8
 src/shared/bus-unit-util.c            | 28 +++++++++
Brian Stinson 2593d8
 src/systemctl/systemctl.c             | 18 +++++-
Brian Stinson 2593d8
 18 files changed, 405 insertions(+), 5 deletions(-)
Brian Stinson 2593d8
Brian Stinson 2593d8
diff --git a/man/systemd-system.conf.xml b/man/systemd-system.conf.xml
Brian Stinson 2593d8
index ab23779ec0..988c4e7665 100644
Brian Stinson 2593d8
--- a/man/systemd-system.conf.xml
Brian Stinson 2593d8
+++ b/man/systemd-system.conf.xml
Brian Stinson 2593d8
@@ -132,6 +132,25 @@
Brian Stinson 2593d8
         anymore.</para></listitem>
Brian Stinson 2593d8
       </varlistentry>
Brian Stinson 2593d8
 
Brian Stinson 2593d8
+      <varlistentry>
Brian Stinson 2593d8
+        <term><varname>NUMAPolicy=</varname></term>
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+        <listitem><para>Configures the NUMA memory policy for the service manager and the default NUMA memory policy
Brian Stinson 2593d8
+        for all forked off processes. Individual services may override the default policy with the
Brian Stinson 2593d8
+        <varname>NUMAPolicy=</varname> setting in unit files, see
Brian Stinson 2593d8
+        <citerefentry><refentrytitle>systemd.exec</refentrytitle><manvolnum>5</manvolnum></citerefentry>.</para></listitem>
Brian Stinson 2593d8
+      </varlistentry>
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+      <varlistentry>
Brian Stinson 2593d8
+        <term><varname>NUMAMask=</varname></term>
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+        <listitem><para>Configures the NUMA node mask that will be associated with the selected NUMA policy. Note that
Brian Stinson 2593d8
+        <option>default</option> and <option>local</option> NUMA policies don't require explicit NUMA node mask and
Brian Stinson 2593d8
+        value of the option can be empty. Similarly to <varname>NUMAPolicy=</varname>, the value can be overridden
Brian Stinson 2593d8
+        by individual services in unit files, see
Brian Stinson 2593d8
+        <citerefentry><refentrytitle>systemd.exec</refentrytitle><manvolnum>5</manvolnum></citerefentry>.</para></listitem>
Brian Stinson 2593d8
+      </varlistentry>
Brian Stinson 2593d8
+
Brian Stinson 2593d8
       <varlistentry>
Brian Stinson 2593d8
         <term><varname>RuntimeWatchdogSec=</varname></term>
Brian Stinson 2593d8
         <term><varname>ShutdownWatchdogSec=</varname></term>
Brian Stinson 2593d8
diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml
Brian Stinson 2593d8
index 342b8385bc..87fb8b34f4 100644
Brian Stinson 2593d8
--- a/man/systemd.exec.xml
Brian Stinson 2593d8
+++ b/man/systemd.exec.xml
Brian Stinson 2593d8
@@ -710,6 +710,28 @@ CapabilityBoundingSet=~CAP_B CAP_C</programlisting>
Brian Stinson 2593d8
         details.</para></listitem>
Brian Stinson 2593d8
       </varlistentry>
Brian Stinson 2593d8
 
Brian Stinson 2593d8
+      <varlistentry>
Brian Stinson 2593d8
+        <term><varname>NUMAPolicy=</varname></term>
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+        <listitem><para>Controls the NUMA memory policy of the executed processes. Takes a policy type, one of:
Brian Stinson 2593d8
+        <option>default</option>, <option>preferred</option>, <option>bind</option>, <option>interleave</option> and
Brian Stinson 2593d8
+        <option>local</option>. A list of NUMA nodes that should be associated with the policy must be specified
Brian Stinson 2593d8
+        in <varname>NUMAMask=</varname>. For more details on each policy, please see
Brian Stinson 2593d8
+        <citerefentry><refentrytitle>set_mempolicy</refentrytitle><manvolnum>2</manvolnum></citerefentry>. For overall
Brian Stinson 2593d8
+        overview of NUMA support in Linux, see
Brian Stinson 2593d8
+        <citerefentry><refentrytitle>numa</refentrytitle><manvolnum>7</manvolnum></citerefentry>.
Brian Stinson 2593d8
+        </para></listitem>
Brian Stinson 2593d8
+      </varlistentry>
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+      <varlistentry>
Brian Stinson 2593d8
+        <term><varname>NUMAMask=</varname></term>
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+        <listitem><para>Controls the NUMA node list which will be applied alongside the selected NUMA policy.
Brian Stinson 2593d8
+        Takes a list of NUMA nodes and has the same syntax as a list of CPUs for <varname>CPUAffinity=</varname>
Brian Stinson 2593d8
+        option. Note that the list of NUMA nodes is not required for <option>default</option> and <option>local</option>
Brian Stinson 2593d8
+        policies and for <option>preferred</option> policy we expect a single NUMA node.</para></listitem>
Brian Stinson 2593d8
+      </varlistentry>
Brian Stinson 2593d8
+
Brian Stinson 2593d8
       <varlistentry>
Brian Stinson 2593d8
         <term><varname>IOSchedulingClass=</varname></term>
Brian Stinson 2593d8
 
Brian Stinson 2593d8
@@ -2709,6 +2731,12 @@ StandardInputData=SWNrIHNpdHplIGRhIHVuJyBlc3NlIEtsb3BzLAp1ZmYgZWVtYWwga2xvcHAncy
Brian Stinson 2593d8
             <entry><constant>EXIT_CONFIGURATION_DIRECTORY</constant></entry>
Brian Stinson 2593d8
             <entry>Failed to set up unit's configuration directory. See <varname>ConfigurationDirectory=</varname> above.</entry>
Brian Stinson 2593d8
           </row>
Brian Stinson 2593d8
+          <row>
Brian Stinson 2593d8
+            <entry>242</entry>
Brian Stinson 2593d8
+            <entry><constant>EXIT_NUMA_POLICY</constant></entry>
Brian Stinson 2593d8
+            <entry>Failed to set up unit's NUMA memory policy. See <varname>NUMAPolicy=</varname> and <varname>NUMAMask=</varname> above.</entry>
Brian Stinson 2593d8
+          </row>
Brian Stinson 2593d8
+
Brian Stinson 2593d8
         
Brian Stinson 2593d8
       </tgroup>
Brian Stinson 2593d8
     
Brian Stinson 2593d8
diff --git a/meson.build b/meson.build
Brian Stinson 2593d8
index 613a5133b6..fe82ca4ac2 100644
Brian Stinson 2593d8
--- a/meson.build
Brian Stinson 2593d8
+++ b/meson.build
Brian Stinson 2593d8
@@ -501,6 +501,10 @@ foreach ident : [
Brian Stinson 2593d8
                                  #include <unistd.h>'''],
Brian Stinson 2593d8
         ['explicit_bzero' ,   '''#include <string.h>'''],
Brian Stinson 2593d8
         ['reallocarray',      '''#include <malloc.h>'''],
Brian Stinson 2593d8
+        ['set_mempolicy',     '''#include <stdlib.h>
Brian Stinson 2593d8
+                                 #include <unistd.h>'''],
Brian Stinson 2593d8
+        ['get_mempolicy',     '''#include <stdlib.h>
Brian Stinson 2593d8
+                                 #include <unistd.h>'''],
Brian Stinson 2593d8
 ]
Brian Stinson 2593d8
 
Brian Stinson 2593d8
         have = cc.has_function(ident[0], prefix : ident[1], args : '-D_GNU_SOURCE')
Brian Stinson 2593d8
diff --git a/src/basic/cpu-set-util.c b/src/basic/cpu-set-util.c
Brian Stinson 2593d8
index 103b9703b3..36cb017ae7 100644
Brian Stinson 2593d8
--- a/src/basic/cpu-set-util.c
Brian Stinson 2593d8
+++ b/src/basic/cpu-set-util.c
Brian Stinson 2593d8
@@ -10,11 +10,17 @@
Brian Stinson 2593d8
 
Brian Stinson 2593d8
 #include "alloc-util.h"
Brian Stinson 2593d8
 #include "cpu-set-util.h"
Brian Stinson 2593d8
+#include "dirent-util.h"
Brian Stinson 2593d8
 #include "extract-word.h"
Brian Stinson 2593d8
+#include "fd-util.h"
Brian Stinson 2593d8
 #include "log.h"
Brian Stinson 2593d8
 #include "macro.h"
Brian Stinson 2593d8
+#include "missing.h"
Brian Stinson 2593d8
 #include "parse-util.h"
Brian Stinson 2593d8
+#include "stat-util.h"
Brian Stinson 2593d8
 #include "string-util.h"
Brian Stinson 2593d8
+#include "string-table.h"
Brian Stinson 2593d8
+#include "strv.h"
Brian Stinson 2593d8
 #include "util.h"
Brian Stinson 2593d8
 
Brian Stinson 2593d8
 char* cpu_set_to_string(const CPUSet *a) {
Brian Stinson 2593d8
@@ -290,3 +296,88 @@ int cpu_set_from_dbus(const uint8_t *bits, size_t size, CPUSet *set) {
Brian Stinson 2593d8
         s = (CPUSet) {};
Brian Stinson 2593d8
         return 0;
Brian Stinson 2593d8
 }
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+bool numa_policy_is_valid(const NUMAPolicy *policy) {
Brian Stinson 2593d8
+        assert(policy);
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+        if (!mpol_is_valid(numa_policy_get_type(policy)))
Brian Stinson 2593d8
+                return false;
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+        if (!policy->nodes.set &&
Brian Stinson 2593d8
+            !IN_SET(numa_policy_get_type(policy), MPOL_DEFAULT, MPOL_LOCAL, MPOL_PREFERRED))
Brian Stinson 2593d8
+                return false;
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+        if (policy->nodes.set &&
Brian Stinson 2593d8
+            numa_policy_get_type(policy) == MPOL_PREFERRED &&
Brian Stinson 2593d8
+            CPU_COUNT_S(policy->nodes.allocated, policy->nodes.set) != 1)
Brian Stinson 2593d8
+                return false;
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+        return true;
Brian Stinson 2593d8
+}
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+static int numa_policy_to_mempolicy(const NUMAPolicy *policy, unsigned long *ret_maxnode, unsigned long **ret_nodes) {
Brian Stinson 2593d8
+        unsigned node, bits = 0, ulong_bits;
Brian Stinson 2593d8
+        _cleanup_free_ unsigned long *out = NULL;
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+        assert(policy);
Brian Stinson 2593d8
+        assert(ret_maxnode);
Brian Stinson 2593d8
+        assert(ret_nodes);
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+        if (IN_SET(numa_policy_get_type(policy), MPOL_DEFAULT, MPOL_LOCAL) ||
Brian Stinson 2593d8
+            (numa_policy_get_type(policy) == MPOL_PREFERRED && !policy->nodes.set)) {
Brian Stinson 2593d8
+                *ret_nodes = NULL;
Brian Stinson 2593d8
+                *ret_maxnode = 0;
Brian Stinson 2593d8
+                return 0;
Brian Stinson 2593d8
+        }
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+        bits = policy->nodes.allocated * 8;
Brian Stinson 2593d8
+        ulong_bits = sizeof(unsigned long) * 8;
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+        out = new0(unsigned long, DIV_ROUND_UP(policy->nodes.allocated, sizeof(unsigned long)));
Brian Stinson 2593d8
+        if (!out)
Brian Stinson 2593d8
+                return -ENOMEM;
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+        /* We don't make any assumptions about internal type libc is using to store NUMA node mask.
Brian Stinson 2593d8
+           Hence we need to convert the node mask to the representation expected by set_mempolicy() */
Brian Stinson 2593d8
+        for (node = 0; node < bits; node++)
Brian Stinson 2593d8
+                if (CPU_ISSET_S(node, policy->nodes.allocated, policy->nodes.set))
Brian Stinson 2593d8
+                        out[node / ulong_bits] |= 1ul << (node % ulong_bits);
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+        *ret_nodes = TAKE_PTR(out);
Brian Stinson 2593d8
+        *ret_maxnode = bits + 1;
Brian Stinson 2593d8
+        return 0;
Brian Stinson 2593d8
+}
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+int apply_numa_policy(const NUMAPolicy *policy) {
Brian Stinson 2593d8
+        int r;
Brian Stinson 2593d8
+        _cleanup_free_ unsigned long *nodes = NULL;
Brian Stinson 2593d8
+        unsigned long maxnode;
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+        assert(policy);
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+        if (get_mempolicy(NULL, NULL, 0, 0, 0) < 0 && errno == ENOSYS)
Brian Stinson 2593d8
+                return -EOPNOTSUPP;
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+        if (!numa_policy_is_valid(policy))
Brian Stinson 2593d8
+                return -EINVAL;
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+        r = numa_policy_to_mempolicy(policy, &maxnode, &nodes);
Brian Stinson 2593d8
+        if (r < 0)
Brian Stinson 2593d8
+                return r;
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+        r = set_mempolicy(numa_policy_get_type(policy), nodes, maxnode);
Brian Stinson 2593d8
+        if (r < 0)
Brian Stinson 2593d8
+                return -errno;
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+        return 0;
Brian Stinson 2593d8
+}
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+static const char* const mpol_table[] = {
Brian Stinson 2593d8
+        [MPOL_DEFAULT]    = "default",
Brian Stinson 2593d8
+        [MPOL_PREFERRED]  = "preferred",
Brian Stinson 2593d8
+        [MPOL_BIND]       = "bind",
Brian Stinson 2593d8
+        [MPOL_INTERLEAVE] = "interleave",
Brian Stinson 2593d8
+        [MPOL_LOCAL]      = "local",
Brian Stinson 2593d8
+};
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+DEFINE_STRING_TABLE_LOOKUP(mpol, int);
Brian Stinson 2593d8
diff --git a/src/basic/cpu-set-util.h b/src/basic/cpu-set-util.h
Brian Stinson 2593d8
index ec640b2ec9..295028cb54 100644
Brian Stinson 2593d8
--- a/src/basic/cpu-set-util.h
Brian Stinson 2593d8
+++ b/src/basic/cpu-set-util.h
Brian Stinson 2593d8
@@ -8,6 +8,7 @@
Brian Stinson 2593d8
 #include <sched.h>
Brian Stinson 2593d8
 
Brian Stinson 2593d8
 #include "macro.h"
Brian Stinson 2593d8
+#include "missing.h"
Brian Stinson 2593d8
 
Brian Stinson 2593d8
 /* This wraps the libc interface with a variable to keep the allocated size. */
Brian Stinson 2593d8
 typedef struct CPUSet {
Brian Stinson 2593d8
@@ -52,3 +53,30 @@ int cpu_set_to_dbus(const CPUSet *set, uint8_t **ret, size_t *allocated);
Brian Stinson 2593d8
 int cpu_set_from_dbus(const uint8_t *bits, size_t size, CPUSet *set);
Brian Stinson 2593d8
 
Brian Stinson 2593d8
 int cpus_in_affinity_mask(void);
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+static inline bool mpol_is_valid(int t) {
Brian Stinson 2593d8
+        return t >= MPOL_DEFAULT && t <= MPOL_LOCAL;
Brian Stinson 2593d8
+}
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+typedef struct NUMAPolicy {
Brian Stinson 2593d8
+        /* Always use numa_policy_get_type() to read the value */
Brian Stinson 2593d8
+        int type;
Brian Stinson 2593d8
+        CPUSet nodes;
Brian Stinson 2593d8
+} NUMAPolicy;
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+bool numa_policy_is_valid(const NUMAPolicy *p);
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+static inline int numa_policy_get_type(const NUMAPolicy *p) {
Brian Stinson 2593d8
+        return p->type < 0 ? (p->nodes.set ? MPOL_PREFERRED : -1) : p->type;
Brian Stinson 2593d8
+}
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+static inline void numa_policy_reset(NUMAPolicy *p) {
Brian Stinson 2593d8
+        assert(p);
Brian Stinson 2593d8
+        cpu_set_reset(&p->nodes);
Brian Stinson 2593d8
+        p->type = -1;
Brian Stinson 2593d8
+}
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+int apply_numa_policy(const NUMAPolicy *policy);
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+const char* mpol_to_string(int i) _const_;
Brian Stinson 2593d8
+int mpol_from_string(const char *s) _pure_;
Brian Stinson 2593d8
diff --git a/src/basic/exit-status.c b/src/basic/exit-status.c
Brian Stinson 2593d8
index 21af8c4c71..0a7a53b73d 100644
Brian Stinson 2593d8
--- a/src/basic/exit-status.c
Brian Stinson 2593d8
+++ b/src/basic/exit-status.c
Brian Stinson 2593d8
@@ -155,6 +155,9 @@ const char* exit_status_to_string(int status, ExitStatusLevel level) {
Brian Stinson 2593d8
 
Brian Stinson 2593d8
                 case EXIT_CONFIGURATION_DIRECTORY:
Brian Stinson 2593d8
                         return "CONFIGURATION_DIRECTORY";
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+                case EXIT_NUMA_POLICY:
Brian Stinson 2593d8
+                        return "NUMA_POLICY";
Brian Stinson 2593d8
                 }
Brian Stinson 2593d8
         }
Brian Stinson 2593d8
 
Brian Stinson 2593d8
diff --git a/src/basic/exit-status.h b/src/basic/exit-status.h
Brian Stinson 2593d8
index c41e8b82c3..dc284aacb1 100644
Brian Stinson 2593d8
--- a/src/basic/exit-status.h
Brian Stinson 2593d8
+++ b/src/basic/exit-status.h
Brian Stinson 2593d8
@@ -69,6 +69,7 @@ enum {
Brian Stinson 2593d8
         EXIT_CACHE_DIRECTORY,
Brian Stinson 2593d8
         EXIT_LOGS_DIRECTORY, /* 240 */
Brian Stinson 2593d8
         EXIT_CONFIGURATION_DIRECTORY,
Brian Stinson 2593d8
+        EXIT_NUMA_POLICY,
Brian Stinson 2593d8
 };
Brian Stinson 2593d8
 
Brian Stinson 2593d8
 typedef enum ExitStatusLevel {
Brian Stinson 2593d8
diff --git a/src/basic/missing_syscall.h b/src/basic/missing_syscall.h
Brian Stinson 2593d8
index 93c60458bf..014dd2b326 100644
Brian Stinson 2593d8
--- a/src/basic/missing_syscall.h
Brian Stinson 2593d8
+++ b/src/basic/missing_syscall.h
Brian Stinson 2593d8
@@ -428,3 +428,46 @@ static inline ssize_t missing_statx(int dfd, const char *filename, unsigned flag
Brian Stinson 2593d8
 
Brian Stinson 2593d8
 #  define statx missing_statx
Brian Stinson 2593d8
 #endif
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+#if !HAVE_SET_MEMPOLICY
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+enum {
Brian Stinson 2593d8
+        MPOL_DEFAULT,
Brian Stinson 2593d8
+        MPOL_PREFERRED,
Brian Stinson 2593d8
+        MPOL_BIND,
Brian Stinson 2593d8
+        MPOL_INTERLEAVE,
Brian Stinson 2593d8
+        MPOL_LOCAL,
Brian Stinson 2593d8
+};
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+static inline long missing_set_mempolicy(int mode, const unsigned long *nodemask,
Brian Stinson 2593d8
+                           unsigned long maxnode) {
Brian Stinson 2593d8
+        long i;
Brian Stinson 2593d8
+#  ifdef __NR_set_mempolicy
Brian Stinson 2593d8
+        i = syscall(__NR_set_mempolicy, mode, nodemask, maxnode);
Brian Stinson 2593d8
+#  else
Brian Stinson 2593d8
+        errno = ENOSYS;
Brian Stinson 2593d8
+        i = -1;
Brian Stinson 2593d8
+#  endif
Brian Stinson 2593d8
+        return i;
Brian Stinson 2593d8
+}
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+#  define set_mempolicy missing_set_mempolicy
Brian Stinson 2593d8
+#endif
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+#if !HAVE_GET_MEMPOLICY
Brian Stinson 2593d8
+static inline long missing_get_mempolicy(int *mode, unsigned long *nodemask,
Brian Stinson 2593d8
+                           unsigned long maxnode, void *addr,
Brian Stinson 2593d8
+                           unsigned long flags) {
Brian Stinson 2593d8
+        long i;
Brian Stinson 2593d8
+#  ifdef __NR_get_mempolicy
Brian Stinson 2593d8
+        i = syscall(__NR_get_mempolicy, mode, nodemask, maxnode, addr, flags);
Brian Stinson 2593d8
+#  else
Brian Stinson 2593d8
+        errno = ENOSYS;
Brian Stinson 2593d8
+        i = -1;
Brian Stinson 2593d8
+#  endif
Brian Stinson 2593d8
+        return i;
Brian Stinson 2593d8
+}
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+#define get_mempolicy missing_get_mempolicy
Brian Stinson 2593d8
+#endif
Brian Stinson 2593d8
diff --git a/src/core/dbus-execute.c b/src/core/dbus-execute.c
Brian Stinson 2593d8
index 50ea71a281..198f149210 100644
Brian Stinson 2593d8
--- a/src/core/dbus-execute.c
Brian Stinson 2593d8
+++ b/src/core/dbus-execute.c
Brian Stinson 2593d8
@@ -223,6 +223,48 @@ static int property_get_cpu_affinity(
Brian Stinson 2593d8
         return sd_bus_message_append_array(reply, 'y', c->cpu_set.set, c->cpu_set.allocated);
Brian Stinson 2593d8
 }
Brian Stinson 2593d8
 
Brian Stinson 2593d8
+static int property_get_numa_mask(
Brian Stinson 2593d8
+                sd_bus *bus,
Brian Stinson 2593d8
+                const char *path,
Brian Stinson 2593d8
+                const char *interface,
Brian Stinson 2593d8
+                const char *property,
Brian Stinson 2593d8
+                sd_bus_message *reply,
Brian Stinson 2593d8
+                void *userdata,
Brian Stinson 2593d8
+                sd_bus_error *error) {
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+        ExecContext *c = userdata;
Brian Stinson 2593d8
+        _cleanup_free_ uint8_t *array = NULL;
Brian Stinson 2593d8
+        size_t allocated;
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+        assert(bus);
Brian Stinson 2593d8
+        assert(reply);
Brian Stinson 2593d8
+        assert(c);
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+        (void) cpu_set_to_dbus(&c->numa_policy.nodes, &array, &allocated);
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+        return sd_bus_message_append_array(reply, 'y', array, allocated);
Brian Stinson 2593d8
+}
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+static int property_get_numa_policy(
Brian Stinson 2593d8
+                sd_bus *bus,
Brian Stinson 2593d8
+                const char *path,
Brian Stinson 2593d8
+                const char *interface,
Brian Stinson 2593d8
+                const char *property,
Brian Stinson 2593d8
+                sd_bus_message *reply,
Brian Stinson 2593d8
+                void *userdata,
Brian Stinson 2593d8
+                sd_bus_error *error) {
Brian Stinson 2593d8
+        ExecContext *c = userdata;
Brian Stinson 2593d8
+        int32_t policy;
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+        assert(bus);
Brian Stinson 2593d8
+        assert(reply);
Brian Stinson 2593d8
+        assert(c);
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+        policy = numa_policy_get_type(&c->numa_policy);
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+        return sd_bus_message_append_basic(reply, 'i', &policy);
Brian Stinson 2593d8
+}
Brian Stinson 2593d8
+
Brian Stinson 2593d8
 static int property_get_timer_slack_nsec(
Brian Stinson 2593d8
                 sd_bus *bus,
Brian Stinson 2593d8
                 const char *path,
Brian Stinson 2593d8
@@ -698,6 +740,8 @@ const sd_bus_vtable bus_exec_vtable[] = {
Brian Stinson 2593d8
         SD_BUS_PROPERTY("CPUSchedulingPolicy", "i", property_get_cpu_sched_policy, 0, SD_BUS_VTABLE_PROPERTY_CONST),
Brian Stinson 2593d8
         SD_BUS_PROPERTY("CPUSchedulingPriority", "i", property_get_cpu_sched_priority, 0, SD_BUS_VTABLE_PROPERTY_CONST),
Brian Stinson 2593d8
         SD_BUS_PROPERTY("CPUAffinity", "ay", property_get_cpu_affinity, 0, SD_BUS_VTABLE_PROPERTY_CONST),
Brian Stinson 2593d8
+        SD_BUS_PROPERTY("NUMAPolicy", "i", property_get_numa_policy, 0, SD_BUS_VTABLE_PROPERTY_CONST),
Brian Stinson 2593d8
+        SD_BUS_PROPERTY("NUMAMask", "ay", property_get_numa_mask, 0, SD_BUS_VTABLE_PROPERTY_CONST),
Brian Stinson 2593d8
         SD_BUS_PROPERTY("TimerSlackNSec", "t", property_get_timer_slack_nsec, 0, SD_BUS_VTABLE_PROPERTY_CONST),
Brian Stinson 2593d8
         SD_BUS_PROPERTY("CPUSchedulingResetOnFork", "b", bus_property_get_bool, offsetof(ExecContext, cpu_sched_reset_on_fork), SD_BUS_VTABLE_PROPERTY_CONST),
Brian Stinson 2593d8
         SD_BUS_PROPERTY("NonBlocking", "b", bus_property_get_bool, offsetof(ExecContext, non_blocking), SD_BUS_VTABLE_PROPERTY_CONST),
Brian Stinson 2593d8
@@ -1550,9 +1594,10 @@ int bus_exec_context_set_transient_property(
Brian Stinson 2593d8
                 return 1;
Brian Stinson 2593d8
         }
Brian Stinson 2593d8
 #endif
Brian Stinson 2593d8
-        if (streq(name, "CPUAffinity")) {
Brian Stinson 2593d8
+        if (STR_IN_SET(name, "CPUAffinity", "NUMAMask")) {
Brian Stinson 2593d8
                 const void *a;
Brian Stinson 2593d8
                 size_t n;
Brian Stinson 2593d8
+                bool affinity = streq(name, "CPUAffinity");
Brian Stinson 2593d8
                 _cleanup_(cpu_set_reset) CPUSet set = {};
Brian Stinson 2593d8
 
Brian Stinson 2593d8
                 r = sd_bus_message_read_array(message, 'y', &a, &n);
Brian Stinson 2593d8
@@ -1565,7 +1610,7 @@ int bus_exec_context_set_transient_property(
Brian Stinson 2593d8
 
Brian Stinson 2593d8
                 if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
Brian Stinson 2593d8
                         if (n == 0) {
Brian Stinson 2593d8
-                                cpu_set_reset(&c->cpu_set);
Brian Stinson 2593d8
+                                cpu_set_reset(affinity ? &c->cpu_set : &c->numa_policy.nodes);
Brian Stinson 2593d8
                                 unit_write_settingf(u, flags, name, "%s=", name);
Brian Stinson 2593d8
                         } else {
Brian Stinson 2593d8
                                 _cleanup_free_ char *str = NULL;
Brian Stinson 2593d8
@@ -1577,7 +1622,7 @@ int bus_exec_context_set_transient_property(
Brian Stinson 2593d8
                                 /* We forego any optimizations here, and always create the structure using
Brian Stinson 2593d8
                                  * cpu_set_add_all(), because we don't want to care if the existing size we
Brian Stinson 2593d8
                                  * got over dbus is appropriate. */
Brian Stinson 2593d8
-                                r = cpu_set_add_all(&c->cpu_set, &set);
Brian Stinson 2593d8
+                                r = cpu_set_add_all(affinity ? &c->cpu_set : &c->numa_policy.nodes, &set);
Brian Stinson 2593d8
                                 if (r < 0)
Brian Stinson 2593d8
                                         return r;
Brian Stinson 2593d8
 
Brian Stinson 2593d8
@@ -1587,6 +1632,20 @@ int bus_exec_context_set_transient_property(
Brian Stinson 2593d8
 
Brian Stinson 2593d8
                 return 1;
Brian Stinson 2593d8
 
Brian Stinson 2593d8
+        } else if (streq(name, "NUMAPolicy")) {
Brian Stinson 2593d8
+                int32_t type;
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+                r = sd_bus_message_read(message, "i", &type);
Brian Stinson 2593d8
+                if (r < 0)
Brian Stinson 2593d8
+                        return r;
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+                if (!mpol_is_valid(type))
Brian Stinson 2593d8
+                        return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid NUMAPolicy value: %i", type);
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+                if (!UNIT_WRITE_FLAGS_NOOP(flags))
Brian Stinson 2593d8
+                        c->numa_policy.type = type;
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+                return 1;
Brian Stinson 2593d8
         } else if (streq(name, "IOSchedulingClass")) {
Brian Stinson 2593d8
                 int32_t q;
Brian Stinson 2593d8
 
Brian Stinson 2593d8
diff --git a/src/core/execute.c b/src/core/execute.c
Brian Stinson 2593d8
index bc26aa66e7..56aa89e1ec 100644
Brian Stinson 2593d8
--- a/src/core/execute.c
Brian Stinson 2593d8
+++ b/src/core/execute.c
Brian Stinson 2593d8
@@ -2997,6 +2997,16 @@ static int exec_child(
Brian Stinson 2593d8
                         return log_unit_error_errno(unit, errno, "Failed to set up CPU affinity: %m");
Brian Stinson 2593d8
                 }
Brian Stinson 2593d8
 
Brian Stinson 2593d8
+        if (mpol_is_valid(numa_policy_get_type(&context->numa_policy))) {
Brian Stinson 2593d8
+                r = apply_numa_policy(&context->numa_policy);
Brian Stinson 2593d8
+                if (r == -EOPNOTSUPP)
Brian Stinson 2593d8
+                        log_unit_debug_errno(unit, r, "NUMA support not available, ignoring.");
Brian Stinson 2593d8
+                else if (r < 0) {
Brian Stinson 2593d8
+                        *exit_status = EXIT_NUMA_POLICY;
Brian Stinson 2593d8
+                        return log_unit_error_errno(unit, r, "Failed to set NUMA memory policy: %m");
Brian Stinson 2593d8
+                }
Brian Stinson 2593d8
+        }
Brian Stinson 2593d8
+
Brian Stinson 2593d8
         if (context->ioprio_set)
Brian Stinson 2593d8
                 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
Brian Stinson 2593d8
                         *exit_status = EXIT_IOPRIO;
Brian Stinson 2593d8
@@ -3651,6 +3661,7 @@ void exec_context_init(ExecContext *c) {
Brian Stinson 2593d8
         assert_cc(NAMESPACE_FLAGS_INITIAL != NAMESPACE_FLAGS_ALL);
Brian Stinson 2593d8
         c->restrict_namespaces = NAMESPACE_FLAGS_INITIAL;
Brian Stinson 2593d8
         c->log_level_max = -1;
Brian Stinson 2593d8
+        numa_policy_reset(&c->numa_policy);
Brian Stinson 2593d8
 }
Brian Stinson 2593d8
 
Brian Stinson 2593d8
 void exec_context_done(ExecContext *c) {
Brian Stinson 2593d8
@@ -3695,6 +3706,7 @@ void exec_context_done(ExecContext *c) {
Brian Stinson 2593d8
         c->n_temporary_filesystems = 0;
Brian Stinson 2593d8
 
Brian Stinson 2593d8
         cpu_set_reset(&c->cpu_set);
Brian Stinson 2593d8
+        numa_policy_reset(&c->numa_policy);
Brian Stinson 2593d8
 
Brian Stinson 2593d8
         c->utmp_id = mfree(c->utmp_id);
Brian Stinson 2593d8
         c->selinux_context = mfree(c->selinux_context);
Brian Stinson 2593d8
@@ -4104,6 +4116,14 @@ void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) {
Brian Stinson 2593d8
                 fprintf(f, "%sCPUAffinity: %s\n", prefix, affinity);
Brian Stinson 2593d8
         }
Brian Stinson 2593d8
 
Brian Stinson 2593d8
+        if (mpol_is_valid(numa_policy_get_type(&c->numa_policy))) {
Brian Stinson 2593d8
+                _cleanup_free_ char *nodes = NULL;
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+                nodes = cpu_set_to_range_string(&c->numa_policy.nodes);
Brian Stinson 2593d8
+                fprintf(f, "%sNUMAPolicy: %s\n", prefix, mpol_to_string(numa_policy_get_type(&c->numa_policy)));
Brian Stinson 2593d8
+                fprintf(f, "%sNUMAMask: %s\n", prefix, strnull(nodes));
Brian Stinson 2593d8
+        }
Brian Stinson 2593d8
+
Brian Stinson 2593d8
         if (c->timer_slack_nsec != NSEC_INFINITY)
Brian Stinson 2593d8
                 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
Brian Stinson 2593d8
 
Brian Stinson 2593d8
diff --git a/src/core/execute.h b/src/core/execute.h
Brian Stinson 2593d8
index e1e7a494cd..b2eb55f8f5 100644
Brian Stinson 2593d8
--- a/src/core/execute.h
Brian Stinson 2593d8
+++ b/src/core/execute.h
Brian Stinson 2593d8
@@ -150,6 +150,7 @@ struct ExecContext {
Brian Stinson 2593d8
         int cpu_sched_priority;
Brian Stinson 2593d8
 
Brian Stinson 2593d8
         CPUSet cpu_set;
Brian Stinson 2593d8
+        NUMAPolicy numa_policy;
Brian Stinson 2593d8
 
Brian Stinson 2593d8
         ExecInput std_input;
Brian Stinson 2593d8
         ExecOutput std_output;
Brian Stinson 2593d8
diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4
Brian Stinson 2593d8
index 1066bcfb8f..cdf4d14c4e 100644
Brian Stinson 2593d8
--- a/src/core/load-fragment-gperf.gperf.m4
Brian Stinson 2593d8
+++ b/src/core/load-fragment-gperf.gperf.m4
Brian Stinson 2593d8
@@ -36,6 +36,8 @@ $1.CPUSchedulingPolicy,          config_parse_exec_cpu_sched_policy, 0,
Brian Stinson 2593d8
 $1.CPUSchedulingPriority,        config_parse_exec_cpu_sched_prio,   0,                             offsetof($1, exec_context)
Brian Stinson 2593d8
 $1.CPUSchedulingResetOnFork,     config_parse_bool,                  0,                             offsetof($1, exec_context.cpu_sched_reset_on_fork)
Brian Stinson 2593d8
 $1.CPUAffinity,                  config_parse_exec_cpu_affinity,     0,                             offsetof($1, exec_context)
Brian Stinson 2593d8
+$1.NUMAPolicy,                   config_parse_numa_policy,           0,                             offsetof($1, exec_context.numa_policy.type)
Brian Stinson 2593d8
+$1.NUMAMask,                     config_parse_numa_mask,             0,                             offsetof($1, exec_context.numa_policy)
Brian Stinson 2593d8
 $1.UMask,                        config_parse_mode,                  0,                             offsetof($1, exec_context.umask)
Brian Stinson 2593d8
 $1.Environment,                  config_parse_environ,               0,                             offsetof($1, exec_context.environment)
Brian Stinson 2593d8
 $1.EnvironmentFile,              config_parse_unit_env_file,         0,                             offsetof($1, exec_context.environment_files)
Brian Stinson 2593d8
diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c
Brian Stinson 2593d8
index 34ae834188..35dd595098 100644
Brian Stinson 2593d8
--- a/src/core/load-fragment.c
Brian Stinson 2593d8
+++ b/src/core/load-fragment.c
Brian Stinson 2593d8
@@ -93,6 +93,7 @@ DEFINE_CONFIG_PARSE_PTR(config_parse_blockio_weight, cg_blkio_weight_parse, uint
Brian Stinson 2593d8
 DEFINE_CONFIG_PARSE_PTR(config_parse_cg_weight, cg_weight_parse, uint64_t, "Invalid weight");
Brian Stinson 2593d8
 DEFINE_CONFIG_PARSE_PTR(config_parse_cpu_shares, cg_cpu_shares_parse, uint64_t, "Invalid CPU shares");
Brian Stinson 2593d8
 DEFINE_CONFIG_PARSE_PTR(config_parse_exec_mount_flags, mount_propagation_flags_from_string, unsigned long, "Failed to parse mount flag");
Brian Stinson 2593d8
+DEFINE_CONFIG_PARSE_ENUM_WITH_DEFAULT(config_parse_numa_policy, mpol, int, -1, "Invalid NUMA policy type");
Brian Stinson 2593d8
 
Brian Stinson 2593d8
 int config_parse_unit_deps(
Brian Stinson 2593d8
                 const char *unit,
Brian Stinson 2593d8
@@ -1159,6 +1160,33 @@ int config_parse_exec_cpu_sched_policy(const char *unit,
Brian Stinson 2593d8
         return 0;
Brian Stinson 2593d8
 }
Brian Stinson 2593d8
 
Brian Stinson 2593d8
+int config_parse_numa_mask(const char *unit,
Brian Stinson 2593d8
+                           const char *filename,
Brian Stinson 2593d8
+                           unsigned line,
Brian Stinson 2593d8
+                           const char *section,
Brian Stinson 2593d8
+                           unsigned section_line,
Brian Stinson 2593d8
+                           const char *lvalue,
Brian Stinson 2593d8
+                           int ltype,
Brian Stinson 2593d8
+                           const char *rvalue,
Brian Stinson 2593d8
+                           void *data,
Brian Stinson 2593d8
+                           void *userdata) {
Brian Stinson 2593d8
+        int r;
Brian Stinson 2593d8
+        NUMAPolicy *p = data;
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+        assert(filename);
Brian Stinson 2593d8
+        assert(lvalue);
Brian Stinson 2593d8
+        assert(rvalue);
Brian Stinson 2593d8
+        assert(data);
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+        r = parse_cpu_set_extend(rvalue, &p->nodes, true, unit, filename, line, lvalue);
Brian Stinson 2593d8
+        if (r < 0) {
Brian Stinson 2593d8
+                log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse NUMA node mask, ignoring: %s", rvalue);
Brian Stinson 2593d8
+                return 0;
Brian Stinson 2593d8
+        }
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+        return r;
Brian Stinson 2593d8
+}
Brian Stinson 2593d8
+
Brian Stinson 2593d8
 int config_parse_exec_cpu_sched_prio(const char *unit,
Brian Stinson 2593d8
                                      const char *filename,
Brian Stinson 2593d8
                                      unsigned line,
Brian Stinson 2593d8
diff --git a/src/core/load-fragment.h b/src/core/load-fragment.h
Brian Stinson 2593d8
index dad281ef72..f2ca1b8ee7 100644
Brian Stinson 2593d8
--- a/src/core/load-fragment.h
Brian Stinson 2593d8
+++ b/src/core/load-fragment.h
Brian Stinson 2593d8
@@ -102,6 +102,8 @@ CONFIG_PARSER_PROTOTYPE(config_parse_job_timeout_sec);
Brian Stinson 2593d8
 CONFIG_PARSER_PROTOTYPE(config_parse_job_running_timeout_sec);
Brian Stinson 2593d8
 CONFIG_PARSER_PROTOTYPE(config_parse_log_extra_fields);
Brian Stinson 2593d8
 CONFIG_PARSER_PROTOTYPE(config_parse_collect_mode);
Brian Stinson 2593d8
+CONFIG_PARSER_PROTOTYPE(config_parse_numa_policy);
Brian Stinson 2593d8
+CONFIG_PARSER_PROTOTYPE(config_parse_numa_mask);
Brian Stinson 2593d8
 
Brian Stinson 2593d8
 /* gperf prototypes */
Brian Stinson 2593d8
 const struct ConfigPerfItem* load_fragment_gperf_lookup(const char *key, GPERF_LEN_TYPE length);
Brian Stinson 2593d8
diff --git a/src/core/main.c b/src/core/main.c
Brian Stinson 2593d8
index c74dc641c1..83f9dd5878 100644
Brian Stinson 2593d8
--- a/src/core/main.c
Brian Stinson 2593d8
+++ b/src/core/main.c
Brian Stinson 2593d8
@@ -134,6 +134,7 @@ static uint64_t arg_default_tasks_max;
Brian Stinson 2593d8
 static sd_id128_t arg_machine_id;
Brian Stinson 2593d8
 static EmergencyAction arg_cad_burst_action;
Brian Stinson 2593d8
 static CPUSet arg_cpu_affinity;
Brian Stinson 2593d8
+static NUMAPolicy arg_numa_policy;
Brian Stinson 2593d8
 
Brian Stinson 2593d8
 static int parse_configuration(void);
Brian Stinson 2593d8
 
Brian Stinson 2593d8
@@ -660,6 +661,8 @@ static int parse_config_file(void) {
Brian Stinson 2593d8
                 { "Manager", "ShowStatus",                config_parse_show_status,      0, &arg_show_status                       },
Brian Stinson 2593d8
                 { "Manager", "CPUAffinity",               config_parse_cpu_affinity2,    0, &arg_cpu_affinity                      },
Brian Stinson 2593d8
                 { "Manager", "JoinControllers",           config_parse_join_controllers, 0, &arg_join_controllers                  },
Brian Stinson 2593d8
+                { "Manager", "NUMAPolicy",                config_parse_numa_policy,      0, &arg_numa_policy.type                  },
Brian Stinson 2593d8
+                { "Manager", "NUMAMask",                  config_parse_numa_mask,        0, &arg_numa_policy                       },
Brian Stinson 2593d8
                 { "Manager", "RuntimeWatchdogSec",        config_parse_sec,              0, &arg_runtime_watchdog                  },
Brian Stinson 2593d8
                 { "Manager", "ShutdownWatchdogSec",       config_parse_sec,              0, &arg_shutdown_watchdog                 },
Brian Stinson 2593d8
                 { "Manager", "WatchdogDevice",            config_parse_path,             0, &arg_watchdog_device                   },
Brian Stinson 2593d8
@@ -1501,6 +1504,27 @@ static void update_cpu_affinity(bool skip_setup) {
Brian Stinson 2593d8
                 log_warning_errno(errno, "Failed to set CPU affinity: %m");
Brian Stinson 2593d8
 }
Brian Stinson 2593d8
 
Brian Stinson 2593d8
+static void update_numa_policy(bool skip_setup) {
Brian Stinson 2593d8
+        int r;
Brian Stinson 2593d8
+        _cleanup_free_ char *nodes = NULL;
Brian Stinson 2593d8
+        const char * policy = NULL;
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+        if (skip_setup || !mpol_is_valid(numa_policy_get_type(&arg_numa_policy)))
Brian Stinson 2593d8
+                return;
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+        if (DEBUG_LOGGING) {
Brian Stinson 2593d8
+                policy = mpol_to_string(numa_policy_get_type(&arg_numa_policy));
Brian Stinson 2593d8
+                nodes = cpu_set_to_range_string(&arg_numa_policy.nodes);
Brian Stinson 2593d8
+                log_debug("Setting NUMA policy to %s, with nodes %s.", strnull(policy), strnull(nodes));
Brian Stinson 2593d8
+        }
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+        r = apply_numa_policy(&arg_numa_policy);
Brian Stinson 2593d8
+        if (r == -EOPNOTSUPP)
Brian Stinson 2593d8
+                log_debug_errno(r, "NUMA support not available, ignoring.");
Brian Stinson 2593d8
+        else if (r < 0)
Brian Stinson 2593d8
+                log_warning_errno(r, "Failed to set NUMA memory policy: %m");
Brian Stinson 2593d8
+}
Brian Stinson 2593d8
+
Brian Stinson 2593d8
 static void do_reexecute(
Brian Stinson 2593d8
                 int argc,
Brian Stinson 2593d8
                 char *argv[],
Brian Stinson 2593d8
@@ -1672,6 +1696,7 @@ static int invoke_main_loop(
Brian Stinson 2593d8
                         set_manager_defaults(m);
Brian Stinson 2593d8
 
Brian Stinson 2593d8
                         update_cpu_affinity(false);
Brian Stinson 2593d8
+                        update_numa_policy(false);
Brian Stinson 2593d8
 
Brian Stinson 2593d8
                         if (saved_log_level >= 0)
Brian Stinson 2593d8
                                 manager_override_log_level(m, saved_log_level);
Brian Stinson 2593d8
@@ -1832,6 +1857,7 @@ static int initialize_runtime(
Brian Stinson 2593d8
                 return 0;
Brian Stinson 2593d8
 
Brian Stinson 2593d8
         update_cpu_affinity(skip_setup);
Brian Stinson 2593d8
+        update_numa_policy(skip_setup);
Brian Stinson 2593d8
 
Brian Stinson 2593d8
         if (arg_system) {
Brian Stinson 2593d8
                 /* Make sure we leave a core dump without panicing the kernel. */
Brian Stinson 2593d8
@@ -2011,6 +2037,7 @@ static void reset_arguments(void) {
Brian Stinson 2593d8
         arg_cad_burst_action = EMERGENCY_ACTION_REBOOT_FORCE;
Brian Stinson 2593d8
 
Brian Stinson 2593d8
         cpu_set_reset(&arg_cpu_affinity);
Brian Stinson 2593d8
+        numa_policy_reset(&arg_numa_policy);
Brian Stinson 2593d8
 }
Brian Stinson 2593d8
 
Brian Stinson 2593d8
 static int parse_configuration(void) {
Brian Stinson 2593d8
diff --git a/src/core/system.conf.in b/src/core/system.conf.in
Brian Stinson 2593d8
index 653ec6b8c9..0d93fbf147 100644
Brian Stinson 2593d8
--- a/src/core/system.conf.in
Brian Stinson 2593d8
+++ b/src/core/system.conf.in
Brian Stinson 2593d8
@@ -24,6 +24,8 @@
Brian Stinson 2593d8
 #CtrlAltDelBurstAction=reboot-force
Brian Stinson 2593d8
 #CPUAffinity=1 2
Brian Stinson 2593d8
 #JoinControllers=cpu,cpuacct net_cls,net_prio
Brian Stinson 2593d8
+#NUMAPolicy=default
Brian Stinson 2593d8
+#NUMAMask=
Brian Stinson 2593d8
 #RuntimeWatchdogSec=0
Brian Stinson 2593d8
 #ShutdownWatchdogSec=10min
Brian Stinson 2593d8
 #CapabilityBoundingSet=
Brian Stinson 2593d8
diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c
Brian Stinson 2593d8
index ec8732c226..055edd6e22 100644
Brian Stinson 2593d8
--- a/src/shared/bus-unit-util.c
Brian Stinson 2593d8
+++ b/src/shared/bus-unit-util.c
Brian Stinson 2593d8
@@ -947,6 +947,34 @@ static int bus_append_execute_property(sd_bus_message *m, const char *field, con
Brian Stinson 2593d8
                 return bus_append_byte_array(m, field, array, allocated);
Brian Stinson 2593d8
         }
Brian Stinson 2593d8
 
Brian Stinson 2593d8
+        if (streq(field, "NUMAPolicy")) {
Brian Stinson 2593d8
+                r = mpol_from_string(eq);
Brian Stinson 2593d8
+                if (r < 0)
Brian Stinson 2593d8
+                        return log_error_errno(r, "Failed to parse %s value: %s", field, eq);
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+                r = sd_bus_message_append(m, "(sv)", field, "i", (int32_t) r);
Brian Stinson 2593d8
+                if (r < 0)
Brian Stinson 2593d8
+                        return bus_log_create_error(r);
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+                return 1;
Brian Stinson 2593d8
+        }
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+        if (streq(field, "NUMAMask")) {
Brian Stinson 2593d8
+                _cleanup_(cpu_set_reset) CPUSet nodes = {};
Brian Stinson 2593d8
+                _cleanup_free_ uint8_t *array = NULL;
Brian Stinson 2593d8
+                size_t allocated;
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+                r = parse_cpu_set(eq, &nodes);
Brian Stinson 2593d8
+                if (r < 0)
Brian Stinson 2593d8
+                        return log_error_errno(r, "Failed to parse %s value: %s", field, eq);
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+                r = cpu_set_to_dbus(&nodes, &array, &allocated);
Brian Stinson 2593d8
+                if (r < 0)
Brian Stinson 2593d8
+                        return log_error_errno(r, "Failed to serialize NUMAMask: %m");
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+                return bus_append_byte_array(m, field, array, allocated);
Brian Stinson 2593d8
+        }
Brian Stinson 2593d8
+
Brian Stinson 2593d8
         if (STR_IN_SET(field, "RestrictAddressFamilies", "SystemCallFilter")) {
Brian Stinson 2593d8
                 int whitelist = 1;
Brian Stinson 2593d8
                 const char *p = eq;
Brian Stinson 2593d8
diff --git a/src/systemctl/systemctl.c b/src/systemctl/systemctl.c
Brian Stinson 2593d8
index 0154b300a3..7274921e6d 100644
Brian Stinson 2593d8
--- a/src/systemctl/systemctl.c
Brian Stinson 2593d8
+++ b/src/systemctl/systemctl.c
Brian Stinson 2593d8
@@ -4573,6 +4573,20 @@ static int print_property(const char *name, sd_bus_message *m, bool value, bool
Brian Stinson 2593d8
 
Brian Stinson 2593d8
         switch (bus_type) {
Brian Stinson 2593d8
 
Brian Stinson 2593d8
+        case SD_BUS_TYPE_INT32:
Brian Stinson 2593d8
+                if (streq(name, "NUMAPolicy")) {
Brian Stinson 2593d8
+                        int32_t i;
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+                        r = sd_bus_message_read_basic(m, bus_type, &i);
Brian Stinson 2593d8
+                        if (r < 0)
Brian Stinson 2593d8
+                                return r;
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+                        print_prop(name, "%s", strna(mpol_to_string(i)));
Brian Stinson 2593d8
+
Brian Stinson 2593d8
+                        return 1;
Brian Stinson 2593d8
+                }
Brian Stinson 2593d8
+                break;
Brian Stinson 2593d8
+
Brian Stinson 2593d8
         case SD_BUS_TYPE_STRUCT:
Brian Stinson 2593d8
 
Brian Stinson 2593d8
                 if (contents[0] == SD_BUS_TYPE_UINT32 && streq(name, "Job")) {
Brian Stinson 2593d8
@@ -4878,7 +4892,7 @@ static int print_property(const char *name, sd_bus_message *m, bool value, bool
Brian Stinson 2593d8
                         print_prop(name, "%s", h);
Brian Stinson 2593d8
 
Brian Stinson 2593d8
                         return 1;
Brian Stinson 2593d8
-                } else if (contents[0] == SD_BUS_TYPE_BYTE && streq(name, "CPUAffinity")) {
Brian Stinson 2593d8
+                } else if (contents[0] == SD_BUS_TYPE_BYTE && STR_IN_SET(name, "CPUAffinity", "NUMAMask")) {
Brian Stinson 2593d8
                         _cleanup_free_ char *affinity = NULL;
Brian Stinson 2593d8
                         _cleanup_(cpu_set_reset) CPUSet set = {};
Brian Stinson 2593d8
                         const void *a;
Brian Stinson 2593d8
@@ -4890,7 +4904,7 @@ static int print_property(const char *name, sd_bus_message *m, bool value, bool
Brian Stinson 2593d8
 
Brian Stinson 2593d8
                         r = cpu_set_from_dbus(a, n, &set);
Brian Stinson 2593d8
                         if (r < 0)
Brian Stinson 2593d8
-                                return log_error_errno(r, "Failed to deserialize CPUAffinity: %m");
Brian Stinson 2593d8
+                                return log_error_errno(r, "Failed to deserialize %s: %m", name);
Brian Stinson 2593d8
 
Brian Stinson 2593d8
                         affinity = cpu_set_to_range_string(&set);
Brian Stinson 2593d8
                         if (!affinity)