From 3cc2e004cd5891a87d8bde2b9ddd814f68d1835a Mon Sep 17 00:00:00 2001
From: Pingfan Liu <piliu@redhat.com>
Date: Mon, 15 Jul 2019 16:24:39 +0800
Subject: [PATCH 1/2] libnuma: introduce an API to invalidate the node to cpus mapping cache

numa_node_to_cpus() caches the node to cpus mapping, but does not update
it on cpu online/offline events.

Ideally, to refresh the mapping automatically, something like udev would
have to watch the kernel uevent socket and update the cache whenever an
event arrives. That is too complicated to implement inside libnuma.so.
Instead, expose an API numa_node_to_cpu_update() and leave the
event-detection task to the user.

A libnuma user can then follow either of these models:
 -1. blindly invalidate the cache before every lookup, if performance
     is not a concern:
     numa_node_to_cpu_update();
     numa_node_to_cpus();
 -2. event driven: watch for kernel cpu hotplug events and, when one
     arrives, call numa_node_to_cpu_update(); (a sketch of this model
     follows)
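
For illustration, a minimal sketch of the event driven model (2),
assuming the kernel uevent netlink socket (NETLINK_KOBJECT_UEVENT) as
the event source; dump_topology() is a helper invented for this sketch,
not part of the patch:

  #include <numa.h>
  #include <stdio.h>
  #include <string.h>
  #include <sys/socket.h>
  #include <linux/netlink.h>

  /* Print the node to cpus mapping as libnuma currently caches it. */
  static void dump_topology(void)
  {
  	struct bitmask *cpus = numa_allocate_cpumask();
  	int node;

  	for (node = 0; node <= numa_max_node(); node++)
  		if (numa_node_to_cpus(node, cpus) == 0)
  			printf("node %d: %u cpus\n", node,
  			       numa_bitmask_weight(cpus));
  	numa_free_cpumask(cpus);
  }

  int main(void)
  {
  	struct sockaddr_nl addr = {
  		.nl_family = AF_NETLINK,
  		.nl_groups = 1,		/* kernel uevent multicast group */
  	};
  	char buf[4096];
  	int fd;

  	if (numa_available() < 0)
  		return 1;

  	fd = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_KOBJECT_UEVENT);
  	if (fd < 0 || bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0)
  		return 1;

  	dump_topology();
  	for (;;) {
  		ssize_t n = recv(fd, buf, sizeof(buf) - 1, 0);

  		if (n <= 0)
  			continue;
  		buf[n] = '\0';
  		/* uevents look like "online@/devices/system/cpu/cpu1" */
  		if (strstr(buf, "/devices/system/cpu/cpu")) {
  			numa_node_to_cpu_update();
  			dump_topology();
  		}
  	}
  }

Model (1) reduces to calling numa_node_to_cpu_update() unconditionally
before each numa_node_to_cpus() lookup, trading a sysfs re-read on every
call for simplicity.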

Signed-off-by: Pingfan Liu <piliu@redhat.com>
---
 libnuma.c         | 32 ++++++++++++++++++++++++++++----
 numa.3            |  7 +++++++
 numa.h            |  2 ++
 versions.ldscript |  1 +
 4 files changed, 38 insertions(+), 4 deletions(-)

diff --git a/libnuma.c b/libnuma.c
index cac8851..756a171 100644
--- a/libnuma.c
+++ b/libnuma.c
@@ -58,7 +58,9 @@ struct bitmask *numa_possible_cpus_ptr = NULL;
 struct bitmask *numa_nodes_ptr = NULL;
 static struct bitmask *numa_memnode_ptr = NULL;
 static unsigned long *node_cpu_mask_v1[NUMA_NUM_NODES];
+static char node_cpu_mask_v1_stale = 1;
 static struct bitmask **node_cpu_mask_v2;
+static char node_cpu_mask_v2_stale = 1;
 
 WEAK void numa_error(char *where);
 
@@ -1272,6 +1274,7 @@ numa_node_to_cpus_v1(int node, unsigned long *buffer, int bufferlen)
 	int err = 0;
 	char fn[64];
 	FILE *f;
+	char update;
 	char *line = NULL;
 	size_t len = 0;
 	struct bitmask bitmask;
@@ -1287,7 +1290,8 @@ numa_node_to_cpus_v1(int node, unsigned long *buffer, int bufferlen)
 	}
 	if (bufferlen > buflen_needed)
 		memset(buffer, 0, bufferlen);
-	if (node_cpu_mask_v1[node]) {
+	update = __atomic_fetch_and(&node_cpu_mask_v1_stale, 0, __ATOMIC_RELAXED);
+	if (node_cpu_mask_v1[node] && !update) {
 		memcpy(buffer, node_cpu_mask_v1[node], buflen_needed);
 		return 0;
 	}
@@ -1328,7 +1332,15 @@ numa_node_to_cpus_v1(int node, unsigned long *buffer, int bufferlen)
 
 	/* slightly racy, see above */
 	if (node_cpu_mask_v1[node]) {
-		if (mask != buffer)
+		if (update) {
+			/*
+			 * There may be concurrent readers of node_cpu_mask_v1[],
+			 * hence it cannot be freed; update it in place instead.
+			 */
+			memcpy(node_cpu_mask_v1[node], mask, buflen_needed);
+			free(mask);
+			mask = NULL;
+		} else if (mask != buffer)
 			free(mask);
 	} else {
 		node_cpu_mask_v1[node] = mask;
@@ -1352,6 +1364,7 @@ numa_node_to_cpus_v2(int node, struct bitmask *buffer)
 	int nnodes = numa_max_node();
 	char fn[64], *line = NULL;
 	FILE *f;
+	char update;
 	size_t len = 0;
 	struct bitmask *mask;
 
@@ -1364,7 +1377,8 @@ numa_node_to_cpus_v2(int node, struct bitmask *buffer)
 	}
 	numa_bitmask_clearall(buffer);
 
-	if (node_cpu_mask_v2[node]) {
+	update = __atomic_fetch_and(&node_cpu_mask_v2_stale, 0, __ATOMIC_RELAXED);
+	if (node_cpu_mask_v2[node] && !update) {
 		/* have already constructed a mask for this node */
 		if (buffer->size < node_cpu_mask_v2[node]->size) {
 			errno = EINVAL;
@@ -1407,8 +1421,12 @@ numa_node_to_cpus_v2(int node, struct bitmask *buffer)
 	/* slightly racy, see above */
 	/* save the mask we created */
 	if (node_cpu_mask_v2[node]) {
+		if (update) {
+			copy_bitmask_to_bitmask(mask, node_cpu_mask_v2[node]);
+			numa_bitmask_free(mask);
+			mask = NULL;
 		/* how could this be? */
-		if (mask != buffer)
+		} else if (mask != buffer)
 			numa_bitmask_free(mask);
 	} else {
 		/* we don't want to cache faulty result */
@@ -1424,6 +1442,12 @@ __asm__(".symver numa_node_to_cpus_v2,numa_node_to_cpus@@libnuma_1.2");
 make_internal_alias(numa_node_to_cpus_v1);
 make_internal_alias(numa_node_to_cpus_v2);
 
+void numa_node_to_cpu_update(void)
+{
+	__atomic_store_n(&node_cpu_mask_v1_stale, 1, __ATOMIC_RELAXED);
+	__atomic_store_n(&node_cpu_mask_v2_stale, 1, __ATOMIC_RELAXED);
+}
+
 /* report the node of the specified cpu */
 int numa_node_of_cpu(int cpu)
 {
diff --git a/numa.3 b/numa.3
index ba00572..e54cb0b 100644
--- a/numa.3
+++ b/numa.3
@@ -124,6 +124,8 @@ numa \- NUMA policy library
 .br
 .BI "int numa_node_to_cpus(int " node ", struct bitmask *" mask ");
 .br
+.BI "void numa_node_to_cpu_update();"
+.br
 .BI "int numa_node_of_cpu(int " cpu ");
 .sp
 .BI "struct bitmask *numa_allocate_cpumask();"
@@ -232,6 +234,7 @@ Most functions in this library are only concerned about numa nodes and
 their memory.
 The exceptions to this are:
 .IR numa_node_to_cpus (),
+.IR numa_node_to_cpu_update (),
 .IR numa_node_of_cpu (),
 .IR numa_bind (),
 .IR numa_run_on_node (),
@@ -795,6 +798,10 @@ will be set to
 .I ERANGE
 and \-1 returned. On success 0 is returned.
 
+.BR numa_node_to_cpu_update ()
+marks the cached node to cpus mapping stale; it is re-read on the next call to
+.BR numa_node_to_cpus ().
+
 .BR numa_node_of_cpu ()
 returns the node that a cpu belongs to. If the user supplies an invalid cpu
 .I errno
diff --git a/numa.h b/numa.h
index 3a8c543..7316d1e 100644
--- a/numa.h
+++ b/numa.h
@@ -282,6 +282,8 @@ static inline void numa_free_cpumask(struct bitmask *b)
 /* Convert node to CPU mask. -1/errno on failure, otherwise 0. */
 int numa_node_to_cpus(int, struct bitmask *);
 
+void numa_node_to_cpu_update(void);
+
 /* report the node of the specified cpu. -1/errno on invalid cpu. */
 int numa_node_of_cpu(int cpu);
 
diff --git a/versions.ldscript b/versions.ldscript
index 4b04936..23074a0 100644
--- a/versions.ldscript
+++ b/versions.ldscript
@@ -60,6 +60,7 @@ libnuma_1.1 {
     numa_tonodemask_memory;
     numa_warn;
     numa_exit_on_warn;
+    numa_node_to_cpu_update;
   local:
     *;
 };
-- 
2.7.5