Blame SOURCES/numactl-2.0.12-libnuma-introduce-an-API-to-outdate-cpu-to-node-mapp.patch

25194a
From 3cc2e004cd5891a87d8bde2b9ddd814f68d1835a Mon Sep 17 00:00:00 2001
25194a
From: Pingfan Liu <piliu@redhat.com>
25194a
Date: Mon, 15 Jul 2019 16:24:39 +0800
25194a
Subject: [PATCH 1/2] libnuma: introduce an API to outdate cpu to node mapping
25194a
25194a
numa_node_to_cpus() caches the cpu to node mapping and does not update it
25194a
during cpu online/offline events.
25194a
25194a
Ideally, updating the mapping automatically would require something like
25194a
udev to listen on the kernel event socket and refresh the cache whenever an
25194a
event happens. That is too complicated to do inside libnuma.so. Instead,
25194a
expose an API, numa_node_to_cpu_update(), that marks the cache stale,
25194a
and leave the event-detecting task to the user.
25194a
25194a
So a user of libnuma can work with either of the following models:
25194a
 -1. blindly invalidate the cache if performance is not a concern
25194a
     numa_node_to_cpu_update();
25194a
     numa_node_to_cpus();
25194a
 -2. event driven: listen for kernel events and, when one occurs, call
25194a
     numa_node_to_cpu_update();
25194a
25194a
Signed-off-by: Pingfan Liu <piliu@redhat.com>
25194a
---
25194a
 libnuma.c         | 32 ++++++++++++++++++++++++++++----
25194a
 numa.3            |  7 +++++++
25194a
 numa.h            |  2 ++
25194a
 versions.ldscript |  1 +
25194a
 4 files changed, 38 insertions(+), 4 deletions(-)
25194a
25194a
diff --git a/libnuma.c b/libnuma.c
25194a
index cac8851..756a171 100644
25194a
--- a/libnuma.c
25194a
+++ b/libnuma.c
25194a
@@ -58,7 +58,9 @@ struct bitmask *numa_possible_cpus_ptr = NULL;
25194a
 struct bitmask *numa_nodes_ptr = NULL;
25194a
 static struct bitmask *numa_memnode_ptr = NULL;
25194a
 static unsigned long *node_cpu_mask_v1[NUMA_NUM_NODES];
25194a
+static char node_cpu_mask_v1_stale = 1;
25194a
 static struct bitmask **node_cpu_mask_v2;
25194a
+static char node_cpu_mask_v2_stale = 1;
25194a
 
25194a
 WEAK void numa_error(char *where);
25194a
 
25194a
@@ -1272,6 +1274,7 @@ numa_node_to_cpus_v1(int node, unsigned long *buffer, int bufferlen)
25194a
 	int err = 0;
25194a
 	char fn[64];
25194a
 	FILE *f;
25194a
+	char update;
25194a
 	char *line = NULL;
25194a
 	size_t len = 0;
25194a
 	struct bitmask bitmask;
25194a
@@ -1287,7 +1290,8 @@ numa_node_to_cpus_v1(int node, unsigned long *buffer, int bufferlen)
25194a
 	}
25194a
 	if (bufferlen > buflen_needed)
25194a
 		memset(buffer, 0, bufferlen);
25194a
-	if (node_cpu_mask_v1[node]) {
25194a
+	update = __atomic_fetch_and(&node_cpu_mask_v1_stale, 0, __ATOMIC_RELAXED);
25194a
+	if (node_cpu_mask_v1[node] && !update) {
25194a
 		memcpy(buffer, node_cpu_mask_v1[node], buflen_needed);
25194a
 		return 0;
25194a
 	}
25194a
@@ -1328,7 +1332,15 @@ numa_node_to_cpus_v1(int node, unsigned long *buffer, int bufferlen)
25194a
 
25194a
 	/* slightly racy, see above */
25194a
 	if (node_cpu_mask_v1[node]) {
25194a
-		if (mask != buffer)
25194a
+		if (update) {
25194a
+			/*
25194a
+			 * There may be readers on node_cpu_mask_v1[], hence it can not
25194a
+			 * be freed.
25194a
+			 */
25194a
+			memcpy(node_cpu_mask_v1[node], mask, buflen_needed);
25194a
+			free(mask);
25194a
+			mask = NULL;
25194a
+		} else if (mask != buffer)
25194a
 			free(mask);
25194a
 	} else {
25194a
 		node_cpu_mask_v1[node] = mask;
25194a
@@ -1352,6 +1364,7 @@ numa_node_to_cpus_v2(int node, struct bitmask *buffer)
25194a
 	int nnodes = numa_max_node();
25194a
 	char fn[64], *line = NULL;
25194a
 	FILE *f;
25194a
+	char update;
25194a
 	size_t len = 0;
25194a
 	struct bitmask *mask;
25194a
 
25194a
@@ -1364,7 +1377,8 @@ numa_node_to_cpus_v2(int node, struct bitmask *buffer)
25194a
 	}
25194a
 	numa_bitmask_clearall(buffer);
25194a
 
25194a
-	if (node_cpu_mask_v2[node]) {
25194a
+	update = __atomic_fetch_and(&node_cpu_mask_v2_stale, 0, __ATOMIC_RELAXED);
25194a
+	if (node_cpu_mask_v2[node] && !update) {
25194a
 		/* have already constructed a mask for this node */
25194a
 		if (buffer->size < node_cpu_mask_v2[node]->size) {
25194a
 			errno = EINVAL;
25194a
@@ -1407,8 +1421,12 @@ numa_node_to_cpus_v2(int node, struct bitmask *buffer)
25194a
 	/* slightly racy, see above */
25194a
 	/* save the mask we created */
25194a
 	if (node_cpu_mask_v2[node]) {
25194a
+		if (update) {
25194a
+			copy_bitmask_to_bitmask(mask, node_cpu_mask_v2[node]);
25194a
+			numa_bitmask_free(mask);
25194a
+			mask = NULL;
25194a
 		/* how could this be? */
25194a
-		if (mask != buffer)
25194a
+		} else if (mask != buffer)
25194a
 			numa_bitmask_free(mask);
25194a
 	} else {
25194a
 		/* we don't want to cache faulty result */
25194a
@@ -1424,6 +1442,12 @@ __asm__(".symver numa_node_to_cpus_v2,numa_node_to_cpus@@libnuma_1.2");
25194a
 make_internal_alias(numa_node_to_cpus_v1);
25194a
 make_internal_alias(numa_node_to_cpus_v2);
25194a
 
25194a
+void numa_node_to_cpu_update(void)
25194a
+{
25194a
+	__atomic_store_n(&node_cpu_mask_v1_stale, 1, __ATOMIC_RELAXED);
25194a
+	__atomic_store_n(&node_cpu_mask_v2_stale, 1, __ATOMIC_RELAXED);
25194a
+}
25194a
+
25194a
 /* report the node of the specified cpu */
25194a
 int numa_node_of_cpu(int cpu)
25194a
 {
25194a
diff --git a/numa.3 b/numa.3
25194a
index ba00572..e54cb0b 100644
25194a
--- a/numa.3
25194a
+++ b/numa.3
25194a
@@ -124,6 +124,8 @@ numa \- NUMA policy library
25194a
 .br
25194a
 .BI "int numa_node_to_cpus(int " node ", struct bitmask *" mask ");
25194a
 .br
25194a
+.BI "void numa_node_to_cpu_update();"
25194a
+.br
25194a
 .BI "int numa_node_of_cpu(int " cpu ");
25194a
 .sp
25194a
 .BI "struct bitmask *numa_allocate_cpumask();"
25194a
@@ -232,6 +234,7 @@ Most functions in this library are only concerned about numa nodes and
25194a
 their memory.
25194a
 The exceptions to this are:
25194a
 .IR numa_node_to_cpus (),
25194a
+.IR numa_node_to_cpu_update (),
25194a
 .IR numa_node_of_cpu (),
25194a
 .IR numa_bind (),
25194a
 .IR numa_run_on_node (),
25194a
@@ -795,6 +798,10 @@ will be set to
25194a
 .I ERANGE
25194a
 and \-1 returned. On success 0 is returned.
25194a
 
25194a
+.BR numa_node_to_cpu_update ()
25194a
+Mark the node's cpus bitmask stale, then get the latest bitmask by calling
25194a
+.BR numa_node_to_cpus ()
25194a
+
25194a
 .BR numa_node_of_cpu ()
25194a
 returns the node that a cpu belongs to. If the user supplies an invalid cpu
25194a
 .I errno
25194a
diff --git a/numa.h b/numa.h
25194a
index 3a8c543..7316d1e 100644
25194a
--- a/numa.h
25194a
+++ b/numa.h
25194a
@@ -282,6 +282,8 @@ static inline void numa_free_cpumask(struct bitmask *b)
25194a
 /* Convert node to CPU mask. -1/errno on failure, otherwise 0. */
25194a
 int numa_node_to_cpus(int, struct bitmask *);
25194a
 
25194a
+void numa_node_to_cpu_update(void);
25194a
+
25194a
 /* report the node of the specified cpu. -1/errno on invalid cpu. */
25194a
 int numa_node_of_cpu(int cpu);
25194a
 
25194a
diff --git a/versions.ldscript b/versions.ldscript
25194a
index 4b04936..23074a0 100644
25194a
--- a/versions.ldscript
25194a
+++ b/versions.ldscript
25194a
@@ -60,6 +60,7 @@ libnuma_1.1 {
25194a
     numa_tonodemask_memory;
25194a
     numa_warn;
25194a
     numa_exit_on_warn;
25194a
+    numa_node_to_cpu_update;
25194a
   local:
25194a
     *;
25194a
 };
25194a
-- 
25194a
2.7.5
25194a