From 3cc2e004cd5891a87d8bde2b9ddd814f68d1835a Mon Sep 17 00:00:00 2001
From: Pingfan Liu
Date: Mon, 15 Jul 2019 16:24:39 +0800
Subject: [PATCH 1/2] libnuma: introduce an API to invalidate the cached node to cpu mapping

numa_node_to_cpus() caches the node to cpu mapping and does not update
it on cpu online/offline events.

Ideally, updating the mapping automatically would require something
like udev listening on the kernel uevent socket and refreshing the
cache whenever an event arrives. That is too complicated to implement
inside libnuma.so.

Instead, expose an API numa_node_to_cpu_update() and leave event
detection to the caller. A user of libnuma can then follow either of
the following models:
 1. unconditional: invalidate the cache before every lookup, if the
    extra sysfs reads do not matter for performance
      numa_node_to_cpu_update();
      numa_node_to_cpus();
 2. event driven: watch for kernel cpu hotplug events and call
    numa_node_to_cpu_update() when one occurs.

Signed-off-by: Pingfan Liu
---
 libnuma.c         | 32 ++++++++++++++++++++++++++++----
 numa.3            |  7 +++++++
 numa.h            |  2 ++
 versions.ldscript |  1 +
 4 files changed, 38 insertions(+), 4 deletions(-)

diff --git a/libnuma.c b/libnuma.c
index cac8851..756a171 100644
--- a/libnuma.c
+++ b/libnuma.c
@@ -58,7 +58,9 @@ struct bitmask *numa_possible_cpus_ptr = NULL;
 struct bitmask *numa_nodes_ptr = NULL;
 static struct bitmask *numa_memnode_ptr = NULL;
 static unsigned long *node_cpu_mask_v1[NUMA_NUM_NODES];
+static char node_cpu_mask_v1_stale = 1;
 static struct bitmask **node_cpu_mask_v2;
+static char node_cpu_mask_v2_stale = 1;
 
 WEAK void numa_error(char *where);
 
@@ -1272,6 +1274,7 @@ numa_node_to_cpus_v1(int node, unsigned long *buffer, int bufferlen)
 	int err = 0;
 	char fn[64];
 	FILE *f;
+	char update;
 	char *line = NULL;
 	size_t len = 0;
 	struct bitmask bitmask;
@@ -1287,7 +1290,8 @@ numa_node_to_cpus_v1(int node, unsigned long *buffer, int bufferlen)
 	}
 	if (bufferlen > buflen_needed)
 		memset(buffer, 0, bufferlen);
-	if (node_cpu_mask_v1[node]) {
+	update = __atomic_fetch_and(&node_cpu_mask_v1_stale, 0, __ATOMIC_RELAXED);
+	if (node_cpu_mask_v1[node] && !update) {
 		memcpy(buffer, node_cpu_mask_v1[node], buflen_needed);
 		return 0;
 	}
@@ -1328,7 +1332,15 @@ numa_node_to_cpus_v1(int node, unsigned long *buffer, int bufferlen)
 
 	/* slightly racy, see above */
 	if (node_cpu_mask_v1[node]) {
-		if (mask != buffer)
+		if (update) {
+			/* There may be readers of node_cpu_mask_v1[node], so
+			 * refresh it in place rather than freeing it.
+			 */
+			memcpy(node_cpu_mask_v1[node], mask, buflen_needed);
+			if (mask != buffer)
+				free(mask);
+			mask = NULL;
+		} else if (mask != buffer)
 			free(mask);
 	} else {
 		node_cpu_mask_v1[node] = mask;
@@ -1352,6 +1364,7 @@ numa_node_to_cpus_v2(int node, struct bitmask *buffer)
 	int nnodes = numa_max_node();
 	char fn[64], *line = NULL;
 	FILE *f;
+	char update;
 	size_t len = 0;
 	struct bitmask *mask;
 
@@ -1364,7 +1377,8 @@ numa_node_to_cpus_v2(int node, struct bitmask *buffer)
 	}
 	numa_bitmask_clearall(buffer);
 
-	if (node_cpu_mask_v2[node]) {
+	update = __atomic_fetch_and(&node_cpu_mask_v2_stale, 0, __ATOMIC_RELAXED);
+	if (node_cpu_mask_v2[node] && !update) {
 		/* have already constructed a mask for this node */
 		if (buffer->size < node_cpu_mask_v2[node]->size) {
 			errno = EINVAL;
@@ -1407,8 +1421,12 @@ numa_node_to_cpus_v2(int node, struct bitmask *buffer)
 	/* slightly racy, see above */
 	/* save the mask we created */
 	if (node_cpu_mask_v2[node]) {
+		if (update) {
+			copy_bitmask_to_bitmask(mask, node_cpu_mask_v2[node]);
+			numa_bitmask_free(mask);
+			mask = NULL;
 		/* how could this be? */
-		if (mask != buffer)
+		} else if (mask != buffer)
 			numa_bitmask_free(mask);
 	} else {
 		/* we don't want to cache faulty result */
@@ -1424,6 +1442,12 @@ __asm__(".symver numa_node_to_cpus_v2,numa_node_to_cpus@@libnuma_1.2");
 make_internal_alias(numa_node_to_cpus_v1);
 make_internal_alias(numa_node_to_cpus_v2);
 
+void numa_node_to_cpu_update(void)
+{
+	__atomic_store_n(&node_cpu_mask_v1_stale, 1, __ATOMIC_RELAXED);
+	__atomic_store_n(&node_cpu_mask_v2_stale, 1, __ATOMIC_RELAXED);
+}
+
 /* report the node of the specified cpu */
 int numa_node_of_cpu(int cpu)
 {
diff --git a/numa.3 b/numa.3
index ba00572..e54cb0b 100644
--- a/numa.3
+++ b/numa.3
@@ -124,6 +124,8 @@ numa \- NUMA policy library
 .br
 .BI "int numa_node_to_cpus(int " node ", struct bitmask *" mask ");
 .br
+.BI "void numa_node_to_cpu_update();"
+.br
 .BI "int numa_node_of_cpu(int " cpu ");
 .sp
 .BI "struct bitmask *numa_allocate_cpumask();"
@@ -232,6 +234,7 @@ Most functions in this library are only concerned about numa nodes and
 their memory.
 The exceptions to this are:
 .IR numa_node_to_cpus (),
+.IR numa_node_to_cpu_update (),
 .IR numa_node_of_cpu (),
 .IR numa_bind (),
 .IR numa_run_on_node (),
@@ -795,6 +798,10 @@ will be set to
 .I ERANGE
 and \-1 returned. On success 0 is returned.
 
+.BR numa_node_to_cpu_update ()
+marks the cached node to CPU masks stale; they are rebuilt on the next call to
+.BR numa_node_to_cpus ().
+
 .BR numa_node_of_cpu ()
 returns the node that a cpu belongs to. If the user supplies an invalid cpu
 .I errno
diff --git a/numa.h b/numa.h
index 3a8c543..7316d1e 100644
--- a/numa.h
+++ b/numa.h
@@ -282,6 +282,8 @@ static inline void numa_free_cpumask(struct bitmask *b)
 /* Convert node to CPU mask. -1/errno on failure, otherwise 0. */
 int numa_node_to_cpus(int, struct bitmask *);
 
+void numa_node_to_cpu_update(void);
+
 /* report the node of the specified cpu. -1/errno on invalid cpu. */
 int numa_node_of_cpu(int cpu);
 
diff --git a/versions.ldscript b/versions.ldscript
index 4b04936..23074a0 100644
--- a/versions.ldscript
+++ b/versions.ldscript
@@ -60,6 +60,7 @@ libnuma_1.1 {
     numa_tonodemask_memory;
     numa_warn;
     numa_exit_on_warn;
+    numa_node_to_cpu_update;
   local:
     *;
 };
-- 
2.7.5
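
For reference, a minimal sketch of how a caller might use the new interface
once this patch is applied (usage model 1 from the commit message; in the
event-driven model the same two calls would be made from whatever code watches
kernel cpu hotplug events). The node number and the program around the calls
are illustrative only; build with -lnuma:

#include <numa.h>
#include <stdio.h>

int main(void)
{
	struct bitmask *cpus;
	int node = 0;	/* example node; any valid node id works */
	unsigned int i;

	if (numa_available() < 0) {
		fprintf(stderr, "NUMA is not available\n");
		return 1;
	}

	cpus = numa_allocate_cpumask();

	/* Mark the cached node to cpu masks stale; the next
	 * numa_node_to_cpus() call re-reads them from sysfs.
	 */
	numa_node_to_cpu_update();
	if (numa_node_to_cpus(node, cpus) < 0) {
		perror("numa_node_to_cpus");
		numa_free_cpumask(cpus);
		return 1;
	}

	for (i = 0; i < cpus->size; i++)
		if (numa_bitmask_isbitset(cpus, i))
			printf("node %d has cpu %u\n", node, i);

	numa_free_cpumask(cpus);
	return 0;
}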