|
|
045ef6 |
# HG changeset patch
|
|
|
045ef6 |
# User gromero
|
|
|
045ef6 |
# Date 1495057954 14400
|
|
|
045ef6 |
# Wed May 17 17:52:34 2017 -0400
|
|
|
045ef6 |
# Node ID 74c81011375b5f432df155dcd7b3c9a668b45740
|
|
|
045ef6 |
# Parent 4d9931ebf8617b1b06adbc1beee6ed1b58661a8b
|
|
|
045ef6 |
8175813: PPC64: "mbind: Invalid argument" when -XX:+UseNUMA is used
|
|
|
045ef6 |
|
|
|
045ef6 |
diff --git a/src/os/linux/vm/os_linux.cpp b/src/os/linux/vm/os_linux.cpp
|
|
|
045ef6 |
--- openjdk/hotspot/src/os/linux/vm/os_linux.cpp
|
|
|
045ef6 |
+++ openjdk/hotspot/src/os/linux/vm/os_linux.cpp
|
|
|
045ef6 |
@@ -2736,8 +2736,9 @@
|
|
|
045ef6 |
bool os::numa_topology_changed() { return false; }
|
|
|
045ef6 |
|
|
|
045ef6 |
size_t os::numa_get_groups_num() {
|
|
|
045ef6 |
- int max_node = Linux::numa_max_node();
|
|
|
045ef6 |
- return max_node > 0 ? max_node + 1 : 1;
|
|
|
045ef6 |
+ // Return just the number of nodes in which it's possible to allocate memory
|
|
|
045ef6 |
+ // (in numa terminology, configured nodes).
|
|
|
045ef6 |
+ return Linux::numa_num_configured_nodes();
|
|
|
045ef6 |
}
|
|
|
045ef6 |
|
|
|
045ef6 |
int os::numa_get_group_id() {
|
|
|
045ef6 |
@@ -2751,11 +2752,33 @@
|
|
|
045ef6 |
return 0;
|
|
|
045ef6 |
}
|
|
|
045ef6 |
|
|
|
045ef6 |
+int os::Linux::get_existing_num_nodes() {
|
|
|
045ef6 |
+ size_t node;
|
|
|
045ef6 |
+ size_t highest_node_number = Linux::numa_max_node();
|
|
|
045ef6 |
+ int num_nodes = 0;
|
|
|
045ef6 |
+
|
|
|
045ef6 |
+ // Get the total number of nodes in the system including nodes without memory.
|
|
|
045ef6 |
+ for (node = 0; node <= highest_node_number; node++) {
|
|
|
045ef6 |
+ if (isnode_in_existing_nodes(node)) {
|
|
|
045ef6 |
+ num_nodes++;
|
|
|
045ef6 |
+ }
|
|
|
045ef6 |
+ }
|
|
|
045ef6 |
+ return num_nodes;
|
|
|
045ef6 |
+}
|
|
|
045ef6 |
+
|
|
|
045ef6 |
size_t os::numa_get_leaf_groups(int *ids, size_t size) {
|
|
|
045ef6 |
- for (size_t i = 0; i < size; i++) {
|
|
|
045ef6 |
- ids[i] = i;
|
|
|
045ef6 |
- }
|
|
|
045ef6 |
- return size;
|
|
|
045ef6 |
+ size_t highest_node_number = Linux::numa_max_node();
|
|
|
045ef6 |
+ size_t i = 0;
|
|
|
045ef6 |
+
|
|
|
045ef6 |
+ // Map all node ids in which it is possible to allocate memory. Also nodes are
|
|
|
045ef6 |
+ // not always consecutively available, i.e. available from 0 to the highest
|
|
|
045ef6 |
+ // node number.
|
|
|
045ef6 |
+ for (size_t node = 0; node <= highest_node_number; node++) {
|
|
|
045ef6 |
+ if (Linux::isnode_in_configured_nodes(node)) {
|
|
|
045ef6 |
+ ids[i++] = node;
|
|
|
045ef6 |
+ }
|
|
|
045ef6 |
+ }
|
|
|
045ef6 |
+ return i;
|
|
|
045ef6 |
}
|
|
|
045ef6 |
|
|
|
045ef6 |
bool os::get_page_info(char *start, page_info* info) {
|
|
|
045ef6 |
@@ -2825,18 +2848,28 @@
|
|
|
045ef6 |
libnuma_dlsym(handle, "numa_node_to_cpus")));
|
|
|
045ef6 |
set_numa_max_node(CAST_TO_FN_PTR(numa_max_node_func_t,
|
|
|
045ef6 |
libnuma_dlsym(handle, "numa_max_node")));
|
|
|
045ef6 |
+ set_numa_num_configured_nodes(CAST_TO_FN_PTR(numa_num_configured_nodes_func_t,
|
|
|
045ef6 |
+ libnuma_dlsym(handle, "numa_num_configured_nodes")));
|
|
|
045ef6 |
set_numa_available(CAST_TO_FN_PTR(numa_available_func_t,
|
|
|
045ef6 |
libnuma_dlsym(handle, "numa_available")));
|
|
|
045ef6 |
set_numa_tonode_memory(CAST_TO_FN_PTR(numa_tonode_memory_func_t,
|
|
|
045ef6 |
libnuma_dlsym(handle, "numa_tonode_memory")));
|
|
|
045ef6 |
set_numa_interleave_memory(CAST_TO_FN_PTR(numa_interleave_memory_func_t,
|
|
|
045ef6 |
- libnuma_dlsym(handle, "numa_interleave_memory")));
|
|
|
045ef6 |
+ libnuma_dlsym(handle, "numa_interleave_memory")));
|
|
|
045ef6 |
set_numa_set_bind_policy(CAST_TO_FN_PTR(numa_set_bind_policy_func_t,
|
|
|
045ef6 |
- libnuma_dlsym(handle, "numa_set_bind_policy")));
|
|
|
045ef6 |
-
|
|
|
045ef6 |
+ libnuma_dlsym(handle, "numa_set_bind_policy")));
|
|
|
045ef6 |
+ set_numa_bitmask_isbitset(CAST_TO_FN_PTR(numa_bitmask_isbitset_func_t,
|
|
|
045ef6 |
+ libnuma_dlsym(handle, "numa_bitmask_isbitset")));
|
|
|
045ef6 |
+ set_numa_distance(CAST_TO_FN_PTR(numa_distance_func_t,
|
|
|
045ef6 |
+ libnuma_dlsym(handle, "numa_distance")));
|
|
|
045ef6 |
|
|
|
045ef6 |
if (numa_available() != -1) {
|
|
|
045ef6 |
set_numa_all_nodes((unsigned long*)libnuma_dlsym(handle, "numa_all_nodes"));
|
|
|
045ef6 |
+ set_numa_all_nodes_ptr((struct bitmask **)libnuma_dlsym(handle, "numa_all_nodes_ptr"));
|
|
|
045ef6 |
+ set_numa_nodes_ptr((struct bitmask **)libnuma_dlsym(handle, "numa_nodes_ptr"));
|
|
|
045ef6 |
+ // Create an index -> node mapping, since nodes are not always consecutive
|
|
|
045ef6 |
+ _nindex_to_node = new (ResourceObj::C_HEAP, mtInternal) GrowableArray<int>(0, true);
|
|
|
045ef6 |
+ rebuild_nindex_to_node_map();
|
|
|
045ef6 |
// Create a cpu -> node mapping
|
|
|
045ef6 |
_cpu_to_node = new (ResourceObj::C_HEAP, mtInternal) GrowableArray<int>(0, true);
|
|
|
045ef6 |
rebuild_cpu_to_node_map();
|
|
|
045ef6 |
@@ -2847,6 +2880,17 @@
|
|
|
045ef6 |
return false;
|
|
|
045ef6 |
}
|
|
|
045ef6 |
|
|
|
045ef6 |
+void os::Linux::rebuild_nindex_to_node_map() {
|
|
|
045ef6 |
+ int highest_node_number = Linux::numa_max_node();
|
|
|
045ef6 |
+
|
|
|
045ef6 |
+ nindex_to_node()->clear();
|
|
|
045ef6 |
+ for (int node = 0; node <= highest_node_number; node++) {
|
|
|
045ef6 |
+ if (Linux::isnode_in_existing_nodes(node)) {
|
|
|
045ef6 |
+ nindex_to_node()->append(node);
|
|
|
045ef6 |
+ }
|
|
|
045ef6 |
+ }
|
|
|
045ef6 |
+}
|
|
|
045ef6 |
+
|
|
|
045ef6 |
// rebuild_cpu_to_node_map() constructs a table mapping cpud id to node id.
|
|
|
045ef6 |
// The table is later used in get_node_by_cpu().
|
|
|
045ef6 |
void os::Linux::rebuild_cpu_to_node_map() {
|
|
|
045ef6 |
@@ -2866,16 +2910,46 @@
|
|
|
045ef6 |
|
|
|
045ef6 |
cpu_to_node()->clear();
|
|
|
045ef6 |
cpu_to_node()->at_grow(cpu_num - 1);
|
|
|
045ef6 |
- size_t node_num = numa_get_groups_num();
|
|
|
045ef6 |
-
|
|
|
045ef6 |
+
|
|
|
045ef6 |
+ size_t node_num = get_existing_num_nodes();
|
|
|
045ef6 |
+
|
|
|
045ef6 |
+ int distance = 0;
|
|
|
045ef6 |
+ int closest_distance = INT_MAX;
|
|
|
045ef6 |
+ int closest_node = 0;
|
|
|
045ef6 |
unsigned long *cpu_map = NEW_C_HEAP_ARRAY(unsigned long, cpu_map_size, mtInternal);
|
|
|
045ef6 |
for (size_t i = 0; i < node_num; i++) {
|
|
|
045ef6 |
- if (numa_node_to_cpus(i, cpu_map, cpu_map_size * sizeof(unsigned long)) != -1) {
|
|
|
045ef6 |
+ // Check if node is configured (not a memory-less node). If it is not, find
|
|
|
045ef6 |
+ // the closest configured node.
|
|
|
045ef6 |
+ if (!isnode_in_configured_nodes(nindex_to_node()->at(i))) {
|
|
|
045ef6 |
+ closest_distance = INT_MAX;
|
|
|
045ef6 |
+ // Check distance from all remaining nodes in the system. Ignore distance
|
|
|
045ef6 |
+ // from itself and from another non-configured node.
|
|
|
045ef6 |
+ for (size_t m = 0; m < node_num; m++) {
|
|
|
045ef6 |
+ if (m != i && isnode_in_configured_nodes(nindex_to_node()->at(m))) {
|
|
|
045ef6 |
+ distance = numa_distance(nindex_to_node()->at(i), nindex_to_node()->at(m));
|
|
|
045ef6 |
+ // If a closer node is found, update. There is always at least one
|
|
|
045ef6 |
+ // configured node in the system so there is always at least one node
|
|
|
045ef6 |
+ // close.
|
|
|
045ef6 |
+ if (distance != 0 && distance < closest_distance) {
|
|
|
045ef6 |
+ closest_distance = distance;
|
|
|
045ef6 |
+ closest_node = nindex_to_node()->at(m);
|
|
|
045ef6 |
+ }
|
|
|
045ef6 |
+ }
|
|
|
045ef6 |
+ }
|
|
|
045ef6 |
+ } else {
|
|
|
045ef6 |
+ // Current node is already a configured node.
|
|
|
045ef6 |
+ closest_node = nindex_to_node()->at(i);
|
|
|
045ef6 |
+ }
|
|
|
045ef6 |
+
|
|
|
045ef6 |
+ // Get cpus from the original node and map them to the closest node. If node
|
|
|
045ef6 |
+ // is a configured node (not a memory-less node), then original node and
|
|
|
045ef6 |
+ // closest node are the same.
|
|
|
045ef6 |
+ if (numa_node_to_cpus(nindex_to_node()->at(i), cpu_map, cpu_map_size * sizeof(unsigned long)) != -1) {
|
|
|
045ef6 |
for (size_t j = 0; j < cpu_map_valid_size; j++) {
|
|
|
045ef6 |
if (cpu_map[j] != 0) {
|
|
|
045ef6 |
for (size_t k = 0; k < BitsPerCLong; k++) {
|
|
|
045ef6 |
if (cpu_map[j] & (1UL << k)) {
|
|
|
045ef6 |
- cpu_to_node()->at_put(j * BitsPerCLong + k, i);
|
|
|
045ef6 |
+ cpu_to_node()->at_put(j * BitsPerCLong + k, closest_node);
|
|
|
045ef6 |
}
|
|
|
045ef6 |
}
|
|
|
045ef6 |
}
|
|
|
045ef6 |
@@ -2893,14 +2967,20 @@
|
|
|
045ef6 |
}
|
|
|
045ef6 |
|
|
|
045ef6 |
GrowableArray<int>* os::Linux::_cpu_to_node;
|
|
|
045ef6 |
+GrowableArray<int>* os::Linux::_nindex_to_node;
|
|
|
045ef6 |
os::Linux::sched_getcpu_func_t os::Linux::_sched_getcpu;
|
|
|
045ef6 |
os::Linux::numa_node_to_cpus_func_t os::Linux::_numa_node_to_cpus;
|
|
|
045ef6 |
os::Linux::numa_max_node_func_t os::Linux::_numa_max_node;
|
|
|
045ef6 |
+os::Linux::numa_num_configured_nodes_func_t os::Linux::_numa_num_configured_nodes;
|
|
|
045ef6 |
os::Linux::numa_available_func_t os::Linux::_numa_available;
|
|
|
045ef6 |
os::Linux::numa_tonode_memory_func_t os::Linux::_numa_tonode_memory;
|
|
|
045ef6 |
os::Linux::numa_interleave_memory_func_t os::Linux::_numa_interleave_memory;
|
|
|
045ef6 |
os::Linux::numa_set_bind_policy_func_t os::Linux::_numa_set_bind_policy;
|
|
|
045ef6 |
+os::Linux::numa_bitmask_isbitset_func_t os::Linux::_numa_bitmask_isbitset;
|
|
|
045ef6 |
+os::Linux::numa_distance_func_t os::Linux::_numa_distance;
|
|
|
045ef6 |
unsigned long* os::Linux::_numa_all_nodes;
|
|
|
045ef6 |
+struct bitmask* os::Linux::_numa_all_nodes_ptr;
|
|
|
045ef6 |
+struct bitmask* os::Linux::_numa_nodes_ptr;
|
|
|
045ef6 |
|
|
|
045ef6 |
bool os::pd_uncommit_memory(char* addr, size_t size) {
|
|
|
045ef6 |
uintptr_t res = (uintptr_t) ::mmap(addr, size, PROT_NONE,
|
|
|
045ef6 |
diff --git a/src/os/linux/vm/os_linux.hpp b/src/os/linux/vm/os_linux.hpp
|
|
|
045ef6 |
--- openjdk/hotspot/src/os/linux/vm/os_linux.hpp
|
|
|
045ef6 |
+++ openjdk/hotspot/src/os/linux/vm/os_linux.hpp
|
|
|
045ef6 |
@@ -67,6 +67,7 @@
|
|
|
045ef6 |
static bool _supports_fast_thread_cpu_time;
|
|
|
045ef6 |
|
|
|
045ef6 |
static GrowableArray<int>* _cpu_to_node;
|
|
|
045ef6 |
+ static GrowableArray<int>* _nindex_to_node;
|
|
|
045ef6 |
|
|
|
045ef6 |
protected:
|
|
|
045ef6 |
|
|
|
045ef6 |
@@ -94,7 +95,9 @@
|
|
|
045ef6 |
static void set_is_floating_stack() { _is_floating_stack = true; }
|
|
|
045ef6 |
|
|
|
045ef6 |
static void rebuild_cpu_to_node_map();
|
|
|
045ef6 |
+ static void rebuild_nindex_to_node_map();
|
|
|
045ef6 |
static GrowableArray<int>* cpu_to_node() { return _cpu_to_node; }
|
|
|
045ef6 |
+ static GrowableArray<int>* nindex_to_node() { return _nindex_to_node; }
|
|
|
045ef6 |
|
|
|
045ef6 |
static size_t find_large_page_size();
|
|
|
045ef6 |
static size_t setup_large_page_size();
|
|
|
045ef6 |
@@ -243,28 +246,41 @@
|
|
|
045ef6 |
typedef int (*sched_getcpu_func_t)(void);
|
|
|
045ef6 |
typedef int (*numa_node_to_cpus_func_t)(int node, unsigned long *buffer, int bufferlen);
|
|
|
045ef6 |
typedef int (*numa_max_node_func_t)(void);
|
|
|
045ef6 |
+ typedef int (*numa_num_configured_nodes_func_t)(void);
|
|
|
045ef6 |
typedef int (*numa_available_func_t)(void);
|
|
|
045ef6 |
typedef int (*numa_tonode_memory_func_t)(void *start, size_t size, int node);
|
|
|
045ef6 |
typedef void (*numa_interleave_memory_func_t)(void *start, size_t size, unsigned long *nodemask);
|
|
|
045ef6 |
typedef void (*numa_set_bind_policy_func_t)(int policy);
|
|
|
045ef6 |
+ typedef int (*numa_bitmask_isbitset_func_t)(struct bitmask *bmp, unsigned int n);
|
|
|
045ef6 |
+ typedef int (*numa_distance_func_t)(int node1, int node2);
|
|
|
045ef6 |
|
|
|
045ef6 |
static sched_getcpu_func_t _sched_getcpu;
|
|
|
045ef6 |
static numa_node_to_cpus_func_t _numa_node_to_cpus;
|
|
|
045ef6 |
static numa_max_node_func_t _numa_max_node;
|
|
|
045ef6 |
+ static numa_num_configured_nodes_func_t _numa_num_configured_nodes;
|
|
|
045ef6 |
static numa_available_func_t _numa_available;
|
|
|
045ef6 |
static numa_tonode_memory_func_t _numa_tonode_memory;
|
|
|
045ef6 |
static numa_interleave_memory_func_t _numa_interleave_memory;
|
|
|
045ef6 |
static numa_set_bind_policy_func_t _numa_set_bind_policy;
|
|
|
045ef6 |
+ static numa_bitmask_isbitset_func_t _numa_bitmask_isbitset;
|
|
|
045ef6 |
+ static numa_distance_func_t _numa_distance;
|
|
|
045ef6 |
static unsigned long* _numa_all_nodes;
|
|
|
045ef6 |
+ static struct bitmask* _numa_all_nodes_ptr;
|
|
|
045ef6 |
+ static struct bitmask* _numa_nodes_ptr;
|
|
|
045ef6 |
|
|
|
045ef6 |
static void set_sched_getcpu(sched_getcpu_func_t func) { _sched_getcpu = func; }
|
|
|
045ef6 |
static void set_numa_node_to_cpus(numa_node_to_cpus_func_t func) { _numa_node_to_cpus = func; }
|
|
|
045ef6 |
static void set_numa_max_node(numa_max_node_func_t func) { _numa_max_node = func; }
|
|
|
045ef6 |
+ static void set_numa_num_configured_nodes(numa_num_configured_nodes_func_t func) { _numa_num_configured_nodes = func; }
|
|
|
045ef6 |
static void set_numa_available(numa_available_func_t func) { _numa_available = func; }
|
|
|
045ef6 |
static void set_numa_tonode_memory(numa_tonode_memory_func_t func) { _numa_tonode_memory = func; }
|
|
|
045ef6 |
static void set_numa_interleave_memory(numa_interleave_memory_func_t func) { _numa_interleave_memory = func; }
|
|
|
045ef6 |
static void set_numa_set_bind_policy(numa_set_bind_policy_func_t func) { _numa_set_bind_policy = func; }
|
|
|
045ef6 |
+ static void set_numa_bitmask_isbitset(numa_bitmask_isbitset_func_t func) { _numa_bitmask_isbitset = func; }
|
|
|
045ef6 |
+ static void set_numa_distance(numa_distance_func_t func) { _numa_distance = func; }
|
|
|
045ef6 |
static void set_numa_all_nodes(unsigned long* ptr) { _numa_all_nodes = ptr; }
|
|
|
045ef6 |
+ static void set_numa_all_nodes_ptr(struct bitmask **ptr) { _numa_all_nodes_ptr = (ptr == NULL ? NULL : *ptr); } // ptr is a raw symbol-lookup result and may be NULL; never dereference it blindly
|
|
|
045ef6 |
+ static void set_numa_nodes_ptr(struct bitmask **ptr) { _numa_nodes_ptr = (ptr == NULL ? NULL : *ptr); } // ptr is a raw symbol-lookup result and may be NULL; never dereference it blindly
|
|
|
045ef6 |
static int sched_getcpu_syscall(void);
|
|
|
045ef6 |
public:
|
|
|
045ef6 |
static int sched_getcpu() { return _sched_getcpu != NULL ? _sched_getcpu() : -1; }
|
|
|
045ef6 |
@@ -272,6 +288,9 @@
|
|
|
045ef6 |
return _numa_node_to_cpus != NULL ? _numa_node_to_cpus(node, buffer, bufferlen) : -1;
|
|
|
045ef6 |
}
|
|
|
045ef6 |
static int numa_max_node() { return _numa_max_node != NULL ? _numa_max_node() : -1; }
|
|
|
045ef6 |
+ static int numa_num_configured_nodes() {
|
|
|
045ef6 |
+ return _numa_num_configured_nodes != NULL ? _numa_num_configured_nodes() : -1;
|
|
|
045ef6 |
+ }
|
|
|
045ef6 |
static int numa_available() { return _numa_available != NULL ? _numa_available() : -1; }
|
|
|
045ef6 |
static int numa_tonode_memory(void *start, size_t size, int node) {
|
|
|
045ef6 |
return _numa_tonode_memory != NULL ? _numa_tonode_memory(start, size, node) : -1;
|
|
|
045ef6 |
@@ -286,7 +305,25 @@
|
|
|
045ef6 |
_numa_set_bind_policy(policy);
|
|
|
045ef6 |
}
|
|
|
045ef6 |
}
|
|
|
045ef6 |
+ static int numa_distance(int node1, int node2) {
|
|
|
045ef6 |
+ return _numa_distance != NULL ? _numa_distance(node1, node2) : -1;
|
|
|
045ef6 |
+ }
|
|
|
045ef6 |
static int get_node_by_cpu(int cpu_id);
|
|
|
045ef6 |
+ static int get_existing_num_nodes();
|
|
|
045ef6 |
+ // Check if numa node is configured (non-zero memory node).
|
|
|
045ef6 |
+ static bool isnode_in_configured_nodes(unsigned int n) {
|
|
|
045ef6 |
+ if (_numa_bitmask_isbitset != NULL && _numa_all_nodes_ptr != NULL) {
|
|
|
045ef6 |
+ return _numa_bitmask_isbitset(_numa_all_nodes_ptr, n);
|
|
|
045ef6 |
+ } else
|
|
|
045ef6 |
+ return 0;
|
|
|
045ef6 |
+ }
|
|
|
045ef6 |
+ // Check if numa node exists in the system (including zero memory nodes).
|
|
|
045ef6 |
+ static bool isnode_in_existing_nodes(unsigned int n) {
|
|
|
045ef6 |
+ if (_numa_bitmask_isbitset != NULL && _numa_nodes_ptr != NULL) {
|
|
|
045ef6 |
+ return _numa_bitmask_isbitset(_numa_nodes_ptr, n);
|
|
|
045ef6 |
+ } else
|
|
|
045ef6 |
+ return 0;
|
|
|
045ef6 |
+ }
|
|
|
045ef6 |
};
|
|
|
045ef6 |
|
|
|
045ef6 |
|