To: ak@suse.de
Subject: [PATCH][REVISED] numactl: variable_programs
From: Cliff Wickman <cpw@sgi.com>

This revision contains one small correction: the --interleave option
   previously called numa_set_interleave_mask() twice; the redundant
   call has been removed.


This patch modifies numactl, migspeed, memhog, migratepages, numademo and
stream_main to use variable-length bit masks (struct bitmask) in place of
the fixed-size nodemask_t.

Diffed against numactl-1.0.2

Signed-off-by: Cliff Wickman <cpw@sgi.com>
---
 memhog.c       |   15 ++++++-----
 migratepages.c |    8 +++---
 migspeed.c     |   19 ++++++--------
 numactl.c      |   73 ++++++++++++++++++++++++++++++---------------------------
 numademo.c     |   39 ++++++++++++++++--------------
 stream_main.c  |    9 ++++---
 6 files changed, 86 insertions(+), 77 deletions(-)

Index: numactl-1.0.2/memhog.c
===================================================================
--- numactl-1.0.2.orig/memhog.c
+++ numactl-1.0.2/memhog.c
@@ -60,14 +60,15 @@ void hog(void *map) 
 int main(int ac, char **av) 
 { 
 	char *map; 
-	nodemask_t nodes, gnodes;
+	struct bitmask *nodes, *gnodes;
 	int policy, gpolicy;
 	int ret = 0;
 	int loose = 0; 
 	int i;
 	int fd = -1; 
 
-	nodemask_zero(&nodes); 
+	nodes = allocate_nodemask();
+	gnodes = allocate_nodemask();
 
 	while (av[1] && av[1][0] == '-') { 
 		switch (av[1][1]) { 
@@ -106,19 +107,19 @@ int main(int ac, char **av) 
 	if (map == (char*)-1) 
 		err("mmap");
 	
-	if (mbind(map, length, policy, nodes.n, NUMA_NUM_NODES + 1, 0) < 0) 
+	if (mbind(map, length, policy, nodes->maskp, nodes->size, 0) < 0)
 		terr("mbind");
 	
 	gpolicy = -1; 
-	if (get_mempolicy(&gpolicy, gnodes.n, NUMA_NUM_NODES + 1, map,
-			  MPOL_F_ADDR) < 0)
+	if (get_mempolicy(&gpolicy, gnodes->maskp, gnodes->size, map, MPOL_F_ADDR) < 0)
 		terr("get_mempolicy");
 	if (!loose && policy != gpolicy) {
 		ret = 1;
 		printf("policy %d gpolicy %d\n", policy, gpolicy); 
 	}
-	if (!loose && !nodemask_equal(&gnodes, &nodes)) { 
-		printf("nodes differ %lx, %lx!\n", gnodes.n[0], nodes.n[0]); 
+	if (!loose && !bitmask_equal(gnodes, nodes)) {
+		printf("nodes differ %lx, %lx!\n",
+			gnodes->maskp[0], nodes->maskp[0]);
 		ret = 1;
 	}
 
Index: numactl-1.0.2/migratepages.c
===================================================================
--- numactl-1.0.2.orig/migratepages.c
+++ numactl-1.0.2/migratepages.c
@@ -26,8 +26,8 @@
 #include <string.h>
 #include <unistd.h>
 #include <stdarg.h>
-#include "numaif.h"
 #include "numa.h"
+#include "numaif.h"
 #include "numaint.h"
 #include "util.h"
 
@@ -62,8 +62,8 @@ int main(int argc, char *argv[])
 	char *end;
 	int rc;
 	int pid;
-	nodemask_t fromnodes;
-	nodemask_t tonodes;
+	struct bitmask *fromnodes;
+	struct bitmask *tonodes;
 
 	while ((c = getopt_long(argc,argv,"h", opts, NULL)) != -1) {
 		switch (c) {
@@ -87,7 +87,7 @@ int main(int argc, char *argv[])
 	fromnodes = nodemask(argv[1]);
 	tonodes = nodemask(argv[2]);
 
-	rc = numa_migrate_pages(pid, &fromnodes, &tonodes);
+	rc = numa_migrate_pages(pid, fromnodes, tonodes);
 
 	if (rc < 0) {
 		perror("migrate_pages");
Index: numactl-1.0.2/migspeed.c
===================================================================
--- numactl-1.0.2.orig/migspeed.c
+++ numactl-1.0.2/migspeed.c
@@ -7,7 +7,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include "numa.h"
-#include <numaif.h>
+#include "numaif.h"
 #include <time.h>
 #include <errno.h>
 #include <malloc.h>
@@ -71,8 +71,8 @@ int main(int argc, char *argv[])
 	struct timespec result;
 	unsigned long bytes;
 	double duration, mbytes;
-	nodemask_t from;
-	nodemask_t to;
+	struct bitmask *from;
+	struct bitmask *to;
 
 	pagesize = getpagesize();
 
@@ -100,8 +100,7 @@ int main(int argc, char *argv[])
 		usage();
 
 	if (verbose > 1)
-		printf("numa_max_node = %d NUMA_NUM_NODES=%d\n",
-			numa_max_node(), NUMA_NUM_NODES);
+		printf("numa_max_node = %d\n", numa_max_node());
 
 	from = nodemask(argv[optind]);
 	if (errno) {
@@ -110,7 +109,7 @@ int main(int argc, char *argv[])
 	}
 
 	if (verbose)
-		printmask("From", &from);
+		printmask("From", from);
 
 	if (!argv[optind+1])
 		usage();
@@ -122,7 +121,7 @@ int main(int argc, char *argv[])
 	}
 
 	if (verbose)
-		printmask("To", &to);
+		printmask("To", to);
 
 	bytes = pages * pagesize;
 
@@ -137,8 +136,7 @@ int main(int argc, char *argv[])
 		exit(2);
 	}
 
-	if (mbind(memory, bytes, MPOL_BIND, (unsigned long *)&from,
-				NUMA_NUM_NODES+1, 0) < 0)
+	if (mbind(memory, bytes, MPOL_BIND, from->maskp, from->size, 0) < 0)
 		numa_error("mbind");
 
 	if (verbose)
@@ -153,8 +151,7 @@ int main(int argc, char *argv[])
 	displaymap();
 	clock_gettime(CLOCK_REALTIME, &start);
 
-	if (mbind(memory, bytes, MPOL_BIND, (unsigned long *)&to,
-			NUMA_NUM_NODES+1, MPOL_MF_MOVE)<0)
+	if (mbind(memory, bytes, MPOL_BIND, to->maskp, to->size, MPOL_MF_MOVE) <0)
 		numa_error("memory move");
 
 	clock_gettime(CLOCK_REALTIME, &end);
Index: numactl-1.0.2/numactl.c
===================================================================
--- numactl-1.0.2.orig/numactl.c
+++ numactl-1.0.2/numactl.c
@@ -23,8 +23,8 @@
 #include <unistd.h>
 #include <stdarg.h>
 #include <ctype.h>
-#include "numaif.h"
 #include "numa.h"
+#include "numaif.h"
 #include "numaint.h"
 #include "util.h"
 #include "shm.h"
@@ -93,21 +93,21 @@ void usage_msg(char *msg, ...) 
 
 void show_physcpubind(void)
 {
-	int ncpus = 8192;
+	int ncpus = number_of_configured_cpus();
 	
 	for (;;) { 
-		int cpubufsize = round_up(ncpus, BITS_PER_LONG) / BYTES_PER_LONG;
-		unsigned  long cpubuf[cpubufsize / sizeof(long)];
-		
-		memset(cpubuf,0,cpubufsize);
-		if (numa_sched_getaffinity(0, cpubufsize, cpubuf) < 0) { 
+		struct bitmask *cpubuf;
+
+		cpubuf = bitmask_alloc(ncpus);
+
+		if (numa_sched_getaffinity(0, cpubuf) < 0) {
 			if (errno == EINVAL && ncpus < 1024*1024) {
 				ncpus *= 2; 
 				continue;
 			}
 			err("sched_get_affinity");
 		}
-		printcpumask("physcpubind", cpubuf, cpubufsize);
+		printcpumask("physcpubind", cpubuf);
 		break;
 	}
 }
@@ -115,9 +115,10 @@ void show_physcpubind(void)
 void show(void)
 {
 	unsigned long prefnode;
-	nodemask_t membind, interleave, cpubind;
+	struct bitmask *membind, *interleave, *cpubind;
 	unsigned long cur;
 	int policy;
+	int numa_num_nodes = number_of_possible_nodes();
 	
 	if (numa_available() < 0) { 
 		show_physcpubind();
@@ -153,17 +154,17 @@ void show(void)
 		printf("%ld (interleave next)\n",cur); 
 		break; 
 	case MPOL_BIND:
-		printf("%d\n", find_first_bit(&membind, NUMA_NUM_NODES)); 
+		printf("%d\n", find_first_bit(&membind, numa_num_nodes));
 		break;
 	} 
 	if (policy == MPOL_INTERLEAVE) {
-		printmask("interleavemask", &interleave);
+		printmask("interleavemask", interleave);
 		printf("interleavenode: %ld\n", cur); 
 	}
 	show_physcpubind();
-	printmask("cpubind", &cpubind);  // for compatibility
-	printmask("nodebind", &cpubind);
-	printmask("membind", &membind);
+	printmask("cpubind", cpubind);  // for compatibility
+	printmask("nodebind", cpubind);
+	printmask("membind", membind);
 }
 
 char *fmt_mem(unsigned long long mem, char *buf) 
@@ -199,11 +200,15 @@ static void print_distances(int maxnode)
 void print_node_cpus(int node)
 {
 	int len = 1;
+	int conf_cpus = number_of_configured_cpus();
+
 	for (;;) { 
-		int i;
-		unsigned long cpus[len];
+		int i, err;
+		struct bitmask *cpus;
+
+		cpus = bitmask_alloc(conf_cpus);
 		errno = 0;
-		int err = numa_node_to_cpus(node, cpus, len * sizeof(long));
+		err = numa_node_to_cpus(node, cpus);
 		if (err < 0) {
 			if (errno == ERANGE) {
 				len *= 2; 
@@ -212,7 +217,7 @@ void print_node_cpus(int node)
 			break; 
 		}
 		for (i = 0; i < len*BITS_PER_LONG; i++) 
-			if (test_bit(i, cpus))
+			if (bitmask_isbitset(cpus, i))
 				printf(" %d", i);
 		break;
 	}
@@ -222,7 +227,7 @@ void print_node_cpus(int node)
 void hardware(void)
 { 
 	int i;
-	int maxnode = numa_max_node(); 
+	int maxnode = number_of_configured_nodes()-1;
 	printf("available: %d nodes (0-%d)\n", 1+maxnode, maxnode); 	
 	for (i = 0; i <= maxnode; i++) { 
 		char buf[64];
@@ -317,14 +322,15 @@ void get_short_opts(struct option *o, ch
 
 int main(int ac, char **av)
 {
-	int c, i, nnodes=0;
+	int c, ncpus, i, nnodes=0;
 	long node=-1;
 	char *end;
 	char shortopts[array_len(opts)*2 + 1];
+	struct bitmask *mask;
+
 	get_short_opts(opts,shortopts);
-	while ((c = getopt_long(ac, av, shortopts, opts, NULL)) != -1) { 
-		nodemask_t mask;
-		switch (c) { 
+	while ((c = getopt_long(ac, av, shortopts, opts, NULL)) != -1) {
+		switch (c) {
 		case 's': /* --show */
 			show();
 			exit(0);  
@@ -338,9 +344,9 @@ int main(int ac, char **av)
 			errno = 0;
 			setpolicy(MPOL_INTERLEAVE);
 			if (shmfd >= 0)
-				numa_interleave_memory(shmptr, shmlen, &mask);
+				numa_interleave_memory(shmptr, shmlen, mask);
 			else
-				numa_set_interleave_mask(&mask);
+				numa_set_interleave_mask(mask);
 			checkerror("setting interleave mask");
 			break;
 		case 'N': /* --cpunodebind */
@@ -351,20 +357,18 @@ int main(int ac, char **av)
 			errno = 0;
 			check_cpubind(do_shm);
 			did_cpubind = 1;
-			numa_run_on_node_mask(&mask);
+			numa_run_on_node_mask(mask);
 			checkerror("sched_setaffinity");
 			break;
 		case 'C': /* --physcpubind */
 		{
-			int ncpus;
-			unsigned long *cpubuf;
-			numa_max_node();
+			struct bitmask *cpubuf;
 			dontshm("-C/--physcpubind");
 			cpubuf = cpumask(optarg, &ncpus);
 			errno = 0;
 			check_cpubind(do_shm);
 			did_cpubind = 1;
-			numa_sched_setaffinity(0, CPU_BYTES(ncpus), cpubuf);
+			numa_sched_setaffinity(0, cpubuf);
 			checkerror("sched_setaffinity");
 			free(cpubuf);
 			break;
@@ -376,9 +380,9 @@ int main(int ac, char **av)
 			errno = 0;
 			numa_set_bind_policy(1);
 			if (shmfd >= 0) { 
-				numa_tonodemask_memory(shmptr, shmlen, &mask);
+				numa_tonodemask_memory(shmptr, shmlen, mask);
 			} else {
-				numa_set_membind(&mask);
+				numa_set_membind(mask);
 			}
 			numa_set_bind_policy(0);
 			checkerror("setting membind");
@@ -387,14 +391,15 @@ int main(int ac, char **av)
 			checknuma();
 			setpolicy(MPOL_PREFERRED);
 			mask = nodemask(optarg);
-			for (i=0; i<sizeof(mask); i++) {
-				if (nodemask_isset(&mask, i)) {
+			for (i=0; i<mask->size; i++) {
+				if (bitmask_isbitset(mask, i)) {
 					node = i;
 					nnodes++;
 				}
 			}
 			if (nnodes != 1)
 				usage();
+			bitmask_free(mask);
 			errno = 0;
 			numa_set_bind_policy(0);
 			if (shmfd >= 0) 
Index: numactl-1.0.2/numademo.c
===================================================================
--- numactl-1.0.2.orig/numademo.c
+++ numactl-1.0.2/numademo.c
@@ -210,8 +210,9 @@ void test(enum test type)
 	unsigned long mask;
 	int i, k;
 	char buf[512];
-	nodemask_t nodes;
+	struct bitmask *nodes;
 
+	nodes = numa_allocate_nodemask();
 	thistest = type; 
 
 	memtest("memory with no policy", numa_alloc(msize));
@@ -228,10 +229,10 @@ void test(enum test type)
 		char buf2[10];
 		if (popcnt(mask) == 1) 
 			continue;
-		nodemask_zero(&nodes); 
+		numa_bitmask_clearall(nodes);
 		for (w = 0; mask >> w; w++) { 
 			if ((mask >> w) & 1)
-				nodemask_set(&nodes, w); 
+				numa_bitmask_setbit(nodes, w);
 		} 
 
 		sprintf(buf, "memory interleaved on"); 
@@ -240,7 +241,7 @@ void test(enum test type)
 				sprintf(buf2, " %d", k);
 				strcat(buf, buf2);
 			}
-		memtest(buf, numa_alloc_interleaved_subset(msize, &nodes)); 
+		memtest(buf, numa_alloc_interleaved_subset(msize, nodes));
 	}
 
 	for (i = 0; i <= max_node; i++) { 
@@ -249,19 +250,21 @@ void test(enum test type)
 		memtest("memory without policy", numa_alloc(msize)); 
 	} 
 
-	numa_set_interleave_mask(&numa_all_nodes); 
+	numa_set_interleave_mask(numa_all_nodes);
 	memtest("manual interleaving to all nodes", numa_alloc(msize)); 
 
 	if (max_node > 0) { 
-		nodemask_zero(&nodes); 
-		nodemask_set(&nodes, 0);
-		nodemask_set(&nodes, 1);
-		numa_set_interleave_mask(&nodes); 
+		numa_bitmask_clearall(nodes);
+		numa_bitmask_setbit(nodes, 0);
+		numa_bitmask_setbit(nodes, 1);
+		numa_set_interleave_mask(nodes);
 		memtest("manual interleaving on node 0/1", numa_alloc(msize)); 
 		printf("current interleave node %d\n", numa_get_interleave_node()); 
 	} 
 
-	numa_set_interleave_mask(&numa_no_nodes); 
+	numa_set_interleave_mask(numa_no_nodes);
+
+	nodes = numa_allocate_nodemask();
 
 	for (i = 0; i <= max_node; i++) { 
 		int oldhn = numa_preferred();
@@ -275,22 +278,22 @@ void test(enum test type)
 			numa_alloc_interleaved(msize)); 
 
 		if (max_node >= 1) { 
-			nodemask_zero(&nodes);
-			nodemask_set(&nodes, 0);
-			nodemask_set(&nodes, 1);
+			numa_bitmask_clearall(nodes);
+			numa_bitmask_setbit(nodes, 0);
+			numa_bitmask_setbit(nodes, 1);
 			memtest("memory interleaved on node 0/1", 
-				numa_alloc_interleaved_subset(msize, &nodes)); 
+				numa_alloc_interleaved_subset(msize, nodes));
 		} 
 
 		for (k = 0; k <= max_node; k++) { 
 			if (k == i) 
 				continue;
 			sprintf(buf, "alloc on node %d", k);
-			nodemask_zero(&nodes);
-			nodemask_set(&nodes, k); 
-			numa_set_membind(&nodes); 
+			numa_bitmask_clearall(nodes);
+			numa_bitmask_setbit(nodes, k);
+			numa_set_membind(nodes);
 			memtest(buf, numa_alloc(msize)); 			
-			numa_set_membind(&numa_all_nodes);
+			numa_set_membind(numa_all_nodes);
 		}
 		
 		numa_set_localalloc(); 
Index: numactl-1.0.2/stream_main.c
===================================================================
--- numactl-1.0.2.orig/stream_main.c
+++ numactl-1.0.2/stream_main.c
@@ -16,19 +16,22 @@ char *policy = "default";
 /* Run STREAM with a numa policy */
 int main(int ac, char **av)
 {
-	nodemask_t nodes; 
+	struct bitmask *nodes;
 	char *map;
 	long size;
 	int policy;
+
 	policy = parse_policy(av[1], av[2]); 
-	nodemask_zero(&nodes);
+
+        nodes = allocate_nodemask();
+
 	if (av[1] && av[2])
 		nodes = nodemask(av[2]);
 	size = stream_memsize();  
 	map = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS,
 		   0, 0); 
 	if (map == (char*)-1) exit(1); 
-	if (mbind(map, size, policy, &nodes.n[0], NUMA_NUM_NODES, 0) < 0) 
+	if (mbind(map, size, policy, nodes->maskp, nodes->size, 0) < 0)
 		perror("mbind"), exit(1);
 	stream_init(map); 
 	stream_test(NULL);
