OVERVIEW
I have a program that needs to share state between several processes (probably 80; I'm working on an embarrassingly parallel problem on a server with 80 cores). Ideally, I would be able to allocate this shared memory in such a way that I could later grow the amount of state shared between these processes.
My suspicion is that my current approach (shown below) is failing because the pointers don't refer to actual physical memory, so if the return value of mmap in one process is 0xDEADBEEF, that doesn't mean that 0xDEADBEEF will point to the same section of memory in another process. However, I know next to nothing about C programming, so that suspicion could easily be wrong.
Could anyone tell me if my suspicion is correct? If so, what should I be doing for shared state instead? The server would take at least 18 days per dataset without using all the cores, and we have quite a few datasets, so giving up on parallel computing is not really an option. However, I am willing to switch from processes to threads or something similar if that would help (I don't know how one would do that in C though). Thanks in advance for your help.
Below are working and non-working sample programs, along with the results from gdb.
borked.h
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
// Expandable array of ints; earr_expand() doubles the memory allocation when
// earr_insert() runs out of space.
struct earr {
int *vals; // backing storage, obtained from shared_calloc()
int capacity; // number of ints that vals has room for
int length; // number of ints stored so far; also the index of the next insert
};
// Allocate a zero-filled array of nmemb elements of size bytes each in an
// anonymous MAP_SHARED mapping, so the memory stays shared with any child
// created by a later fork().
//
// Returns a pointer to the mapping, or NULL on failure with errno set:
// EINVAL for a zero-sized request, ENOMEM if nmemb * size overflows size_t,
// otherwise whatever mmap() reported. Release with shared_free_size() using
// the same nmemb and size.
void *shared_calloc(size_t nmemb, size_t size) {
    if (nmemb == 0 || size == 0) {
        // mmap() rejects zero-length mappings; report it the same way.
        errno = EINVAL;
        return NULL;
    }
    if (nmemb > (size_t)-1 / size) {
        // nmemb * size would wrap around and map far less than requested.
        errno = ENOMEM;
        return NULL;
    }

    void *mem = mmap(NULL, nmemb * size, PROT_READ | PROT_WRITE,
                     MAP_SHARED | MAP_ANONYMOUS, -1, 0);
    if (mem == MAP_FAILED) {
        return NULL;
    }

    // Anonymous mappings start out zero-filled, so no memset is needed;
    // skipping it also avoids touching (and committing) every page up front.
    return mem;
}
// Unmap a region of nmemb * size bytes starting at mem.
// A null mem is ignored. The result of munmap() is not reported.
void shared_free_size(void *mem, size_t nmemb, size_t size) {
    if (!mem) {
        return;
    }

    const size_t bytes = nmemb * size;
    (void)munmap(mem, bytes);
}
// Create an empty expandable array with room for 16 ints.
//
// Both the header and the value storage come from shared_calloc().
// Returns the new array, or NULL if either allocation did not yield usable
// memory; in that case nothing stays mapped.
struct earr *create_earr(void) {
    enum { EARR_INITIAL_CAPACITY = 16 };

    struct earr *a = shared_calloc(1, sizeof *a);
    if (a == NULL || (void *)a == MAP_FAILED) {
        return NULL;
    }

    a->length = 0;
    a->capacity = EARR_INITIAL_CAPACITY;
    a->vals = shared_calloc(EARR_INITIAL_CAPACITY, sizeof *a->vals);
    if (a->vals == NULL || (void *)a->vals == MAP_FAILED) {
        // Do not leak the header when the value storage cannot be obtained.
        shared_free_size(a, 1, sizeof *a);
        return NULL;
    }
    return a;
}
// Double the capacity of a: allocate a new storage area twice as large, copy
// the existing values into it, switch a->vals over and unmap the old area.
//
// The function returns void, so it cannot report failure. If the doubled
// capacity does not fit in an int, or the new storage cannot be allocated,
// it prints a diagnostic and calls abort() so the caller never writes past
// the end of the old storage.
//
// NOTE: the new storage is mapped only in the process that calls this
// function (and in children it forks afterwards). Another process that
// shares *a sees the updated a->vals pointer, but that address is not mapped
// in its own address space, so dereferencing it there faults. This is the
// SIGSEGV shown in the GDB session for broken.c.
void earr_expand(struct earr *a) {
    if (a->capacity <= 0 || a->capacity > INT_MAX / 2) {
        fprintf(stderr, "earr_expand: cannot double capacity %d\n", a->capacity);
        abort();
    }

    const size_t old_cap = (size_t)a->capacity;
    const size_t new_cap = old_cap * 2;

    int *new_vals = shared_calloc(new_cap, sizeof *new_vals);
    if (new_vals == NULL || (void *)new_vals == MAP_FAILED) {
        perror("earr_expand: shared_calloc");
        abort();
    }
    memcpy(new_vals, a->vals, old_cap * sizeof *new_vals);

    int *old_vals = a->vals;
    a->vals = new_vals;
    a->capacity = (int)new_cap;

    // The old storage is no longer referenced by a; release this process's
    // mapping of it so repeated growth does not leak.
    shared_free_size(old_vals, old_cap, sizeof *old_vals);
}
// Append val to the end of a, calling earr_expand() first when the array
// is already full.
void earr_insert(struct earr *a, int val) {
    if (a->capacity <= a->length) {
        earr_expand(a);
    }

    a->vals[a->length++] = val;
}
// Return the value stored at position index in a.
// No bounds check is performed on index.
int earr_lookup(struct earr *a, int index) {
    const int *slot = a->vals + index;
    return *slot;
}
working.c
#include "borked.h"
// Single-process demo: insert 0x10000 values into an expandable array and
// read every one back, checking that each lookup returns what was inserted.
int main(void) {
    const int size = 0x10000;

    struct earr *a = create_earr();
    if (a == NULL) {
        perror("create_earr");
        return EXIT_FAILURE;
    }

    for (int i = 0; i < size; i++) {
        earr_insert(a, i);
    }

    for (int i = 0; i < size; i++) {
        // Values were inserted in index order, so slot i must hold i.
        if (earr_lookup(a, i) != i) {
            fprintf(stderr, "unexpected value at index %d\n", i);
            return EXIT_FAILURE;
        }
    }
    return EXIT_SUCCESS;
}
broken.c
#include "borked.h"
// Two-process reproduction: a forked child fills the array, then the parent
// waits for the child and reads the values back.
//
// The array header is mapped before fork(), so the parent sees the length,
// capacity and vals pointer the child wrote. The storage that vals points to
// was mapped by earr_expand() inside the child only, so the parent's lookup
// dereferences an address that is not mapped in its own address space. This
// is the SIGSEGV recorded in the GDB session below.
int main(void) {
    const int size = 0x10000;

    struct earr *a = create_earr();
    if (a == NULL) {
        perror("create_earr");
        return EXIT_FAILURE;
    }

    pid_t pid = fork();
    if (pid < 0) {
        // Without this check a failed fork() would fall into the parent
        // branch and call waitpid(-1, ...).
        perror("fork");
        return EXIT_FAILURE;
    }

    if (pid == 0) {
        // Child: grows the array well past its initial capacity.
        for (int i = 0; i < size; i++) {
            earr_insert(a, i);
        }
        return EXIT_SUCCESS;
    }

    // Parent: let the child finish before reading.
    int status;
    if (waitpid(pid, &status, 0) < 0) {
        perror("waitpid");
        return EXIT_FAILURE;
    }
    for (int i = 0; i < size; i++) {
        earr_lookup(a, i);
    }
    return EXIT_SUCCESS;
}
GDB debugging
$ gdb broken
...
(gdb) run
Starting program /path/to/broken
Program received signal SIGSEGV, Segmentation Fault
0x08048663 in earr_lookup (a=0xb7fda000, index=0) at /path/to/borked.h:46
46 return a->vals[index];
(gdb) x/3x 0xb7fda000
0xb7fda000: 0xb7da6000 0x00010000 0x00010000
(gdb) x/x 0xb7da6000
0xb7da6000: Cannot access memory at address 0xb7da6000