Correct way to get data structure with MPI_Gather
I have the following data structure that I am trying to send using MPI_Gather:
/* One set record: an int header followed by a variable-length char payload. */
struct set {
int nbits;
/* Flexible array member: its storage must be allocated beyond sizeof(struct set). */
char bits[];
};
The problem is that I cannot gather all the elements of the above structure correctly: only the first element arrives intact, and the remaining elements contain values that make no sense.
Here's a test file:
#include <stdio.h>
#include <stdlib.h>
#include "mpi.h"
#define SIZE 10
/* One set record: an int header followed by a variable-length char payload. */
struct set {
/* Number of entries of bits[] that main() fills in before sending. */
int nbits;
/* Flexible array member: its storage must be allocated beyond sizeof(struct set). */
char bits[];
};
/*
 * Test program from the question: every rank fills one `struct set`, and
 * rank 0 tries to gather them all through a derived MPI datatype and print
 * the first two payload bytes received from each rank.
 *
 * NOTE(review): this is the version that misbehaves; the comments below mark
 * the lines that look responsible.
 */
int main(int argc, char *argv[]) {
int np, rank, i;
struct set *subsets, *single;
void *buf;
MPI_Init(&argc,&argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &np);
/* One record with room for SIZE chars in the flexible array member. */
single = malloc(sizeof(struct set) + SIZE);
if(rank == 0) {
/*
 * Receive area of np * (sizeof(struct set) + SIZE) bytes.
 * NOTE(review): nothing guarantees this equals np times the extent of
 * set_type, which is the spacing MPI_Gather uses between received elements.
 */
subsets = malloc((sizeof(struct set) + SIZE) * np);
}
/* NOTE(review): on ranks other than 0, subsets has not been assigned when it is read here. */
buf = &subsets[0];
MPI_Datatype set_type, oldtypes[2];
int blockcounts[2];
MPI_Aint offsets[2];
MPI_Aint addr[3];
/* Member displacements are measured relative to the start of *single. */
MPI_Get_address(single, &addr[0]);
MPI_Get_address(&single->nbits, &addr[1]);
MPI_Get_address(&single->bits, &addr[2]);
/* Block 0: one MPI_INT at the offset of nbits. */
offsets[0] = addr[1] - addr[0];
oldtypes[0] = MPI_INT;
blockcounts[0] = 1;
/* Block 1: SIZE MPI_CHARs at the offset of bits. */
offsets[1] = addr[2] - addr[0];
oldtypes[1] = MPI_CHAR;
blockcounts[1] = SIZE;
MPI_Type_create_struct(2, blockcounts, offsets, oldtypes, &set_type);
MPI_Type_commit(&set_type);
/* Payload: the first nbits entries are set to a letter derived from the rank. */
single->nbits = 2;
for(i=0; i<single->nbits; i++)
single->bits[i] = 'A' + rank;
/* Every rank contributes one set_type element; rank 0 is the root. */
MPI_Gather(single, 1, set_type, buf, 1, set_type, 0, MPI_COMM_WORLD);
if(rank == 0) {
void *ptr;
struct set *fs;
int size;
/*
 * NOTE(review): MPI_Type_size reports the number of data bytes in the type,
 * not its extent (data plus padding), so it is the wrong stride for walking
 * the gathered elements.
 */
MPI_Type_size(set_type, &size);
ptr = buf;
for(i=0; i<np; i++) {
size_t j;
fs = ptr;
printf("from rank %d: bits => %p nbits => %d\n", i, fs->bits, fs->nbits);
/* NOTE(review): j is a size_t but is printed with %d below. */
for(j=0; j<2; j++)
printf("from rank %d: buf[%d] = %#x\n",
i, j, fs->bits[j]);
/* NOTE(review): arithmetic on void * is a compiler extension, not standard C. */
ptr += size;
}
}
MPI_Type_free(&set_type);
MPI_Finalize();
}
Any help would be appreciated.
source to share
The problem lies not so much in MPI as in the pointer arithmetic on structures and MPI types.
You have:
/* Excerpt of the rank-0 printing code from the question. */
void *ptr;
struct set *fs;
int size;
/* MPI_Type_size yields the data bytes in the type, not the distance between elements. */
MPI_Type_size(set_type, &size);
ptr = buf;
for(i=0; i<np; i++) {
size_t j;
fs = ptr;
printf("from rank %d: bits => %p nbits => %d\n", i, fs->bits, fs->nbits);
for(j=0; j<2; j++)
printf("from rank %d: buf[%d] = %#x\n",
i, j, fs->bits[j]);
/* The stride used here is the type size, which is what goes wrong. */
ptr += size;
}
}
But `MPI_Type_size` actually gives the amount of data in the type; if there is padding (which there probably is here, to get the character array onto a word boundary), that is not the same as `sizeof`. If you want to use MPI functions here, switch that one call to `MPI_Type_extent`, which tells you the entire extent spanned by the type; with that change your code runs for me ... but there is still a problem.
If you look at the difference between `sizeof(struct set)+SIZE` and the value returned by `MPI_Type_extent()`, you will see that they are not the same; that is, this:
#define SIZE 10
/* Flexible-array form: sizeof(struct set) does not include the bits[] payload. */
struct set {
    int nbits;
    char bits[];
};
...
malloc(sizeof(struct set)+SIZE);
does not match
struct set {
int nbits
char nbits[SIZE]
}
malloc(sizeof(struct set));
because of padding, etc. This means that the size of `subsets` is wrong, and the call to `MPI_Gather` causes a memory error.
You can get around this in a number of ways, but the simplest (and shortest in terms of the number of lines) is to define the struct with the array size already fixed, and then use array indexing instead of pointer arithmetic:
#include <stdio.h>
#include <stdlib.h>
#include "mpi.h"
#define SIZE 10
/* One set record with a fixed-capacity payload, so records can be stored in a plain array. */
struct set {
/* Number of entries of bits[] that main() fills in before sending. */
int nbits;
/* Fixed-size payload: sizeof(struct set) already accounts for it and for any padding. */
char bits[SIZE];
};
int main(int argc, char *argv[]) {
int np, rank, i;
struct set *subsets, *single;
MPI_Init(&argc,&argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &np);
single = malloc(sizeof(struct set));
if(rank == 0) {
subsets = malloc(sizeof(struct set) * np);
}
MPI_Datatype set_type, oldtypes[2];
int blockcounts[2];
MPI_Aint offsets[2];
MPI_Aint addr[3];
MPI_Get_address(single, &addr[0]);
MPI_Get_address(&single->nbits, &addr[1]);
MPI_Get_address(&single->bits, &addr[2]);
offsets[0] = addr[1] - addr[0];
oldtypes[0] = MPI_INT;
blockcounts[0] = 1;
offsets[1] = addr[2] - addr[0];
oldtypes[1] = MPI_CHAR;
blockcounts[1] = SIZE;
MPI_Type_create_struct(2, blockcounts, offsets, oldtypes, &set_type);
MPI_Type_commit(&set_type);
single->nbits = 2;
for(i=0; i<single->nbits; i++)
single->bits[i] = 'A' + rank;
MPI_Gather(single, 1, set_type, &(subsets[0]), 1, set_type, 0, MPI_COMM_WORLD);
if(rank == 0) {
for(i=0; i<np; i++) {
struct set *fs = &(subsets[i]);
printf("from rank %d: bits => %p nbits => %d\n", i, fs->bits, fs->nbits);
for(int j=0; j<2; j++)
printf("from rank %d: buf[%d] = %#x\n",
i, j, fs->bits[j]);
}
}
MPI_Type_free(&set_type);
MPI_Finalize();
}
Updated to add: if you can't do that, just size the receive buffer from the datatype's extent instead:
#include <stdio.h>
#include <stdlib.h>
#include "mpi.h"
#define SIZE 10
/* One set record: an int header followed by a variable-length char payload. */
struct set {
/* Number of entries of bits[] that main() fills in before sending. */
int nbits;
/* Flexible array member: its storage must be allocated beyond sizeof(struct set). */
char bits[];
};
int main(int argc, char *argv[]) {
int np, rank, i;
struct set *single;
void *buf;
ptrdiff_t extent;
MPI_Init(&argc,&argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &np);
single = malloc(sizeof(struct set) + SIZE);
MPI_Datatype set_type, oldtypes[2];
int blockcounts[2];
MPI_Aint offsets[2];
MPI_Aint addr[3];
MPI_Get_address(single, &addr[0]);
MPI_Get_address(&single->nbits, &addr[1]);
MPI_Get_address(&single->bits, &addr[2]);
offsets[0] = addr[1] - addr[0];
oldtypes[0] = MPI_INT;
blockcounts[0] = 1;
offsets[1] = addr[2] - addr[0];
oldtypes[1] = MPI_CHAR;
blockcounts[1] = SIZE;
MPI_Type_create_struct(2, blockcounts, offsets, oldtypes, &set_type);
MPI_Type_commit(&set_type);
MPI_Type_extent(set_type, &extent);
buf = malloc((int)extent * np);
single->nbits = 2;
for(i=0; i<single->nbits; i++)
single->bits[i] = 'A' + rank;
MPI_Gather(single, 1, set_type, buf, 1, set_type, 0, MPI_COMM_WORLD);
if(rank == 0) {
struct set *fs = buf;
for(i=0; i<np; i++) {
printf("from rank %d: bits => %p nbits => %d\n", i, fs->bits, fs->nbits);
for(int j=0; j<2; j++)
printf("from rank %d: buf[%d] = %#x\n",
i, j, fs->bits[j]);
fs = (struct set *)((char *)fs + extent);
}
}
MPI_Type_free(&set_type);
MPI_Finalize();
}
source to share