MPI message passing

Distributed memory systems, e.g. PC clusters or massively parallel computers. Data layout: local memory only; data transfers must be managed explicitly through message-passing library calls.
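
For illustration only, a minimal sketch of such an explicit transfer (buffer contents and ranks are made up, and at least two processes are assumed): rank 0 sends four floats to rank 1 via a matching MPI_Send/MPI_Recv pair.

#include <mpi.h>
int main(int argc, char *argv[]) {
  MPI_Init(&argc, &argv);
  int p;
  MPI_Comm_rank(MPI_COMM_WORLD, &p);
  float buf[4] = { 0, 1, 2, 3 };   // example payload
  if (p == 0)                      // rank 0: explicit send call
    MPI_Send(buf, 4, MPI_FLOAT, 1, 0, MPI_COMM_WORLD);
  else if (p == 1) {               // rank 1: matching receive call
    MPI_Status s;
    MPI_Recv(buf, 4, MPI_FLOAT, 0, 0, MPI_COMM_WORLD, &s);
  }
  MPI_Finalize();
  return 0;
}

There is no shared address space: every data movement appears as a send on one process and a matching receive on another.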

A simplified MPI translation of the following example parallel-for loop, written with a distributed-array abstraction, is given below.

int main(int argc, char *argv[]) {
  ...
  Grid1 *g = new Grid1(0, n+1);   // 1D grid covering the indices 0..n
  Grid1IteratorSub it(1, n, g);   // iterator over the interior indices 1..n-1
  DistArray x(g), y(g);           // arrays distributed over the grid
  float e = 0;
  ...
  ForEach(int i, it,
    x(i) += ( y(i+1) + y(i-1) )*.5;   // 3-point stencil update
    e += sqr( y(i) ); )               // global sum reduction
  ...
  return 0;
}


The translated MPI code:

#include <mpi.h>
int main(int argc, char *argv[]) {
  MPI_Init(&argc, &argv);
  ...
  int numproc, p;
  MPI_Comm_size(MPI_COMM_WORLD, &numproc);
  MPI_Comm_rank(MPI_COMM_WORLD, &p);
  int p_left = -1, p_right = -1;
  if (p > 0)
     p_left = p-1;
  if (p < numproc-1)
     p_right = p+1;
  int n_local0 = 1 + (p * (n-1)) / numproc;
  int n_local1 = 1 + ((p+1) * (n-1)) / numproc;
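  // (example: for n = 11 and numproc = 4 the owned index ranges
  //  [n_local0, n_local1) become [1,3), [3,6), [6,8) and [8,11),
  //  i.e. the interior points 1..n-1 are split into contiguous blocks)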
  // allocate only the local part + ghost zone of the arrays x, y;
  // the returned pointer is shifted so that the arrays can still be
  // indexed with the global indices n_local0-1 .. n_local1
  float *x = new float[n_local1 - n_local0 + 2] - (n_local0-1);
  float *y = new float[n_local1 - n_local0 + 2] - (n_local0-1);
  float e;
  ...
  // fill ghost zone: send the first owned value y[n_local0] to the left
  // neighbour (tag 1) and the last owned value y[n_local1-1] to the right
  // neighbour (tag 2); the matching receives fill y[n_local0-1] and
  // y[n_local1]. Non-blocking sends avoid a blocking cycle between neighbours.
  MPI_Request req0, req1;
  MPI_Status s;
  if (p_left != -1)
    MPI_Isend(&y[n_local0], 1, MPI_FLOAT, p_left,
      1, MPI_COMM_WORLD, &req0);
  if (p_right != -1) {
    MPI_Recv(&y[n_local1], 1, MPI_FLOAT, p_right,
      1, MPI_COMM_WORLD, &s);
    MPI_Isend(&y[n_local1-1], 1, MPI_FLOAT, p_right,
      2, MPI_COMM_WORLD, &req1);
    MPI_Wait(&req1, &s);
  }
  if (p_left != -1) {
    MPI_Recv(&y[n_local0-1], 1, MPI_FLOAT, p_left,
      2, MPI_COMM_WORLD, &s);
    MPI_Wait(&req0, &s);
  }
  // do the computation on the locally owned index range [n_local0, n_local1)
  float e_local = 0;
  for (int i=n_local0; i<n_local1; ++i) {
    x[i] += ( y[i+1] + y[i-1] )*.5;
    e_local += y[i] * y[i];
  }
  // sum the local partial results of all processes into e (available on every rank)
  MPI_Allreduce(&e_local, &e, 1, MPI_FLOAT, MPI_SUM, MPI_COMM_WORLD);
  ...
  // undo the pointer shift before deallocating
  x += (n_local0-1);
  y += (n_local0-1);
  delete[] x;
  delete[] y;
  MPI_Finalize();
  return 0;
}
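
As a variation, the ghost-zone exchange above can also be written with MPI_Sendrecv, using MPI_PROC_NULL for the missing neighbours at the domain boundary (communication with MPI_PROC_NULL is a no-op), which removes the MPI_Isend/MPI_Wait bookkeeping. A sketch, assuming the same y, n_local0, n_local1, p and numproc as in the code above:

  MPI_Status s;
  int left  = (p > 0)         ? p-1 : MPI_PROC_NULL;  // boundary: no neighbour
  int right = (p < numproc-1) ? p+1 : MPI_PROC_NULL;
  // send first owned value to the left, receive the right ghost cell
  MPI_Sendrecv(&y[n_local0],   1, MPI_FLOAT, left,  1,
               &y[n_local1],   1, MPI_FLOAT, right, 1, MPI_COMM_WORLD, &s);
  // send last owned value to the right, receive the left ghost cell
  MPI_Sendrecv(&y[n_local1-1], 1, MPI_FLOAT, right, 2,
               &y[n_local0-1], 1, MPI_FLOAT, left,  2, MPI_COMM_WORLD, &s);

With most MPI implementations the program is built and launched along the lines of mpicxx main.cpp -o main and mpirun -np 4 ./main.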