MPI version 2 one-sided communication

Virtual shared memory emulates a shared address space on a distributed-memory system — e.g. on large shared-memory computers, or on distributed-memory computers with low-latency communication. It can also be emulated on top of message-passing systems.

Below is a simplified MPI translation of the following example parallel-for loop (see the "main code" section).

...
Grid1 *g = new Grid1(0, n+1);
Grid1IteratorSub it(1, n, g);
DistArray x(g), y(g);
float e = 0;
...
ForEach(int i, it,
  x(i) += ( y(i+1) + y(i-1) )*.5;
  e += sqr( y(i) ); )
...


main code:

#include <mpi.h>
int main(int argc, char *argv[]) {
  MPI_Init(&argc, &argv);
  ...
  int numproc, p;
  MPI_Comm_size(MPI_COMM_WORLD, &numproc);
  MPI_Comm_rank(MPI_COMM_WORLD, &p);

  /* Neighbor ranks; -1 marks a physical boundary (no neighbor). */
  int p_left = -1, p_right = -1;
  if (p > 0)
    p_left = p - 1;
  if (p < numproc - 1)
    p_right = p + 1;

  /* Block distribution of the interior points 1..n-1:
     rank p owns the half-open global index range [n_local0, n_local1). */
  int n_local0 = 1 + (p * (n-1)) / numproc;
  int n_local1 = 1 + ((p+1) * (n-1)) / numproc;

  /* Allocate only the local part plus one ghost cell on each side, then
     shift the pointers so that x[i] / y[i] can be addressed with the
     GLOBAL index i in [n_local0-1, n_local1].  (The shifted pointer is
     moved back before MPI_Free_mem below.) */
  float *x, *y;
  MPI_Alloc_mem(sizeof(float) * (n_local1 - n_local0 + 2),
   MPI_INFO_NULL, &x);
  MPI_Alloc_mem(sizeof(float) * (n_local1 - n_local0 + 2),
   MPI_INFO_NULL, &y);
  x -= (n_local0-1);
  y -= (n_local0-1);

  /* Expose only the OWNED part of y (no ghost cells) in the window.
     Consequence: a target displacement into a remote rank's window is
     relative to that rank's own n_local0, not to ours. */
  MPI_Win win;
  MPI_Win_create(&y[n_local0], sizeof(float) * (n_local1 - n_local0),
   sizeof(float), MPI_INFO_NULL, MPI_COMM_WORLD, &win);
  float e;
  ...
  /* Fill the ghost zone with one-sided reads from the neighbors. */
  MPI_Win_fence(0, win);
  if (p_left != -1) {
    /* BUG FIX: the original passed n_local1-1 as target displacement,
       i.e. an index computed from OUR bounds.  The displacement must be
       relative to the LEFT neighbor's window base (its n_local0).  The
       left neighbor owns [left_n_local0, n_local0); its last element
       (global index n_local0-1) is our left ghost cell. */
    int left_n_local0 = 1 + ((p-1) * (n-1)) / numproc;
    MPI_Get(&y[n_local0-1], 1, MPI_FLOAT, p_left,
     (n_local0 - 1) - left_n_local0, 1, MPI_FLOAT, win);
  }
  if (p_right != -1) {
    /* The right neighbor's first owned element (global index n_local1)
       sits at displacement 0 of its window. */
    MPI_Get(&y[n_local1], 1, MPI_FLOAT, p_right,
     0, 1, MPI_FLOAT, win);
  }
  MPI_Win_fence(0, win);

  /* Local part of the parallel-for loop; e is reduced globally below.
     NOTE(review): on boundary ranks the untouched ghost cells y[0] and
     y[n] must hold the physical boundary values — presumably set in the
     elided code above; confirm. */
  float e_local = 0;
  for (int i = n_local0; i < n_local1; ++i) {
   x[i] += ( y[i+1] + y[i-1] )*.5;
   e_local += y[i] * y[i];
  }
  MPI_Allreduce(&e_local, &e, 1, MPI_FLOAT, MPI_SUM, MPI_COMM_WORLD);
  ...
  MPI_Win_free(&win);
  /* Undo the pointer shift so MPI_Free_mem gets the original base. */
  x += (n_local0-1);
  y += (n_local0-1);
  MPI_Free_mem(y);
  MPI_Free_mem(x);
  MPI_Finalize();
  return 0;
}