MPI message passing

Distributed memory systems, e.g. PC clusters or massively parallel computers. Data layout: local memory only; data transfers must be managed explicitly through message-passing library calls.
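
For illustration only, a minimal sketch of such an explicit transfer (buffer contents and ranks are made up, and at least two processes are assumed): rank 0 sends four floats to rank 1 via a matching MPI_Send/MPI_Recv pair.

#include <mpi.h>
int main(int argc, char *argv[]) {
  MPI_Init(&argc, &argv);
  int p;
  MPI_Comm_rank(MPI_COMM_WORLD, &p);
  float buf[4] = { 0, 1, 2, 3 };   // example payload
  if (p == 0)                      // rank 0: explicit send call
    MPI_Send(buf, 4, MPI_FLOAT, 1, 0, MPI_COMM_WORLD);
  else if (p == 1) {               // rank 1: matching receive call
    MPI_Status s;
    MPI_Recv(buf, 4, MPI_FLOAT, 0, 0, MPI_COMM_WORLD, &s);
  }
  MPI_Finalize();
  return 0;
}

There is no shared address space: every data movement appears as a send on one process and a matching receive on another.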

A simplified MPI translation of the following example parallel-for loop, written with a distributed-array abstraction, is given below.

int main(int argc, char *argv[]) {
  ...
  Grid1 *g = new Grid1(0, n+1);   // 1D grid covering the indices 0..n
  Grid1IteratorSub it(1, n, g);   // iterator over the interior indices 1..n-1
  DistArray x(g), y(g);           // arrays distributed over the grid
  float e = 0;
  ...
  ForEach(int i, it,
    x(i) += ( y(i+1) + y(i-1) )*.5;   // 3-point stencil update
    e += sqr( y(i) ); )               // global sum reduction
  ...
  return 0;
}


The translated MPI code:

#include <mpi.h>
int main(int argc, char *argv[]) {
  MPI_Init(&argc, &argv);
  ...
  int numproc, p;
  MPI_Comm_size(MPI_COMM_WORLD, &numproc);
  MPI_Comm_rank(MPI_COMM_WORLD, &p);
  int p_left = -1, p_right = -1;
  if (p > 0)
     p_left = p-1;
  if (p < numproc-1)
     p_right = p+1;
  int n_local0 = 1 + (p * (n-1)) / numproc;
  int n_local1 = 1 + ((p+1) * (n-1)) / numproc;
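  // (example: for n = 11 and numproc = 4 the owned index ranges
  //  [n_local0, n_local1) become [1,3), [3,6), [6,8) and [8,11),
  //  i.e. the interior points 1..n-1 are split into contiguous blocks)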
  // allocate only the local part + ghost zone of the arrays x, y;
  // the returned pointer is shifted so that the arrays can still be
  // indexed with the global indices n_local0-1 .. n_local1
  float *x = new float[n_local1 - n_local0 + 2] - (n_local0-1);
  float *y = new float[n_local1 - n_local0 + 2] - (n_local0-1);
  float e;
  ...
  // fill ghost zone: send the first owned value y[n_local0] to the left
  // neighbour (tag 1) and the last owned value y[n_local1-1] to the right
  // neighbour (tag 2); the matching receives fill y[n_local0-1] and
  // y[n_local1]. Non-blocking sends avoid a blocking cycle between neighbours.
  MPI_Request req0, req1;
  MPI_Status s;
  if (p_left != -1)
    MPI_Isend(&y[n_local0], 1, MPI_FLOAT, p_left,
      1, MPI_COMM_WORLD, &req0);
  if (p_right != -1) {
    MPI_Recv(&y[n_local1], 1, MPI_FLOAT, p_right,
      1, MPI_COMM_WORLD, &s);
    MPI_Isend(&y[n_local1-1], 1, MPI_FLOAT, p_right,
      2, MPI_COMM_WORLD, &req1);
    MPI_Wait(&req1, &s);
  }
  if (p_left != -1) {
    MPI_Recv(&y[n_local0-1], 1, MPI_FLOAT, p_left,
      2, MPI_COMM_WORLD, &s);
    MPI_Wait(&req0, &s);
  }
  // do the computation on the locally owned index range [n_local0, n_local1)
  float e_local = 0;
  for (int i=n_local0; i<n_local1; ++i) {
    x[i] += ( y[i+1] + y[i-1] )*.5;
    e_local += y[i] * y[i];
  }
  // sum the local partial results of all processes into e (available on every rank)
  MPI_Allreduce(&e_local, &e, 1, MPI_FLOAT, MPI_SUM, MPI_COMM_WORLD);
  ...
  // undo the pointer shift before deallocating
  x += (n_local0-1);
  y += (n_local0-1);
  delete[] x;
  delete[] y;
  MPI_Finalize();
  return 0;
}
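
As a variation, the ghost-zone exchange above can also be written with MPI_Sendrecv, using MPI_PROC_NULL for the missing neighbours at the domain boundary (communication with MPI_PROC_NULL is a no-op), which removes the MPI_Isend/MPI_Wait bookkeeping. A sketch, assuming the same y, n_local0, n_local1, p and numproc as in the code above:

  MPI_Status s;
  int left  = (p > 0)         ? p-1 : MPI_PROC_NULL;  // boundary: no neighbour
  int right = (p < numproc-1) ? p+1 : MPI_PROC_NULL;
  // send first owned value to the left, receive the right ghost cell
  MPI_Sendrecv(&y[n_local0],   1, MPI_FLOAT, left,  1,
               &y[n_local1],   1, MPI_FLOAT, right, 1, MPI_COMM_WORLD, &s);
  // send last owned value to the right, receive the left ghost cell
  MPI_Sendrecv(&y[n_local1-1], 1, MPI_FLOAT, right, 2,
               &y[n_local0-1], 1, MPI_FLOAT, left,  2, MPI_COMM_WORLD, &s);

With most MPI implementations the program is built and launched along the lines of mpicxx main.cpp -o main and mpirun -np 4 ./main.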