MPI version 2 one-sided communication

Virtual shared memory emulates a shared address space on a distributed-memory system — e.g. on large shared-memory computers, or on distributed-memory computers with low-latency communication. It can also be emulated on top of message-passing systems.

Below is a simplified MPI translation of the following example parallel-for loop (see the "main code" section).

...
Grid1 *g = new Grid1(0, n+1);
Grid1IteratorSub it(1, n, g);
DistArray x(g), y(g);
float e = 0;
...
ForEach(int i, it,
  x(i) += ( y(i+1) + y(i-1) )*.5;
  e += sqr( y(i) ); )
...


main code:

#include <mpi.h>
int main(int argc, char *argv[]) {
  MPI_Init(&argc, &argv);
  ...
  int numproc, p;
  MPI_Comm_size(MPI_COMM_WORLD, &numproc);
  MPI_Comm_rank(MPI_COMM_WORLD, &p);

  /* Neighbor ranks; -1 marks a physical boundary (no neighbor). */
  int p_left = -1, p_right = -1;
  if (p > 0)
    p_left = p - 1;
  if (p < numproc - 1)
    p_right = p + 1;

  /* Block distribution of the interior points 1..n-1:
     rank p owns the half-open global index range [n_local0, n_local1). */
  int n_local0 = 1 + (p * (n-1)) / numproc;
  int n_local1 = 1 + ((p+1) * (n-1)) / numproc;

  /* Allocate only the local part plus one ghost cell on each side, then
     shift the pointers so that x[i] / y[i] can be addressed with the
     GLOBAL index i in [n_local0-1, n_local1].  (The shifted pointer is
     moved back before MPI_Free_mem below.) */
  float *x, *y;
  MPI_Alloc_mem(sizeof(float) * (n_local1 - n_local0 + 2),
   MPI_INFO_NULL, &x);
  MPI_Alloc_mem(sizeof(float) * (n_local1 - n_local0 + 2),
   MPI_INFO_NULL, &y);
  x -= (n_local0-1);
  y -= (n_local0-1);

  /* Expose only the OWNED part of y (no ghost cells) in the window.
     Consequence: a target displacement into a remote rank's window is
     relative to that rank's own n_local0, not to ours. */
  MPI_Win win;
  MPI_Win_create(&y[n_local0], sizeof(float) * (n_local1 - n_local0),
   sizeof(float), MPI_INFO_NULL, MPI_COMM_WORLD, &win);
  float e;
  ...
  /* Fill the ghost zone with one-sided reads from the neighbors. */
  MPI_Win_fence(0, win);
  if (p_left != -1) {
    /* BUG FIX: the original passed n_local1-1 as target displacement,
       i.e. an index computed from OUR bounds.  The displacement must be
       relative to the LEFT neighbor's window base (its n_local0).  The
       left neighbor owns [left_n_local0, n_local0); its last element
       (global index n_local0-1) is our left ghost cell. */
    int left_n_local0 = 1 + ((p-1) * (n-1)) / numproc;
    MPI_Get(&y[n_local0-1], 1, MPI_FLOAT, p_left,
     (n_local0 - 1) - left_n_local0, 1, MPI_FLOAT, win);
  }
  if (p_right != -1) {
    /* The right neighbor's first owned element (global index n_local1)
       sits at displacement 0 of its window. */
    MPI_Get(&y[n_local1], 1, MPI_FLOAT, p_right,
     0, 1, MPI_FLOAT, win);
  }
  MPI_Win_fence(0, win);

  /* Local part of the parallel-for loop; e is reduced globally below.
     NOTE(review): on boundary ranks the untouched ghost cells y[0] and
     y[n] must hold the physical boundary values — presumably set in the
     elided code above; confirm. */
  float e_local = 0;
  for (int i = n_local0; i < n_local1; ++i) {
   x[i] += ( y[i+1] + y[i-1] )*.5;
   e_local += y[i] * y[i];
  }
  MPI_Allreduce(&e_local, &e, 1, MPI_FLOAT, MPI_SUM, MPI_COMM_WORLD);
  ...
  MPI_Win_free(&win);
  /* Undo the pointer shift so MPI_Free_mem gets the original base. */
  x += (n_local0-1);
  y += (n_local0-1);
  MPI_Free_mem(y);
  MPI_Free_mem(x);
  MPI_Finalize();
  return 0;
}