code example
shmem: SGI shmem
one-sided communication
HMPP: CAPS Hybrid Multi-core Parallel Programming Environment

#include <mpp/shmem.h>


/*
 * HMPP codelet "code1": one sweep over n points.
 *
 * For every k in [0, n):
 *   - x[k] is increased by the average of y[k] and y[k+2];
 *   - y[k+1] squared is added to a running sum.
 * The running sum starts from e[0] and is written back to e[0] at the end.
 *
 *   n  number of points to update
 *   x  n values, updated in place
 *   y  n+2 values, only read here
 *   e  one-element accumulator, read on entry and overwritten on exit
 */
#pragma hmpp code1 codelet, &
#pragma hmpp code1 target=CUDA:BROOK:PTHREADSSE:PTHREAD:SSE, &
#pragma hmpp code1 args[x,e].io=inout
static void sub1(int n, float x[n], float y[n+2], float e[1]) {
  // Accumulate in a scalar and store once after the loop.
  float sum_sq = e[0];
  int k;

  for (k = 0; k < n; k++) {
    const float centre = y[k + 1];

    // Average of the two neighbours on either side of the centre point.
    x[k] = x[k] + (y[k + 2] + y[k]) * .5;
    sum_sq = sum_sq + centre * centre;
  }

  e[0] = sum_sq;
}

int main(int argc, char *argv[]) {
  int n = ...;
  start_pes(0);
  int nn = (n-1) / _num_pes();
  int n_local0 = 1 + _my_pe() * nn;
  int n_local1 = 1 + (_my_pe()+1) * nn;
  // allocate only local part + ghost zone of the arrays x,y
  float *x, *y;
  x = (float*)shmalloc((n_local1 - n_local0 + 2)*sizeof(float));
  y = (float*)shmalloc((n_local1 - n_local0 + 2)*sizeof(float));
  x -= (n_local0 - 1);
  y -= (n_local0 - 1);
  shmem_barrier_all();

  ... // fill x, y

  // fill ghost zone
  if (_my_pe() > 0)
   shmem_float_put(&y[n_local1], &y[n_local0], 1, _my_pe()-1);
  if (_my_pe() < _num_pes()-1)
   shmem_float_put(&y[n_local0-1], &y[n_local1-1], 1, _my_pe()+1);
  shmem_barrier_all();

  float e = 0;
  #pragma hmpp code1 callsite
  sub1(n_local1-n_local0, &x[n_local0], &y[n_local0-1], &e);

  static float work[_SHMEM_REDUCE_MIN_WRKDATA_SIZE];
  static long sync[_SHMEM_REDUCE_MIN_WRKDATA_SIZE];
  static float el, es;
  el = e;
  shmem_float_sum_to_all(&es, &el, 1,
   0, 0, _num_pes(), work, sync);
  e = es;

  ... // output x, e

  x += (n_local0 - 1);
  y += (n_local0 - 1);
  shfree(x);
  shfree(y);
  return 0;
}

[start] [references] [download] [install]