Performs a sum reduction function across a set of processing elements (PEs).
#include <shmem.h>
void shmem_complexf_sum_to_all(float complex *target, float complex *source, int nreduce, int PE_start, int logPE_stride, int PE_size, float complex *pWrk, long *pSync);
The reduction routines compute one or more reductions across symmetric arrays on multiple processing elements (PEs). A reduction performs an associative binary operation across a set of values. The nreduce argument determines the number of separate reductions to perform. The source array on all PEs in the active set provides one element for each reduction. The results of the reductions are placed in the target array on all PEs in the active set, which is defined by the PE_start, logPE_stride, PE_size triplet.
The source and target arrays may be the same array, but they may not be overlapping arrays. Each of these functions assumes that only PEs in the active set call the function. If a PE not in the active set calls the collective function, the behavior is undefined.
The function shmem_complexf_sum_to_all performs a reduction applying the sum function to float complex values distributed across the PEs.
Before any PE calls the reduction functions, users have to ensure the following conditions exist:
The pWrk and pSync arrays on all PEs in the active set are not still in use from a prior call to a reduction function.
The target array on all PEs in the active set is ready to accept the results of the reduction.
Upon returning from the collective function, the following conditions are true:
The target data array is updated.
The values in the pSync array are restored to the original values.
Each of these functions assumes that only PEs in the active set call the function. If a PE not in the active set calls the collective function, the behavior is undefined.
#include <stdlib.h> #include <stdio.h> #include <assert.h> #include <unistd.h> #include <math.h> #include <shmem.h> #define ARRAY_SIZE 10 const double DOUBLE_COMPLEX_SUM_REAL_INIT_VAL=8.888888; const double DOUBLE_COMPLEX_SUM_IMAG_INIT_VAL=5.555555; const double DOUBLE_PRECISION_OFFSET=0.000001; const float FLOAT_COMPLEX_SUM_REAL_INIT_VAL=6.66; const float FLOAT_COMPLEX_SUM_IMAG_INIT_VAL=2.22; const float FLOAT_PRECISION_OFFSET=0.0001; #define _SHMEM_REDUCE_MIN_WRKDATA_SIZE 1 #define _SHMEM_PWRK_VALUE 1 #define MAX(X, Y) (X > Y)? X:Y int main (int argc, char* argv[]) { int total_tasks = -1; int PE_size = -1; int my_task = -1; start_pes(0); total_tasks = _num_pes(); if (total_tasks <= 0) { printf("FAILED\n"); exit(1); } else { printf("number of pes is %d\n", total_tasks); } if (total_tasks < 2 || total_tasks % 2) { printf("FAILED: The number of pes should be an even number. (at least 2)\n"); exit(1); } my_task = _my_pe(); if (my_task < 0){ printf("FAILED\n"); exit(1); } else { printf("my pe id is %d\n", my_task); } printf("my pid is %d\n", getpid()); long *syncList = (long *)shmalloc(sizeof(long)*_SHMEM_REDUCE_SYNC_SIZE); long *pSync; pSync = &syncList[0]; int i; for (i=0; i < _SHMEM_BCAST_SYNC_SIZE; i++) { pSync[i] = _SHMEM_SYNC_VALUE; } int size = MAX((ARRAY_SIZE/2+1), _SHMEM_REDUCE_MIN_WRKDATA_SIZE); int *wrkList = (int *)shmalloc(sizeof(long)*size); int *pWrk; pWrk = &wrkList[0]; for (i=0; i < size; i++) { pWrk[i] = _SHMEM_PWRK_VALUE; } double complex *source = (double complex *)shmalloc(sizeof(double complex)*ARRAY_SIZE); double complex *target = (double complex *)shmalloc(sizeof(double complex)*ARRAY_SIZE); shmem_barrier_all(); // Wait for all PEs to initialize pSync and pWrk //Firstly, test double complex sum printf("double complex--->NUM test begin.\n"); for (i=0; i<ARRAY_SIZE; i++) { source[i] = DOUBLE_COMPLEX_SUM_REAL_INIT_VAL + DOUBLE_COMPLEX_SUM_IMAG_INIT_VAL * I; } for (i=0; i<ARRAY_SIZE; i++) { target[i] = 0.0 + 0.0*I; } PE_size = total_tasks; 
shmem_complexd_sum_to_all(target, source, ARRAY_SIZE, 0, 0, PE_size, (double complex *)pWrk, pSync); //verify the correctness of the result for (i=0; i<ARRAY_SIZE; i++) { if (((creal(target[i])-PE_size*DOUBLE_COMPLEX_SUM_REAL_INIT_VAL)>DOUBLE_PRECISION_OFFSET) && ((cimag(target[i])-PE_size*DOUBLE_COMPLEX_SUM_IMAG_INIT_VAL)>DOUBLE_PRECISION_OFFSET)) { printf("FAILED, target[%d] should be %f+%f*I instead of %f+%f*I\n", i, PE_size*DOUBLE_COMPLEX_SUM_REAL_INIT_VAL, PE_size*DOUBLE_COMPLEX_SUM_IMAG_INIT_VAL, creal(target[i]), cimag(target[i])); exit(1); } } //printf the correct result printf("The following data is the computing result of complexd--->SUM.\n"); for (i=0; i%lf+%lf*I\n", i, creal(target[i]), cimag(target[i])); } printf("double complex--->NUM test finished.\n"); shmem_barrier_all(); float complex *src = (float complex *)shmalloc(sizeof(float complex)*ARRAY_SIZE); float complex *tgt = (float complex *)shmalloc(sizeof(float complex)*ARRAY_SIZE); //Secondly, test float complex sum printf("float complex--->SUM test begin.\n"); for (i=0; i<ARRAY_SIZE; i++) { src[i] = FLOAT_COMPLEX_SUM_REAL_INIT_VAL + FLOAT_COMPLEX_SUM_IMAG_INIT_VAL * I; } for (i=0; i<ARRAY_SIZE; i++) { tgt[i] = 0.0 + 0.0*I; } PE_size = total_tasks; shmem_complexf_sum_to_all(tgt, src, ARRAY_SIZE, 0, 0, PE_size, (float complex *)pWrk, pSync); //verify the correctness of the result for (i=0; i<ARRAY_SIZE; i++) { if (((creal(tgt[i])-PE_size*FLOAT_COMPLEX_SUM_REAL_INIT_VAL)>FLOAT_PRECISION_OFFSET) && ((cimag(tgt[i])-PE_size*FLOAT_COMPLEX_SUM_IMAG_INIT_VAL)>FLOAT_PRECISION_OFFSET)) { printf("FAILED, target[%d] should be %f+%f*I instead of %f+%f*I\n", i, PE_size*FLOAT_COMPLEX_SUM_REAL_INIT_VAL, PE_size*FLOAT_COMPLEX_SUM_IMAG_INIT_VAL, creal(tgt[i]), cimag(tgt[i])); exit(1); } } //printf the correct result printf("The following data is the computing result of complexf--->SUM.\n"); for (i=0; i %f+%f*I\n", i, creal(tgt[i]), cimag(tgt[i])); } printf("float complex--->SUM test finished.\n"); 
shmem_barrier_all(); printf("PASSED\n"); return 0; }
Subroutines: shmem_and, shmem_barrier, shmem_broadcast, shmem_collect, shmem_max, shmem_min, shmem_or, shmem_prod, shmem_sum, shmem_xor