Actual source code: ex15.c

  1: static char help[] = "  Test VecScatterRemap() on various kinds of vecscatters. \n\
  2: PETSc may optimize a vecscatter based on its index patterns. After the indices are remapped by \n\
  3: VecScatterRemap(), we need to make sure the vecscatter still works as expected with the optimization. \n\
  4:   VecScatterRemap() does not support all kinds of vecscatters. In addition, it only supports remapping \n\
  5: entries where we read the data (i.e., todata in a parallel scatter, fromdata in a sequential scatter). This \n\
  6: example tests VecScatterRemap() on a parallel to parallel (PtoP) vecscatter, a sequential general to sequential \n\
  7: general (SGToSG) vecscatter, and a sequential general to sequential stride 1 (SGToSS_Stride1) vecscatter.\n\n";

  9: #include <petscvec.h>

 11: int main(int argc, char **argv)
 12: {
 13:   PetscInt        i, n, *ix, *iy, *tomap, start;
 14:   Vec             x, y;
 15:   PetscMPIInt     nproc, rank;
 16:   IS              isx, isy;
 17:   const PetscInt *ranges;
 18:   VecScatter      vscat;

 21:   PetscInitialize(&argc, &argv, (char *)0, help);
 22:   MPI_Comm_size(PETSC_COMM_WORLD, &nproc);
 23:   MPI_Comm_rank(PETSC_COMM_WORLD, &rank);
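  /* Note: this example is intended to be run on exactly 2 MPI ranks (see the TEST block at the bottom of
     the file); the expected results quoted in the comments below assume nproc = 2 */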


 27:   /* ====================================================================
 28:      (1) test VecScatterRemap on a parallel to parallel (PtoP) vecscatter
 29:      ====================================================================
 30:    */

 32:   n = 64; /* long enough to trigger memcpy optimizations both in local scatter and remote scatter */

 34:   /* create two MPI vectors x, y of local length n=64 (global length N=128 with 2 ranks) */
 35:   VecCreateMPI(PETSC_COMM_WORLD, n, PETSC_DECIDE, &x);
 36:   VecDuplicate(x, &y);

 38:   /* Initialize x as {0~127} */
 39:   VecGetOwnershipRanges(x, &ranges);
 40:   for (i = ranges[rank]; i < ranges[rank + 1]; i++) VecSetValue(x, i, (PetscScalar)i, INSERT_VALUES);
 41:   VecAssemblyBegin(x);
 42:   VecAssemblyEnd(x);

 44:   /* create two general index sets isx = {0~127} and isy = {32~63,64~95,96~127,0~31}. The indices in isx are
 45:      contiguous, but we store them in a general IS and let PETSc detect the pattern and optimize it. The indices
 46:      in isy are chosen so that the vecscatter has both a local scatter and a remote scatter (i.e., MPI communication)
 47:    */
 48:   PetscMalloc2(n, &ix, n, &iy);
 49:   start = ranges[rank];
 50:   for (i = ranges[rank]; i < ranges[rank + 1]; i++) ix[i - start] = i;
 51:   ISCreateGeneral(PETSC_COMM_WORLD, n, ix, PETSC_COPY_VALUES, &isx);

 53:   if (rank == 0) {
 54:     for (i = 0; i < n; i++) iy[i] = i + 32;
 55:   } else
 56:     for (i = 0; i < n / 2; i++) {
 57:       iy[i]         = i + 96;
 58:       iy[i + n / 2] = i;
 59:     }

 61:   ISCreateGeneral(PETSC_COMM_WORLD, n, iy, PETSC_COPY_VALUES, &isy);
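  /* with 2 ranks, y is laid out as y[0~63] on rank 0 and y[64~127] on rank 1; hence rank 0's destination
     indices 32~63 stay local while 64~95 go to rank 1, and rank 1's 96~127 stay local while 0~31 go to
     rank 0, so the scatter has both a local part and a remote (MPI) part */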

 63:   /* create a vecscatter that cyclically shifts x toward the tail by a quarter of its length and puts the result in y */
 64:   VecScatterCreate(x, isx, y, isy, &vscat);
 65:   VecScatterBegin(vscat, x, y, INSERT_VALUES, SCATTER_FORWARD);
 66:   VecScatterEnd(vscat, x, y, INSERT_VALUES, SCATTER_FORWARD);

 68:   /* view y to check the result. y should be {Q3,Q0,Q1,Q2} of x, that is {96~127,0~31,32~63,64~95} */
 69:   PetscPrintf(PETSC_COMM_WORLD, "Before VecScatterRemap on PtoP, MPI vector y is:\n");
 70:   VecView(y, PETSC_VIEWER_STDOUT_WORLD);

 72:   /* now call the weird subroutine VecScatterRemap to slightly change the vecscatter. It changes where we read vector
 73:      x entries to send out, but does not change the communication pattern (i.e., send/recv pairs and msg lengths).

 75:      We create tomap as {32~63,0~31}. Originally, we read from local indices {0~63} of x to send out. The remap
 76:      does indices[i] = tomap[indices[i]]. Therefore, after the remap, we read from indices {32~63,0~31} of the local x.
 77:      isy is unchanged. So, we will shift x to {Q2,Q1,Q0,Q3}, that is {64~95,32~63,0~31,96~127}
 78:   */
 79:   PetscMalloc1(n, &tomap);
 80:   for (i = 0; i < n / 2; i++) {
 81:     tomap[i]         = i + n / 2;
 82:     tomap[i + n / 2] = i;
 83:   }
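  /* for illustration: the send entry that previously read local index 0 of x now reads local index
     tomap[0] = 32, and the one that previously read local index 32 now reads tomap[32] = 0 */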
 84:   VecScatterRemap(vscat, tomap, NULL);
 85:   VecScatterBegin(vscat, x, y, INSERT_VALUES, SCATTER_FORWARD);
 86:   VecScatterEnd(vscat, x, y, INSERT_VALUES, SCATTER_FORWARD);

 88:   /* view y to check the result. y should be {64~95,32~63,0~31,96~127} */
 89:   PetscPrintf(PETSC_COMM_WORLD, "After VecScatterRemap on PtoP, MPI vector y is:\n");
 90:   VecView(y, PETSC_VIEWER_STDOUT_WORLD);

 92:   /* destroy everything before we recreate the objects with different types */
 93:   PetscFree2(ix, iy);
 94:   VecDestroy(&x);
 95:   VecDestroy(&y);
 96:   ISDestroy(&isx);
 97:   ISDestroy(&isy);
 98:   PetscFree(tomap);
 99:   VecScatterDestroy(&vscat);

101:   /* ==========================================================================================
102:      (2) test VecScatterRemap on a sequential general to sequential general (SGToSG) vecscatter
103:      ==========================================================================================
104:    */
105:   n = 64; /* long enough to trigger memcpy optimizations in local scatter */

107:   /* create two seq vectors x, y of length n */
108:   VecCreateSeq(PETSC_COMM_SELF, n, &x);
109:   VecDuplicate(x, &y);

111:   /* Initialize x as {0~63} */
112:   for (i = 0; i < n; i++) VecSetValue(x, i, (PetscScalar)i, INSERT_VALUES);
113:   VecAssemblyBegin(x);
114:   VecAssemblyEnd(x);

116:   /* create two general index sets isx = isy = {0~63}. The indices are contiguous, but we store them as
117:      general ISs and let PETSc detect the pattern and optimize it */
118:   PetscMalloc2(n, &ix, n, &iy);
119:   for (i = 0; i < n; i++) ix[i] = i;
120:   ISCreateGeneral(PETSC_COMM_SELF, n, ix, PETSC_COPY_VALUES, &isx);
121:   ISDuplicate(isx, &isy);

123:   /* create a vecscatter that just copies x to y */
124:   VecScatterCreate(x, isx, y, isy, &vscat);
125:   VecScatterBegin(vscat, x, y, INSERT_VALUES, SCATTER_FORWARD);
126:   VecScatterEnd(vscat, x, y, INSERT_VALUES, SCATTER_FORWARD);

128:   /* view y to check the result. y should be {0~63} */
129:   PetscPrintf(PETSC_COMM_WORLD, "\nBefore VecScatterRemap on SGToSG, SEQ vector y is:\n");
130:   VecView(y, PETSC_VIEWER_STDOUT_WORLD);

132:   /* now call the weird subroutine VecScatterRemap to slightly change the vecscatter.

134:      Create tomap as {32~63,0~31}. Originally, we read from indices {0~63} of seq x to write to y. The remap
135:      does indices[i] = tomap[indices[i]]. Therefore, after the remap, we read from indices {32~63,0~31} of seq x.
136:    */
137:   PetscMalloc1(n, &tomap);
138:   for (i = 0; i < n / 2; i++) {
139:     tomap[i]         = i + n / 2;
140:     tomap[i + n / 2] = i;
141:   }
142:   VecScatterRemap(vscat, tomap, NULL);
143:   VecScatterBegin(vscat, x, y, INSERT_VALUES, SCATTER_FORWARD);
144:   VecScatterEnd(vscat, x, y, INSERT_VALUES, SCATTER_FORWARD);

146:   /* view y to check the result. y should be {32~63,0~31} */
147:   PetscPrintf(PETSC_COMM_WORLD, "After VecScatterRemap on SGToSG, SEQ vector y is:\n");
148:   VecView(y, PETSC_VIEWER_STDOUT_WORLD);

150:   /* destroy everything before we recreate the objects with different types */
151:   PetscFree2(ix, iy);
152:   VecDestroy(&x);
153:   VecDestroy(&y);
154:   ISDestroy(&isx);
155:   ISDestroy(&isy);
156:   PetscFree(tomap);
157:   VecScatterDestroy(&vscat);

159:   /* ===================================================================================================
160:      (3) test VecScatterRemap on a sequential general to sequential stride 1 (SGToSS_Stride1) vecscatter
161:      ===================================================================================================
162:    */
163:   n = 64; /* long enough to trigger memcpy optimizations in local scatter */

165:   /* create two seq vectors: x of length n and y of length n/2 */
166:   VecCreateSeq(PETSC_COMM_SELF, n, &x);
167:   VecCreateSeq(PETSC_COMM_SELF, n / 2, &y);

169:   /* Initialize x as {0~63} */
170:   for (i = 0; i < n; i++) VecSetValue(x, i, (PetscScalar)i, INSERT_VALUES);
171:   VecAssemblyBegin(x);
172:   VecAssemblyEnd(x);

174:   /* create a general index set isx = {0:63:2}, which actually is a stride IS with first=0, n=32, step=2,
175:      but we use it as general and let PETSc detect the pattern and optimize it. */
176:   PetscMalloc2(n / 2, &ix, n / 2, &iy);
177:   for (i = 0; i < n / 2; i++) ix[i] = i * 2;
178:   ISCreateGeneral(PETSC_COMM_SELF, n / 2, ix, PETSC_COPY_VALUES, &isx);

180:   /* create a stride1 index set isy = {0~31}. We intentionally set the step to 1 to trigger optimizations */
181:   ISCreateStride(PETSC_COMM_SELF, 32, 0, 1, &isy);

183:   /* create a vecscatter that just copies even entries of x to y */
184:   VecScatterCreate(x, isx, y, isy, &vscat);
185:   VecScatterBegin(vscat, x, y, INSERT_VALUES, SCATTER_FORWARD);
186:   VecScatterEnd(vscat, x, y, INSERT_VALUES, SCATTER_FORWARD);

188:   /* view y to check the result. y should be {0:63:2} */
189:   PetscPrintf(PETSC_COMM_WORLD, "\nBefore VecScatterRemap on SGToSS_Stride1, SEQ vector y is:\n");
190:   VecView(y, PETSC_VIEWER_STDOUT_WORLD);

192:   /* now call the weird subroutine VecScatterRemap to slightly change the vecscatter.

194:      Create tomap as {32~63,0~31}. Originally, we read from indices {0:63:2} of seq x to write to y. The remap
195:      does indices[i] = tomap[indices[i]]. Therefore, after the remap, we read from indices {32:63:2,0:31:2} of seq x.
196:    */
197:   PetscMalloc1(n, &tomap);
198:   for (i = 0; i < n / 2; i++) {
199:     tomap[i]         = i + n / 2;
200:     tomap[i + n / 2] = i;
201:   }
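  /* note that after this remap the read indices {32:63:2,0:31:2} no longer form a single strided block but two,
     which exercises the SGToSS_Stride1 optimization set up at vecscatter creation time with a remapped layout */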
202:   VecScatterRemap(vscat, tomap, NULL);
203:   VecScatterBegin(vscat, x, y, INSERT_VALUES, SCATTER_FORWARD);
204:   VecScatterEnd(vscat, x, y, INSERT_VALUES, SCATTER_FORWARD);

206:   /* view y to check the result. y should be {32:63:2,0:31:2} */
207:   PetscPrintf(PETSC_COMM_WORLD, "After VecScatterRemap on SGToSS_Stride1, SEQ vector y is:\n");
208:   VecView(y, PETSC_VIEWER_STDOUT_WORLD);

210:   /* destroy everything before PetscFinalize */
211:   PetscFree2(ix, iy);
212:   VecDestroy(&x);
213:   VecDestroy(&y);
214:   ISDestroy(&isx);
215:   ISDestroy(&isy);
216:   PetscFree(tomap);
217:   VecScatterDestroy(&vscat);

219:   PetscFinalize();
220:   return 0;
221: }
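
/* A typical way to run this example (assuming a standard PETSc build in which the usual makefile rules
   provide an ex15 target and mpiexec is the MPI launcher; adjust to your installation):

     make ex15
     mpiexec -n 2 ./ex15

   This should print the "Before/After VecScatterRemap" vectors described in the comments above. */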

223: /*TEST

225:    test:
226:       suffix: 1
227:       nsize: 2
228:       diff_args: -j
229:       requires: double
230: TEST*/