Actual source code: ex18.c

static char help[] = "Tests the use of MatZeroRowsColumns() for parallel matrices.\n\
Contributed-by: Stephan Kramer <s.kramer@imperial.ac.uk>\n\n";

#include <petscmat.h>

int main(int argc, char **args)
{
  Mat         A;
  Vec         x, rhs, y;
  PetscInt    i, j, k, b, m = 3, n, nlocal = 2, bs = 1, Ii, J;
  PetscInt   *boundary_nodes, nboundary_nodes, *boundary_indices;
  PetscMPIInt rank, size;
  PetscScalar v, v0, v1, v2, a0 = 0.1, a, rhsval, *boundary_values, diag = 1.0;
  PetscReal   norm;
  char        convname[64];
  PetscBool   upwind = PETSC_FALSE, nonlocalBC = PETSC_FALSE, zerorhs = PETSC_TRUE, convert = PETSC_FALSE;

  PetscInitialize(&argc, &args, (char *)0, help);
  MPI_Comm_rank(PETSC_COMM_WORLD, &rank);
  MPI_Comm_size(PETSC_COMM_WORLD, &size);
  n = nlocal * size;

  PetscOptionsGetInt(NULL, NULL, "-bs", &bs, NULL);
  PetscOptionsGetBool(NULL, NULL, "-nonlocal_bc", &nonlocalBC, NULL);
  PetscOptionsGetScalar(NULL, NULL, "-diag", &diag, NULL);
  PetscOptionsGetString(NULL, NULL, "-convname", convname, sizeof(convname), &convert);
  PetscOptionsGetBool(NULL, NULL, "-zerorhs", &zerorhs, NULL);

  MatCreate(PETSC_COMM_WORLD, &A);
  MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, m * n * bs, m * n * bs);
  MatSetFromOptions(A);
  MatSetUp(A);

  MatCreateVecs(A, NULL, &rhs);
  VecSetFromOptions(rhs);
  VecSetUp(rhs);

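  /* Assemble a bs-sized block operator on an m x n structured grid: grid point (i,j), component b, maps to
     global row Ii = (j + n*i)*bs + b.  Each rank generates the entries for its slice of grid columns
     j in [nlocal*rank, nlocal*(rank+1)), so some values land in rows owned by other ranks and are
     communicated during matrix assembly. */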
  rhsval = 0.0;
  for (i = 0; i < m; i++) {
    for (j = nlocal * rank; j < nlocal * (rank + 1); j++) {
      a = a0;
      for (b = 0; b < bs; b++) {
        /* let's start with a 5-point stencil diffusion term */
        v  = -1.0;
        Ii = (j + n * i) * bs + b;
        if (i > 0) {
          J = Ii - n * bs;
          MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES);
        }
        if (i < m - 1) {
          J = Ii + n * bs;
          MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES);
        }
        if (j > 0) {
          J = Ii - 1 * bs;
          MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES);
        }
        if (j < n - 1) {
          J = Ii + 1 * bs;
          MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES);
        }
        v = 4.0;
        MatSetValues(A, 1, &Ii, 1, &Ii, &v, ADD_VALUES);
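        /* NOTE: upwind is a compile-time switch (initialized to PETSC_FALSE above and never read from
           the options database), so the advection block below is skipped unless the source is edited */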
        if (upwind) {
          /* now add a 2nd order upwind advection term to add a little asymmetry */
          if (j > 2) {
            J  = Ii - 2 * bs;
            v2 = 0.5 * a;
            v1 = -2.0 * a;
            v0 = 1.5 * a;
            MatSetValues(A, 1, &Ii, 1, &J, &v2, ADD_VALUES);
          } else {
            /* fall back to 1st order upwind */
            v1 = -1.0 * a;
            v0 = 1.0 * a;
          }
          if (j > 1) {
            J = Ii - 1 * bs;
            MatSetValues(A, 1, &Ii, 1, &J, &v1, ADD_VALUES);
          }
          MatSetValues(A, 1, &Ii, 1, &Ii, &v0, ADD_VALUES);
          a /= 10.; /* use a different velocity for the next component */
          /* add a coupling to the previous and next components */
          v = 0.5;
          if (b > 0) {
            J = Ii - 1;
            MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES);
          }
          if (b < bs - 1) {
            J = Ii + 1;
            MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES);
          }
        }
        /* make up some rhs */
        VecSetValue(rhs, Ii, rhsval, INSERT_VALUES);
        rhsval += 1.0;
      }
    }
  }
  MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);

  if (convert) { /* Test different Mat implementations */
    Mat B;

    MatConvert(A, convname, MAT_INITIAL_MATRIX, &B);
    MatDestroy(&A);
    A = B;
  }

  VecAssemblyBegin(rhs);
  VecAssemblyEnd(rhs);
  /* set rhs to zero to simplify */
  if (zerorhs) VecZeroEntries(rhs);

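  /* Pick the boundary nodes: the first grid row (i = 0) and the first grid column (j = 0).
     The two branches below select the same set of nodes; they differ only in which rank lists each node. */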
  if (nonlocalBC) {
    /* version where boundary conditions are set by processes that don't necessarily own the nodes */
    if (rank == 0) {
      nboundary_nodes = size > m ? nlocal : m - size + nlocal;
      PetscMalloc1(nboundary_nodes, &boundary_nodes);
      k = 0;
      for (i = size; i < m; i++, k++) boundary_nodes[k] = n * i;
    } else if (rank < m) {
      nboundary_nodes = nlocal + 1;
      PetscMalloc1(nboundary_nodes, &boundary_nodes);
      boundary_nodes[0] = rank * n;
      k                 = 1;
    } else {
      nboundary_nodes = nlocal;
      PetscMalloc1(nboundary_nodes, &boundary_nodes);
      k = 0;
    }
    for (j = nlocal * rank; j < nlocal * (rank + 1); j++, k++) boundary_nodes[k] = j;
  } else {
    /* version where boundary conditions are set by the node owners only */
    PetscMalloc1(m * n, &boundary_nodes);
    k = 0;
    for (j = 0; j < n; j++) {
      Ii = j;
      if (Ii >= rank * m * nlocal && Ii < (rank + 1) * m * nlocal) boundary_nodes[k++] = Ii;
    }
    for (i = 1; i < m; i++) {
      Ii = n * i;
      if (Ii >= rank * m * nlocal && Ii < (rank + 1) * m * nlocal) boundary_nodes[k++] = Ii;
    }
    nboundary_nodes = k;
  }

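  /* Prescribe a value for every component of every boundary node; x holds these values (and zeros
     elsewhere) and is later passed to MatZeroRowsColumns() as the vector of prescribed solutions */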
  VecDuplicate(rhs, &x);
  VecZeroEntries(x);
  PetscMalloc2(nboundary_nodes * bs, &boundary_indices, nboundary_nodes * bs, &boundary_values);
  for (k = 0; k < nboundary_nodes; k++) {
    Ii = boundary_nodes[k] * bs;
    v  = 1.0 * boundary_nodes[k];
    for (b = 0; b < bs; b++, Ii++) {
      boundary_indices[k * bs + b] = Ii;
      boundary_values[k * bs + b]  = v;
      PetscSynchronizedPrintf(PETSC_COMM_WORLD, "%d %" PetscInt_FMT " %f\n", rank, Ii, (double)PetscRealPart(v));
      v += 0.1;
    }
  }
  PetscSynchronizedFlush(PETSC_COMM_WORLD, NULL);
  VecSetValues(x, nboundary_nodes * bs, boundary_indices, boundary_values, INSERT_VALUES);
  VecAssemblyBegin(x);
  VecAssemblyEnd(x);

  /* We can check the rhs returned by MatZeroRowsColumns() by computing y = rhs - A*x and overwriting the boundary entries with the boundary values */
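  /* Because x is zero away from the boundary, (A*x)_i collects exactly the eliminated-column
     contributions, so rhs - A*x reproduces the interior-row update made by MatZeroRowsColumns();
     the boundary entries of y are then overwritten with diag*x_i below */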
  VecDuplicate(x, &y);
  MatMult(A, x, y);
  VecAYPX(y, -1.0, rhs);
  for (k = 0; k < nboundary_nodes * bs; k++) boundary_values[k] *= diag;
  VecSetValues(y, nboundary_nodes * bs, boundary_indices, boundary_values, INSERT_VALUES);
  VecAssemblyBegin(y);
  VecAssemblyEnd(y);

  PetscPrintf(PETSC_COMM_WORLD, "*** Matrix A and vector x:\n");
  MatView(A, PETSC_VIEWER_STDOUT_WORLD);
  VecView(x, PETSC_VIEWER_STDOUT_WORLD);

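  /* Zero the boundary rows and columns of A, put 'diag' on their diagonal, and adjust rhs using the
     prescribed values in x: rhs_i = diag*x_i for eliminated rows, and rhs_i -= A_ij*x_j over the
     eliminated columns j for the remaining rows */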
  MatZeroRowsColumns(A, nboundary_nodes * bs, boundary_indices, diag, x, rhs);
  PetscPrintf(PETSC_COMM_WORLD, "*** Vector rhs returned by MatZeroRowsColumns\n");
  VecView(rhs, PETSC_VIEWER_STDOUT_WORLD);
  VecAXPY(y, -1.0, rhs);
  VecNorm(y, NORM_INFINITY, &norm);
  if (norm > 1.0e-10) {
    PetscPrintf(PETSC_COMM_WORLD, "*** Difference between rhs and y, inf-norm: %f\n", (double)norm);
    VecView(y, PETSC_VIEWER_STDOUT_WORLD);
    SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_PLIB, "Bug in MatZeroRowsColumns");
  }

  PetscFree(boundary_nodes);
  PetscFree2(boundary_indices, boundary_values);
  VecDestroy(&x);
  VecDestroy(&y);
  VecDestroy(&rhs);
  MatDestroy(&A);

  PetscFinalize();
  return 0;
}

/*TEST

   test:
      diff_args: -j
      suffix: 0

   test:
      diff_args: -j
      suffix: 1
      nsize: 2

   test:
      diff_args: -j
      suffix: 10
      nsize: 2
      args: -bs 2 -nonlocal_bc

   test:
      diff_args: -j
      suffix: 11
      nsize: 7
      args: -bs 2 -nonlocal_bc

   test:
      diff_args: -j
      suffix: 12
      args: -bs 2 -nonlocal_bc -mat_type baij

   test:
      diff_args: -j
      suffix: 13
      nsize: 2
      args: -bs 2 -nonlocal_bc -mat_type baij

   test:
      diff_args: -j
      suffix: 14
      nsize: 7
      args: -bs 2 -nonlocal_bc -mat_type baij

   test:
      diff_args: -j
      suffix: 2
      nsize: 7

   test:
      diff_args: -j
      suffix: 3
      args: -mat_type baij

   test:
      diff_args: -j
      suffix: 4
      nsize: 2
      args: -mat_type baij

   test:
      diff_args: -j
      suffix: 5
      nsize: 7
      args: -mat_type baij

   test:
      diff_args: -j
      suffix: 6
      args: -bs 2 -mat_type baij

   test:
      diff_args: -j
      suffix: 7
      nsize: 2
      args: -bs 2 -mat_type baij

   test:
      diff_args: -j
      suffix: 8
      nsize: 7
      args: -bs 2 -mat_type baij

   test:
      diff_args: -j
      suffix: 9
      args: -bs 2 -nonlocal_bc

   test:
      diff_args: -j
      suffix: 15
      args: -bs 2 -nonlocal_bc -convname shell

   test:
      diff_args: -j
      suffix: 16
      nsize: 2
      args: -bs 2 -nonlocal_bc -convname shell

   test:
      diff_args: -j
      suffix: 17
      args: -bs 2 -nonlocal_bc -convname dense

   testset:
      diff_args: -j
      suffix: full
      nsize: {{1 3}separate output}
      args: -diag {{0.12 -0.13}separate output} -convname {{aij shell baij}separate output} -zerorhs 0

   test:
      diff_args: -j
      requires: cuda
      suffix: cusparse_1
      nsize: 1
      args: -diag {{0.12 -0.13}separate output} -convname {{seqaijcusparse mpiaijcusparse}separate output} -zerorhs 0 -mat_type {{seqaijcusparse mpiaijcusparse}separate output}

   test:
      diff_args: -j
      requires: cuda
      suffix: cusparse_3
      nsize: 3
      args: -diag {{0.12 -0.13}separate output} -convname mpiaijcusparse -zerorhs 0 -mat_type mpiaijcusparse

TEST*/