Actual source code: ex18.c
static char help[] = "Tests the use of MatZeroRowsColumns() for parallel matrices.\n\
Contributed-by: Stephan Kramer <s.kramer@imperial.ac.uk>\n\n";

#include <petscmat.h>

int main(int argc, char **args)
{
  Mat          A;
  Vec          x, rhs, y;
  PetscInt     i, j, k, b, m = 3, n, nlocal = 2, bs = 1, Ii, J;
  PetscInt    *boundary_nodes, nboundary_nodes, *boundary_indices;
  PetscMPIInt  rank, size;
  PetscScalar  v, v0, v1, v2, a0 = 0.1, a, rhsval, *boundary_values, diag = 1.0;
  PetscReal    norm;
  char         convname[64];
  PetscBool    upwind = PETSC_FALSE, nonlocalBC = PETSC_FALSE, zerorhs = PETSC_TRUE, convert = PETSC_FALSE;

  PetscInitialize(&argc, &args, (char *)0, help);
  MPI_Comm_rank(PETSC_COMM_WORLD, &rank);
  MPI_Comm_size(PETSC_COMM_WORLD, &size);
  n = nlocal * size; /* the grid is m x n, with each rank assembling nlocal grid columns */

  PetscOptionsGetInt(NULL, NULL, "-bs", &bs, NULL);
  PetscOptionsGetBool(NULL, NULL, "-nonlocal_bc", &nonlocalBC, NULL);
  PetscOptionsGetScalar(NULL, NULL, "-diag", &diag, NULL);
  PetscOptionsGetString(NULL, NULL, "-convname", convname, sizeof(convname), &convert);
  PetscOptionsGetBool(NULL, NULL, "-zerorhs", &zerorhs, NULL);

  MatCreate(PETSC_COMM_WORLD, &A);
  MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, m * n * bs, m * n * bs);
  MatSetFromOptions(A);
  MatSetUp(A);

  MatCreateVecs(A, NULL, &rhs);
  VecSetFromOptions(rhs);
  VecSetUp(rhs);
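
  /* Assemble an operator on an m x n structured grid with bs unknowns per grid
     point: a 5-point-stencil diffusion term and, when upwind is set (it is
     hard-wired to PETSC_FALSE here, so that branch is compiled but not
     exercised), a 2nd-order upwind advection term plus a coupling between
     neighbouring components within each block. */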
  rhsval = 0.0;
  for (i = 0; i < m; i++) {
    for (j = nlocal * rank; j < nlocal * (rank + 1); j++) {
      a = a0;
      for (b = 0; b < bs; b++) {
        /* let's start with a 5-point stencil diffusion term */
        v  = -1.0;
        Ii = (j + n * i) * bs + b;
        if (i > 0) {
          J = Ii - n * bs;
          MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES);
        }
        if (i < m - 1) {
          J = Ii + n * bs;
          MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES);
        }
        if (j > 0) {
          J = Ii - 1 * bs;
          MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES);
        }
        if (j < n - 1) {
          J = Ii + 1 * bs;
          MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES);
        }
        v = 4.0;
        MatSetValues(A, 1, &Ii, 1, &Ii, &v, ADD_VALUES);
        if (upwind) {
          /* now add a 2nd order upwind advection term to add a little asymmetry */
          if (j > 2) {
            J  = Ii - 2 * bs;
            v2 = 0.5 * a;
            v1 = -2.0 * a;
            v0 = 1.5 * a;
            MatSetValues(A, 1, &Ii, 1, &J, &v2, ADD_VALUES);
          } else {
            /* fall back to 1st order upwind */
            v1 = -1.0 * a;
            v0 = 1.0 * a;
          }
          if (j > 1) {
            J = Ii - 1 * bs;
            MatSetValues(A, 1, &Ii, 1, &J, &v1, ADD_VALUES);
          }
          MatSetValues(A, 1, &Ii, 1, &Ii, &v0, ADD_VALUES);
          a /= 10.; /* use a different velocity for the next component */
          /* add a coupling to the previous and next components */
          v = 0.5;
          if (b > 0) {
            J = Ii - 1;
            MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES);
          }
          if (b < bs - 1) {
            J = Ii + 1;
            MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES);
          }
        }
        /* make up some rhs */
        VecSetValue(rhs, Ii, rhsval, INSERT_VALUES);
        rhsval += 1.0;
      }
    }
  }
  MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);

  if (convert) { /* Test different Mat implementations */
    Mat B;

    MatConvert(A, convname, MAT_INITIAL_MATRIX, &B);
    MatDestroy(&A);
    A = B;
  }

  VecAssemblyBegin(rhs);
  VecAssemblyEnd(rhs);
  /* set rhs to zero to simplify */
  if (zerorhs) VecZeroEntries(rhs);
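
  /* Select the boundary nodes whose rows and columns will be eliminated: the
     first grid row (node indices 0 .. n-1) and the first grid column (node
     indices n*i for i = 1 .. m-1). Two variants are exercised below. */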
  if (nonlocalBC) {
    /* version where boundary conditions are set by processes that don't necessarily own the nodes */
    if (rank == 0) {
      nboundary_nodes = size > m ? nlocal : m - size + nlocal;
      PetscMalloc1(nboundary_nodes, &boundary_nodes);
      k = 0;
      for (i = size; i < m; i++, k++) boundary_nodes[k] = n * i;
    } else if (rank < m) {
      nboundary_nodes = nlocal + 1;
      PetscMalloc1(nboundary_nodes, &boundary_nodes);
      boundary_nodes[0] = rank * n;
      k                 = 1;
    } else {
      nboundary_nodes = nlocal;
      PetscMalloc1(nboundary_nodes, &boundary_nodes);
      k = 0;
    }
    for (j = nlocal * rank; j < nlocal * (rank + 1); j++, k++) boundary_nodes[k] = j;
  } else {
    /* version where boundary conditions are set by the node owners only */
    PetscMalloc1(m * n, &boundary_nodes);
    k = 0;
    for (j = 0; j < n; j++) {
      Ii = j;
      if (Ii >= rank * m * nlocal && Ii < (rank + 1) * m * nlocal) boundary_nodes[k++] = Ii;
    }
    for (i = 1; i < m; i++) {
      Ii = n * i;
      if (Ii >= rank * m * nlocal && Ii < (rank + 1) * m * nlocal) boundary_nodes[k++] = Ii;
    }
    nboundary_nodes = k;
  }
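
  /* In the nonlocal variant above, ranks list boundary nodes they do not
     necessarily own; this is what the test exercises, since
     MatZeroRowsColumns() lets each process pass in rows regardless of which
     process owns them. */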

  VecDuplicate(rhs, &x);
  VecZeroEntries(x);
  PetscMalloc2(nboundary_nodes * bs, &boundary_indices, nboundary_nodes * bs, &boundary_values);
  /* expand the node list to per-component indices; give component b of boundary node N the value N + 0.1*b */
  for (k = 0; k < nboundary_nodes; k++) {
    Ii = boundary_nodes[k] * bs;
    v  = 1.0 * boundary_nodes[k];
    for (b = 0; b < bs; b++, Ii++) {
      boundary_indices[k * bs + b] = Ii;
      boundary_values[k * bs + b]  = v;
      PetscSynchronizedPrintf(PETSC_COMM_WORLD, "%d %" PetscInt_FMT " %f\n", rank, Ii, (double)PetscRealPart(v));
      v += 0.1;
    }
  }
  PetscSynchronizedFlush(PETSC_COMM_WORLD, NULL);
  VecSetValues(x, nboundary_nodes * bs, boundary_indices, boundary_values, INSERT_VALUES);
  VecAssemblyBegin(x);
  VecAssemblyEnd(x);

  /* We can check the rhs returned by MatZeroRowsColumns() by computing y = rhs - A*x and overwriting the boundary entries with the boundary values (scaled by diag) */
  VecDuplicate(x, &y);
  MatMult(A, x, y);
  VecAYPX(y, -1.0, rhs); /* y = rhs - A*x */
  for (k = 0; k < nboundary_nodes * bs; k++) boundary_values[k] *= diag;
  VecSetValues(y, nboundary_nodes * bs, boundary_indices, boundary_values, INSERT_VALUES);
  VecAssemblyBegin(y);
  VecAssemblyEnd(y);

  PetscPrintf(PETSC_COMM_WORLD, "*** Matrix A and vector x:\n");
  MatView(A, PETSC_VIEWER_STDOUT_WORLD);
  VecView(x, PETSC_VIEWER_STDOUT_WORLD);
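
  /* MatZeroRowsColumns() zeros the listed rows and columns, puts diag on the
     diagonal of each eliminated row, and uses the values in x to adjust rhs so
     that the eliminated unknowns take on their prescribed boundary values. */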
  MatZeroRowsColumns(A, nboundary_nodes * bs, boundary_indices, diag, x, rhs);
  PetscPrintf(PETSC_COMM_WORLD, "*** Vector rhs returned by MatZeroRowsColumns\n");
  VecView(rhs, PETSC_VIEWER_STDOUT_WORLD);
  VecAXPY(y, -1.0, rhs); /* y = y - rhs: difference between the expected and returned rhs */
  VecNorm(y, NORM_INFINITY, &norm);
  if (norm > 1.0e-10) {
    PetscPrintf(PETSC_COMM_WORLD, "*** Difference between rhs and y, inf-norm: %f\n", (double)norm);
    VecView(y, PETSC_VIEWER_STDOUT_WORLD);
    SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_PLIB, "Bug in MatZeroRowsColumns");
  }

  PetscFree(boundary_nodes);
  PetscFree2(boundary_indices, boundary_values);
  VecDestroy(&x);
  VecDestroy(&y);
  VecDestroy(&rhs);
  MatDestroy(&A);

  PetscFinalize();
  return 0;
}
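
/* Example invocations, mirroring a few of the TEST stanzas below:
     ./ex18 -bs 2 -nonlocal_bc
     mpiexec -n 2 ./ex18 -bs 2 -nonlocal_bc -mat_type baij
     mpiexec -n 3 ./ex18 -diag 0.12 -convname shell -zerorhs 0
*/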

/*TEST

   test:
     diff_args: -j
     suffix: 0

   test:
     diff_args: -j
     suffix: 1
     nsize: 2

   test:
     diff_args: -j
     suffix: 10
     nsize: 2
     args: -bs 2 -nonlocal_bc

   test:
     diff_args: -j
     suffix: 11
     nsize: 7
     args: -bs 2 -nonlocal_bc

   test:
     diff_args: -j
     suffix: 12
     args: -bs 2 -nonlocal_bc -mat_type baij

   test:
     diff_args: -j
     suffix: 13
     nsize: 2
     args: -bs 2 -nonlocal_bc -mat_type baij

   test:
     diff_args: -j
     suffix: 14
     nsize: 7
     args: -bs 2 -nonlocal_bc -mat_type baij

   test:
     diff_args: -j
     suffix: 2
     nsize: 7

   test:
     diff_args: -j
     suffix: 3
     args: -mat_type baij

   test:
     diff_args: -j
     suffix: 4
     nsize: 2
     args: -mat_type baij

   test:
     diff_args: -j
     suffix: 5
     nsize: 7
     args: -mat_type baij

   test:
     diff_args: -j
     suffix: 6
     args: -bs 2 -mat_type baij

   test:
     diff_args: -j
     suffix: 7
     nsize: 2
     args: -bs 2 -mat_type baij

   test:
     diff_args: -j
     suffix: 8
     nsize: 7
     args: -bs 2 -mat_type baij

   test:
     diff_args: -j
     suffix: 9
     args: -bs 2 -nonlocal_bc

   test:
     diff_args: -j
     suffix: 15
     args: -bs 2 -nonlocal_bc -convname shell

   test:
     diff_args: -j
     suffix: 16
     nsize: 2
     args: -bs 2 -nonlocal_bc -convname shell

   test:
     diff_args: -j
     suffix: 17
     args: -bs 2 -nonlocal_bc -convname dense

   testset:
     diff_args: -j
     suffix: full
     nsize: {{1 3}separate output}
     args: -diag {{0.12 -0.13}separate output} -convname {{aij shell baij}separate output} -zerorhs 0

   test:
     diff_args: -j
     requires: cuda
     suffix: cusparse_1
     nsize: 1
     args: -diag {{0.12 -0.13}separate output} -convname {{seqaijcusparse mpiaijcusparse}separate output} -zerorhs 0 -mat_type {{seqaijcusparse mpiaijcusparse}separate output}

   test:
     diff_args: -j
     requires: cuda
     suffix: cusparse_3
     nsize: 3
     args: -diag {{0.12 -0.13}separate output} -convname mpiaijcusparse -zerorhs 0 -mat_type mpiaijcusparse

TEST*/