Actual source code: ex5.c
static char help[] = "Solves two linear systems in parallel with KSP. The code\n\
illustrates repeated solution of linear systems with the same preconditioner\n\
method but different matrices (having the same nonzero structure). The code\n\
also uses multiple profiling stages. Input arguments are\n\
  -m <size> : problem size\n\
  -mat_nonsym : use nonsymmetric matrix (default is symmetric)\n\n";
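/*
  Example invocation (illustrative only; any MPI launcher, process count, and
  option values may be substituted):
      mpiexec -n 2 ./ex5 -m 10 -mat_nonsym -log_view
*/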
/*
  Include "petscksp.h" so that we can use KSP solvers. Note that this file
  automatically includes:
     petscsys.h    - base PETSc routines   petscvec.h - vectors
     petscmat.h    - matrices
     petscis.h     - index sets            petscksp.h - Krylov subspace methods
     petscviewer.h - viewers               petscpc.h  - preconditioners
*/
#include <petscksp.h>
int main(int argc, char **args)
{
  KSP         ksp;         /* linear solver context */
  Mat         C;           /* matrix */
  Vec         x, u, b;     /* approx solution, exact solution, RHS */
  PetscReal   norm, bnorm; /* norm of solution error, norm of RHS */
  PetscScalar v, none = -1.0;
  PetscInt    Ii, J, ldim, low, high, iglobal, Istart, Iend;
  PetscInt    i, j, m = 3, n = 2, its;
  PetscMPIInt size, rank;
  PetscBool   mat_nonsymmetric = PETSC_FALSE;
  PetscBool   testnewC = PETSC_FALSE, testscaledMat = PETSC_FALSE;
#if defined(PETSC_USE_LOG)
  PetscLogStage stages[2];
#endif
  PetscInitialize(&argc, &args, (char *)0, help);
  PetscOptionsGetInt(NULL, NULL, "-m", &m, NULL);
  MPI_Comm_rank(PETSC_COMM_WORLD, &rank);
  MPI_Comm_size(PETSC_COMM_WORLD, &size);
  n = 2 * size;

  /*
     Set a flag if we are doing a nonsymmetric problem; the default is symmetric.
  */
  PetscOptionsGetBool(NULL, NULL, "-mat_nonsym", &mat_nonsymmetric, NULL);

  PetscOptionsGetBool(NULL, NULL, "-test_scaledMat", &testscaledMat, NULL);

  /*
     Register two stages for separate profiling of the two linear solves.
     Use the runtime option -log_view for a printout of performance
     statistics at the program's conclusion.
  */
  PetscLogStageRegister("Original Solve", &stages[0]);
  PetscLogStageRegister("Second Solve", &stages[1]);
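  /*
     Note: with -log_view, the events inside each registered stage are reported
     in separate sections labeled "Original Solve" and "Second Solve", which
     makes it easy to compare the cost of the two solves.
  */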
  /* -------------- Stage 0: Solve Original System ---------------------- */
  /*
     Indicate to PETSc profiling that we're beginning the first stage
  */
  PetscLogStagePush(stages[0]);

  /*
     Create parallel matrix, specifying only its global dimensions.
     When using MatCreate(), the matrix format can be specified at
     runtime. Also, the parallel partitioning of the matrix is
     determined by PETSc at runtime.
  */
  MatCreate(PETSC_COMM_WORLD, &C);
  MatSetSizes(C, PETSC_DECIDE, PETSC_DECIDE, m * n, m * n);
  MatSetFromOptions(C);
  MatSetUp(C);

  /*
     Currently, all PETSc parallel matrix formats are partitioned by
     contiguous chunks of rows across the processors. Determine which
     rows of the matrix are locally owned.
  */
  MatGetOwnershipRange(C, &Istart, &Iend);

  /*
     Set matrix entries in parallel.
      - Each processor needs to insert only elements that it owns
        locally (but any non-local elements will be sent to the
        appropriate processor during matrix assembly).
      - Always specify global rows and columns of matrix entries.
  */
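  /*
     The loop below assembles the standard 5-point stencil on an m x n grid:
     row Ii corresponds to grid point (i,j) with Ii = i*n + j, each of the four
     neighbors contributes -1, and the diagonal entry is 4.
  */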
  for (Ii = Istart; Ii < Iend; Ii++) {
    v = -1.0;
    i = Ii / n;
    j = Ii - i * n;
    if (i > 0) {
      J = Ii - n;
      MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES);
    }
    if (i < m - 1) {
      J = Ii + n;
      MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES);
    }
    if (j > 0) {
      J = Ii - 1;
      MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES);
    }
    if (j < n - 1) {
      J = Ii + 1;
      MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES);
    }
    v = 4.0;
    MatSetValues(C, 1, &Ii, 1, &Ii, &v, ADD_VALUES);
  }
  /*
     Make the matrix nonsymmetric if desired
  */
  if (mat_nonsymmetric) {
    for (Ii = Istart; Ii < Iend; Ii++) {
      v = -1.5;
      i = Ii / n;
      if (i > 1) {
        J = Ii - n - 1;
        MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES);
      }
    }
  } else {
    MatSetOption(C, MAT_SYMMETRIC, PETSC_TRUE);
    MatSetOption(C, MAT_SYMMETRY_ETERNAL, PETSC_TRUE);
  }
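  /*
     MAT_SYMMETRIC and MAT_SYMMETRY_ETERNAL are hints: they declare that the
     matrix is symmetric and will remain symmetric through future assemblies,
     information that some solvers and preconditioners can take advantage of.
  */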
  /*
     Assemble matrix, using the 2-step process:
       MatAssemblyBegin(), MatAssemblyEnd()
     Computations can be done while messages are in transition
     by placing code between these two statements.
  */
  MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY);

  /*
     Create parallel vectors.
      - When using VecSetSizes(), we specify only the vector's global
        dimension; the parallel partitioning is determined at runtime.
      - Note: We form 1 vector from scratch and then duplicate as needed.
  */
  VecCreate(PETSC_COMM_WORLD, &u);
  VecSetSizes(u, PETSC_DECIDE, m * n);
  VecSetFromOptions(u);
  VecDuplicate(u, &b);
  VecDuplicate(b, &x);
  /*
     Currently, all parallel PETSc vectors are partitioned by
     contiguous chunks across the processors. Determine which
     range of entries is locally owned.
  */
  VecGetOwnershipRange(x, &low, &high);

  /*
     Set elements within the exact solution vector in parallel.
      - Each processor needs to insert only elements that it owns
        locally (but any non-local entries will be sent to the
        appropriate processor during vector assembly).
      - Always specify global locations of vector entries.
  */
  VecGetLocalSize(x, &ldim);
  for (i = 0; i < ldim; i++) {
    iglobal = i + low;
    v       = (PetscScalar)(i + 100 * rank);
    VecSetValues(u, 1, &iglobal, &v, INSERT_VALUES);
  }
  /*
     Assemble vector, using the 2-step process:
       VecAssemblyBegin(), VecAssemblyEnd()
     Computations can be done while messages are in transition,
     by placing code between these two statements.
  */
  VecAssemblyBegin(u);
  VecAssemblyEnd(u);

  /*
     Compute right-hand-side vector
  */
  MatMult(C, u, b);

  /*
     Create linear solver context
  */
  KSPCreate(PETSC_COMM_WORLD, &ksp);

  /*
     Set operators. Here the matrix that defines the linear system
     also serves as the preconditioning matrix.
  */
  KSPSetOperators(ksp, C, C);

  /*
     Set runtime options (e.g., -ksp_type <type> -pc_type <type>)
  */
  KSPSetFromOptions(ksp);
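  /*
     For example (options shown are illustrative; any KSP/PC combination
     supported by PETSc may be selected at runtime):
       ./ex5 -ksp_type gmres -pc_type jacobi -ksp_rtol 1e-6
  */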
  /*
     Solve linear system. Here we explicitly call KSPSetUp() for more
     detailed performance monitoring of certain preconditioners, such
     as ICC and ILU. This call is optional, as KSPSetUp() will
     automatically be called within KSPSolve() if it hasn't been
     called already.
  */
  KSPSetUp(ksp);
  KSPSolve(ksp, b, x);
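  /*
     A convergence check could be added here (illustrative sketch, not part of
     the original example):

       KSPConvergedReason reason;
       KSPGetConvergedReason(ksp, &reason);
       if (reason < 0) PetscPrintf(PETSC_COMM_WORLD, "Solve did not converge, reason %d\n", (int)reason);
  */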
  /*
     Check the error: compute the norm of x - u (u is the exact solution) and
     report it relative to the norm of the right-hand side b.
  */
  VecAXPY(x, none, u);
  VecNorm(x, NORM_2, &norm);
  VecNorm(b, NORM_2, &bnorm);
  KSPGetIterationNumber(ksp, &its);
  if (!testscaledMat || norm / bnorm > 1.e-5) PetscPrintf(PETSC_COMM_WORLD, "Relative norm of the residual %g, Iterations %" PetscInt_FMT "\n", (double)norm / (double)bnorm, its);
  /* -------------- Stage 1: Solve Second System ---------------------- */
  /*
     Solve another linear system with the same method. We reuse the KSP
     context, matrix and vector data structures, and hence save the
     overhead of creating new ones.

     Indicate to PETSc profiling that we're concluding the first
     stage with PetscLogStagePop(), and beginning the second stage with
     PetscLogStagePush().
  */
  PetscLogStagePop();
  PetscLogStagePush(stages[1]);

  /*
     Initialize all matrix entries to zero. MatZeroEntries() retains the
     nonzero structure of the matrix for sparse formats.
  */
  MatZeroEntries(C);
  /*
     Assemble matrix again. Note that we retain the same matrix data
     structure and the same nonzero pattern; we just change the values
     of the matrix entries.
  */
  for (i = 0; i < m; i++) {
    for (j = 2 * rank; j < 2 * rank + 2; j++) {
      v  = -1.0;
      Ii = j + n * i;
      if (i > 0) {
        J = Ii - n;
        MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES);
      }
      if (i < m - 1) {
        J = Ii + n;
        MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES);
      }
      if (j > 0) {
        J = Ii - 1;
        MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES);
      }
      if (j < n - 1) {
        J = Ii + 1;
        MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES);
      }
      v = 6.0;
      MatSetValues(C, 1, &Ii, 1, &Ii, &v, ADD_VALUES);
    }
  }
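  /*
     The sparsity pattern is identical to that of the first system; only the
     diagonal value changes (6.0 instead of 4.0), so no new nonzeros are
     allocated during reassembly.
  */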
  if (mat_nonsymmetric) {
    for (Ii = Istart; Ii < Iend; Ii++) {
      v = -1.5;
      i = Ii / n;
      if (i > 1) {
        J = Ii - n - 1;
        MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES);
      }
    }
  }
  MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY);
  if (testscaledMat) {
    PetscRandom rctx;

    /* Scale a(0,0) and a(M-1,M-1) */
    if (rank == 0) {
      v  = 6.0 * 0.00001;
      Ii = 0;
      J  = 0;
      MatSetValues(C, 1, &Ii, 1, &J, &v, INSERT_VALUES);
    } else if (rank == size - 1) {
      v  = 6.0 * 0.00001;
      Ii = m * n - 1;
      J  = m * n - 1;
      MatSetValues(C, 1, &Ii, 1, &J, &v, INSERT_VALUES);
    }
    MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY);

    /* Compute a new right-hand-side vector */
    VecDestroy(&u);
    VecCreate(PETSC_COMM_WORLD, &u);
    VecSetSizes(u, PETSC_DECIDE, m * n);
    VecSetFromOptions(u);

    PetscRandomCreate(PETSC_COMM_WORLD, &rctx);
    PetscRandomSetFromOptions(rctx);
    VecSetRandom(u, rctx);
    PetscRandomDestroy(&rctx);
    VecAssemblyBegin(u);
    VecAssemblyEnd(u);
  }
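  /*
     With -test_scaledMat, the two tiny diagonal entries make the matrix poorly
     scaled; the error checks before and after the second solve then print a
     message only if the relative error exceeds the stated tolerance.
  */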
  PetscOptionsGetBool(NULL, NULL, "-test_newMat", &testnewC, NULL);
  if (testnewC) {
    /*
       User may use a new matrix C with the same nonzero pattern, e.g.
         ./ex5 -ksp_monitor -mat_type sbaij -pc_type cholesky -pc_factor_mat_solver_type mumps -test_newMat
    */
    Mat Ctmp;
    MatDuplicate(C, MAT_COPY_VALUES, &Ctmp);
    MatDestroy(&C);
    MatDuplicate(Ctmp, MAT_COPY_VALUES, &C);
    MatDestroy(&Ctmp);
  }

  MatMult(C, u, b);
  /*
     Set operators. Here the matrix that defines the linear system
     also serves as the preconditioning matrix.
  */
  KSPSetOperators(ksp, C, C);
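  /*
     Because the operator passed to KSPSetOperators() carries new values, the
     preconditioner is recomputed on the next solve; the KSP context and its
     runtime configuration are reused.
  */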
  /*
     Solve linear system
  */
  KSPSetUp(ksp);
  KSPSolve(ksp, b, x);

  /*
     Check the error for the second solve, again reported relative to the norm
     of the first right-hand side (bnorm is not recomputed here).
  */
  VecAXPY(x, none, u);
  VecNorm(x, NORM_2, &norm);
  KSPGetIterationNumber(ksp, &its);
  if (!testscaledMat || norm / bnorm > PETSC_SMALL) PetscPrintf(PETSC_COMM_WORLD, "Relative norm of the residual %g, Iterations %" PetscInt_FMT "\n", (double)norm / (double)bnorm, its);
  /*
     Free work space. All PETSc objects should be destroyed when they
     are no longer needed.
  */
  KSPDestroy(&ksp);
  VecDestroy(&u);
  VecDestroy(&x);
  VecDestroy(&b);
  MatDestroy(&C);

  /*
     Indicate to PETSc profiling that we're concluding the second stage
  */
  PetscLogStagePop();

  PetscFinalize();
  return 0;
}
/*TEST

   test:
      args: -pc_type jacobi -ksp_monitor_short -ksp_gmres_cgs_refinement_type refine_always

   test:
      suffix: 2
      nsize: 2
      args: -pc_type jacobi -ksp_monitor_short -ksp_gmres_cgs_refinement_type refine_always -ksp_rtol .000001

   test:
      suffix: 5
      nsize: 2
      args: -ksp_gmres_cgs_refinement_type refine_always -ksp_monitor draw::draw_lg -ksp_monitor_true_residual draw::draw_lg

   test:
      suffix: asm
      nsize: 4
      args: -pc_type asm

   test:
      suffix: asm_baij
      nsize: 4
      args: -pc_type asm -mat_type baij
      output_file: output/ex5_asm.out

   test:
      suffix: redundant_0
      args: -m 1000 -pc_type redundant -pc_redundant_number 1 -redundant_ksp_type gmres -redundant_pc_type jacobi

   test:
      suffix: redundant_1
      nsize: 5
      args: -pc_type redundant -pc_redundant_number 1 -redundant_ksp_type gmres -redundant_pc_type jacobi

   test:
      suffix: redundant_2
      nsize: 5
      args: -pc_type redundant -pc_redundant_number 3 -redundant_ksp_type gmres -redundant_pc_type jacobi

   test:
      suffix: redundant_3
      nsize: 5
      args: -pc_type redundant -pc_redundant_number 5 -redundant_ksp_type gmres -redundant_pc_type jacobi

   test:
      suffix: redundant_4
      nsize: 5
      args: -pc_type redundant -pc_redundant_number 3 -redundant_ksp_type gmres -redundant_pc_type jacobi -psubcomm_type interlaced

   test:
      suffix: superlu_dist
      nsize: 15
      requires: superlu_dist
      args: -pc_type lu -pc_factor_mat_solver_type superlu_dist -mat_superlu_dist_equil false -m 150 -mat_superlu_dist_r 3 -mat_superlu_dist_c 5 -test_scaledMat

   test:
      suffix: superlu_dist_2
      nsize: 15
      requires: superlu_dist
      args: -pc_type lu -pc_factor_mat_solver_type superlu_dist -mat_superlu_dist_equil false -m 150 -mat_superlu_dist_r 3 -mat_superlu_dist_c 5 -test_scaledMat -mat_superlu_dist_fact SamePattern_SameRowPerm
      output_file: output/ex5_superlu_dist.out

   test:
      suffix: superlu_dist_3
      nsize: 15
      requires: superlu_dist
      args: -pc_type lu -pc_factor_mat_solver_type superlu_dist -mat_superlu_dist_equil false -m 500 -mat_superlu_dist_r 3 -mat_superlu_dist_c 5 -test_scaledMat -mat_superlu_dist_fact DOFACT
      output_file: output/ex5_superlu_dist.out

   test:
      suffix: superlu_dist_0
      nsize: 1
      requires: superlu_dist
      args: -pc_type lu -pc_factor_mat_solver_type superlu_dist -test_scaledMat
      output_file: output/ex5_superlu_dist.out

TEST*/