Actual source code: ex3.c
2: static char help[] = "Test PC redistribute on matrix with load imbalance. \n\
3: Modified from src/ksp/ksp/tutorials/ex2.c.\n\
4: Input parameters include:\n\
5: -random_exact_sol : use a random exact solution vector\n\
6: -view_exact_sol : write exact solution vector to stdout\n\
7: -n <mesh_y> : number of mesh points\n\n";
8: /*
9: Example:
10: mpiexec -n 8 ./ex3 -n 10000 -ksp_type cg -pc_type bjacobi -sub_pc_type icc -ksp_rtol 1.e-8 -log_view
11: mpiexec -n 8 ./ex3 -n 10000 -ksp_type preonly -pc_type redistribute -redistribute_ksp_type cg -redistribute_pc_type bjacobi -redistribute_sub_pc_type icc -redistribute_ksp_rtol 1.e-8 -log_view
12: */
14: #include <petscksp.h>
16: int main(int argc, char **args)
17: {
18: Vec x, b, u; /* approx solution, RHS, exact solution */
19: Mat A; /* linear system matrix */
20: KSP ksp; /* linear solver context */
21: PetscRandom rctx; /* random number generator context */
22: PetscReal norm; /* norm of solution error */
23: PetscInt i, j, Ii, J, Istart, Iend, m, n = 7, its, nloc, matdistribute = 0;
24: PetscBool flg = PETSC_FALSE;
25: PetscScalar v;
26: PetscMPIInt rank, size;
27: #if defined(PETSC_USE_LOG)
28: PetscLogStage stage;
29: #endif
32: PetscInitialize(&argc, &args, (char *)0, help);
33: MPI_Comm_size(PETSC_COMM_WORLD, &size);
34: MPI_Comm_rank(PETSC_COMM_WORLD, &rank);
37: PetscOptionsGetInt(NULL, NULL, "-n", &n, NULL);
38: PetscOptionsGetInt(NULL, NULL, "-matdistribute", &matdistribute, NULL);
39: /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
40: Compute the matrix and right-hand-side vector that define
41: the linear system, Ax = b.
42: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
43: switch (matdistribute) {
44: case 1: /* very imbalanced process load for matrix A */
45: m = (1 + size) * size;
46: nloc = (rank + 1) * n;
47: if (rank == size - 1) { /* proc[size-1] stores all remaining rows */
48: nloc = m * n;
49: for (i = 0; i < size - 1; i++) nloc -= (i + 1) * n;
50: }
51: break;
52: default: /* proc[0] and proc[1] load much smaller row blocks, the rest processes have same loads */
53: if (rank == 0 || rank == 1) {
54: nloc = n;
55: } else {
56: nloc = 10 * n; /* 10x larger load */
57: }
58: m = 2 + (size - 2) * 10;
59: break;
60: }
61: MatCreate(PETSC_COMM_WORLD, &A);
62: MatSetSizes(A, nloc, nloc, PETSC_DECIDE, PETSC_DECIDE);
63: MatSetFromOptions(A);
64: MatMPIAIJSetPreallocation(A, 5, NULL, 5, NULL);
65: MatSeqAIJSetPreallocation(A, 5, NULL);
66: MatSetUp(A);
68: MatGetOwnershipRange(A, &Istart, &Iend);
69: nloc = Iend - Istart;
70: PetscSynchronizedPrintf(PETSC_COMM_WORLD, "[%d] A Istart,Iend: %" PetscInt_FMT " %" PetscInt_FMT "; nloc %" PetscInt_FMT "\n", rank, Istart, Iend, nloc);
71: PetscSynchronizedFlush(PETSC_COMM_WORLD, PETSC_STDOUT);
73: PetscLogStageRegister("Assembly", &stage);
74: PetscLogStagePush(stage);
75: for (Ii = Istart; Ii < Iend; Ii++) {
76: v = -1.0;
77: i = Ii / n;
78: j = Ii - i * n;
79: if (i > 0) {
80: J = Ii - n;
81: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
82: }
83: if (i < m - 1) {
84: J = Ii + n;
85: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
86: }
87: if (j > 0) {
88: J = Ii - 1;
89: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
90: }
91: if (j < n - 1) {
92: J = Ii + 1;
93: MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
94: }
95: v = 4.0;
96: MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES);
97: }
98: MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
99: MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
100: PetscLogStagePop();
102: /* A is symmetric. Set symmetric flag to enable ICC/Cholesky preconditioner */
103: MatSetOption(A, MAT_SYMMETRIC, PETSC_TRUE);
105: /* Create parallel vectors. */
106: VecCreate(PETSC_COMM_WORLD, &u);
107: VecSetSizes(u, nloc, PETSC_DECIDE);
108: VecSetFromOptions(u);
109: VecDuplicate(u, &b);
110: VecDuplicate(b, &x);
112: /* Set exact solution; then compute right-hand-side vector. */
113: PetscOptionsGetBool(NULL, NULL, "-random_exact_sol", &flg, NULL);
114: if (flg) {
115: PetscRandomCreate(PETSC_COMM_WORLD, &rctx);
116: PetscRandomSetFromOptions(rctx);
117: VecSetRandom(u, rctx);
118: PetscRandomDestroy(&rctx);
119: } else {
120: VecSet(u, 1.0);
121: }
122: MatMult(A, u, b);
124: /* View the exact solution vector if desired */
125: flg = PETSC_FALSE;
126: PetscOptionsGetBool(NULL, NULL, "-view_exact_sol", &flg, NULL);
127: if (flg) VecView(u, PETSC_VIEWER_STDOUT_WORLD);
129: /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
130: Create the linear solver and set various options
131: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
132: KSPCreate(PETSC_COMM_WORLD, &ksp);
133: KSPSetOperators(ksp, A, A);
134: KSPSetTolerances(ksp, 1.e-2 / ((m + 1) * (n + 1)), PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT);
135: KSPSetFromOptions(ksp);
137: /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
138: Solve the linear system
139: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
140: KSPSolve(ksp, b, x);
142: /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
143: Check solution and clean up
144: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
145: VecAXPY(x, -1.0, u);
146: VecNorm(x, NORM_2, &norm);
147: KSPGetIterationNumber(ksp, &its);
148: PetscPrintf(PETSC_COMM_WORLD, "Norm of error %g iterations %" PetscInt_FMT "\n", (double)norm, its);
150: /* Free work space. */
151: KSPDestroy(&ksp);
152: VecDestroy(&u);
153: VecDestroy(&x);
154: VecDestroy(&b);
155: MatDestroy(&A);
156: PetscFinalize();
157: return 0;
158: }
160: /*TEST
162: test:
163: nsize: 8
164: args: -n 100 -ksp_type cg -pc_type bjacobi -sub_pc_type icc -ksp_rtol 1.e-8
166: test:
167: suffix: 2
168: nsize: 8
169: args: -n 100 -ksp_type preonly -pc_type redistribute -redistribute_ksp_type cg -redistribute_pc_type bjacobi -redistribute_sub_pc_type icc -redistribute_ksp_rtol 1.e-8
171: TEST*/