Actual source code: ex192.c

  1: static char help[] = "Tests MatSolve() and MatMatSolve() with MUMPS or MKL_PARDISO sequential solvers in Schur complement mode.\n\
  2: Example: mpiexec -n 1 ./ex192 -f <matrix binary file> -nrhs 4 -symmetric_solve -hermitian_solve -schur_ratio 0.3\n\n";

  4: #include <petscmat.h>

  6: int main(int argc, char **args)
  7: {
  8:   Mat         A, RHS, C, F, X, S;
  9:   Vec         u, x, b;
 10:   Vec         xschur, bschur, uschur;
 11:   IS          is_schur;
 12:   PetscMPIInt size;
 13:   PetscInt    isolver = 0, size_schur, m, n, nfact, nsolve, nrhs;
 14:   PetscReal   norm, tol = PETSC_SQRT_MACHINE_EPSILON;
 15:   PetscRandom rand;
 16:   PetscBool   data_provided, herm, symm, use_lu, cuda = PETSC_FALSE;
 17:   PetscReal   sratio = 5.1 / 12.;
 18:   PetscViewer fd; /* viewer */
 19:   char        solver[256];
 20:   char        file[PETSC_MAX_PATH_LEN]; /* input file name */

 23:   PetscInitialize(&argc, &args, (char *)0, help);
 24:   MPI_Comm_size(PETSC_COMM_WORLD, &size);
 26:   /* Determine which type of solver we want to test for */
 27:   herm = PETSC_FALSE;
 28:   symm = PETSC_FALSE;
 29:   PetscOptionsGetBool(NULL, NULL, "-symmetric_solve", &symm, NULL);
 30:   PetscOptionsGetBool(NULL, NULL, "-hermitian_solve", &herm, NULL);
 31:   if (herm) symm = PETSC_TRUE;
 32:   PetscOptionsGetBool(NULL, NULL, "-cuda_solve", &cuda, NULL);
 33:   PetscOptionsGetReal(NULL, NULL, "-tol", &tol, NULL);

 35:   /* Determine file from which we read the matrix A */
 36:   PetscOptionsGetString(NULL, NULL, "-f", file, sizeof(file), &data_provided);
 37:   if (!data_provided) { /* get matrices from PETSc distribution */
 38:     PetscStrncpy(file, "${PETSC_DIR}/share/petsc/datafiles/matrices/", sizeof(file));
 39:     if (symm) {
 40: #if defined(PETSC_USE_COMPLEX)
 41:       PetscStrlcat(file, "hpd-complex-", sizeof(file));
 42: #else
 43:       PetscStrlcat(file, "spd-real-", sizeof(file));
 44: #endif
 45:     } else {
 46: #if defined(PETSC_USE_COMPLEX)
 47:       PetscStrlcat(file, "nh-complex-", sizeof(file));
 48: #else
 49:       PetscStrlcat(file, "ns-real-", sizeof(file));
 50: #endif
 51:     }
 52: #if defined(PETSC_USE_64BIT_INDICES)
 53:     PetscStrlcat(file, "int64-", sizeof(file));
 54: #else
 55:     PetscStrlcat(file, "int32-", sizeof(file));
 56: #endif
 57: #if defined(PETSC_USE_REAL_SINGLE)
 58:     PetscStrlcat(file, "float32", sizeof(file));
 59: #else
 60:     PetscStrlcat(file, "float64", sizeof(file));
 61: #endif
 62:   }
 63:   /* Load matrix A */
 64:   PetscViewerBinaryOpen(PETSC_COMM_WORLD, file, FILE_MODE_READ, &fd);
 65:   MatCreate(PETSC_COMM_WORLD, &A);
 66:   MatLoad(A, fd);
 67:   PetscViewerDestroy(&fd);
 68:   MatGetSize(A, &m, &n);

 71:   /* Create dense matrix C and X; C holds true solution with identical columns */
 72:   nrhs = 2;
 73:   PetscOptionsGetInt(NULL, NULL, "-nrhs", &nrhs, NULL);
 74:   MatCreate(PETSC_COMM_WORLD, &C);
 75:   MatSetSizes(C, m, PETSC_DECIDE, PETSC_DECIDE, nrhs);
 76:   MatSetType(C, MATDENSE);
 77:   MatSetFromOptions(C);
 78:   MatSetUp(C);

 80:   PetscRandomCreate(PETSC_COMM_WORLD, &rand);
 81:   PetscRandomSetFromOptions(rand);
 82:   MatSetRandom(C, rand);
 83:   MatDuplicate(C, MAT_DO_NOT_COPY_VALUES, &X);

 85:   /* Create vectors */
 86:   VecCreate(PETSC_COMM_WORLD, &x);
 87:   VecSetSizes(x, n, PETSC_DECIDE);
 88:   VecSetFromOptions(x);
 89:   VecDuplicate(x, &b);
 90:   VecDuplicate(x, &u); /* save the true solution */

 92:   PetscOptionsGetInt(NULL, NULL, "-solver", &isolver, NULL);
 93:   switch (isolver) {
 94: #if defined(PETSC_HAVE_MUMPS)
 95:   case 0:
 96:     PetscStrcpy(solver, MATSOLVERMUMPS);
 97:     break;
 98: #endif
 99: #if defined(PETSC_HAVE_MKL_PARDISO)
100:   case 1:
101:     PetscStrcpy(solver, MATSOLVERMKL_PARDISO);
102:     break;
103: #endif
104:   default:
105:     PetscStrcpy(solver, MATSOLVERPETSC);
106:     break;
107:   }

109: #if defined(PETSC_USE_COMPLEX)
110:   if (isolver == 0 && symm && !data_provided) { /* MUMPS (5.0.0) does not have support for hermitian matrices, so make them symmetric */
111:     PetscScalar im  = PetscSqrtScalar((PetscScalar)-1.);
112:     PetscScalar val = -1.0;
113:     val             = val + im;
114:     MatSetValue(A, 1, 0, val, INSERT_VALUES);
115:     MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
116:     MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
117:   }
118: #endif

120:   PetscOptionsGetReal(NULL, NULL, "-schur_ratio", &sratio, NULL);
122:   size_schur = (PetscInt)(sratio * m);

124:   PetscPrintf(PETSC_COMM_SELF, "Solving with %s: nrhs %" PetscInt_FMT ", sym %d, herm %d, size schur %" PetscInt_FMT ", size mat %" PetscInt_FMT "\n", solver, nrhs, symm, herm, size_schur, m);

126:   /* Test LU/Cholesky Factorization */
127:   use_lu = PETSC_FALSE;
128:   if (!symm) use_lu = PETSC_TRUE;
129: #if defined(PETSC_USE_COMPLEX)
130:   if (isolver == 1) use_lu = PETSC_TRUE;
131: #endif
132:   if (cuda && symm && !herm) use_lu = PETSC_TRUE;

134:   if (herm && !use_lu) { /* test also conversion routines inside the solver packages */
135:     MatSetOption(A, MAT_SYMMETRIC, PETSC_TRUE);
136:     MatConvert(A, MATSEQSBAIJ, MAT_INPLACE_MATRIX, &A);
137:   }

139:   if (use_lu) {
140:     MatGetFactor(A, solver, MAT_FACTOR_LU, &F);
141:   } else {
142:     if (herm) {
143:       MatSetOption(A, MAT_SPD, PETSC_TRUE);
144:     } else {
145:       MatSetOption(A, MAT_SYMMETRIC, PETSC_TRUE);
146:       MatSetOption(A, MAT_SPD, PETSC_FALSE);
147:     }
148:     MatGetFactor(A, solver, MAT_FACTOR_CHOLESKY, &F);
149:   }
150:   ISCreateStride(PETSC_COMM_SELF, size_schur, m - size_schur, 1, &is_schur);
151:   MatFactorSetSchurIS(F, is_schur);

153:   ISDestroy(&is_schur);
154:   if (use_lu) {
155:     MatLUFactorSymbolic(F, A, NULL, NULL, NULL);
156:   } else {
157:     MatCholeskyFactorSymbolic(F, A, NULL, NULL);
158:   }

160:   for (nfact = 0; nfact < 3; nfact++) {
161:     Mat AD;

163:     if (!nfact) {
164:       VecSetRandom(x, rand);
165:       if (symm && herm) VecAbs(x);
166:       MatDiagonalSet(A, x, ADD_VALUES);
167:     }
168:     if (use_lu) {
169:       MatLUFactorNumeric(F, A, NULL);
170:     } else {
171:       MatCholeskyFactorNumeric(F, A, NULL);
172:     }
173:     if (cuda) {
174:       MatFactorGetSchurComplement(F, &S, NULL);
175:       MatSetType(S, MATSEQDENSECUDA);
176:       MatCreateVecs(S, &xschur, &bschur);
177:       MatFactorRestoreSchurComplement(F, &S, MAT_FACTOR_SCHUR_UNFACTORED);
178:     }
179:     MatFactorCreateSchurComplement(F, &S, NULL);
180:     if (!cuda) MatCreateVecs(S, &xschur, &bschur);
181:     VecDuplicate(xschur, &uschur);
182:     if (nfact == 1 && (!cuda || (herm && symm))) MatFactorInvertSchurComplement(F);
183:     for (nsolve = 0; nsolve < 2; nsolve++) {
184:       VecSetRandom(x, rand);
185:       VecCopy(x, u);

187:       if (nsolve) {
188:         MatMult(A, x, b);
189:         MatSolve(F, b, x);
190:       } else {
191:         MatMultTranspose(A, x, b);
192:         MatSolveTranspose(F, b, x);
193:       }
194:       /* Check the error */
195:       VecAXPY(u, -1.0, x); /* u <- (-1.0)x + u */
196:       VecNorm(u, NORM_2, &norm);
197:       if (norm > tol) {
198:         PetscReal resi;
199:         if (nsolve) {
200:           MatMult(A, x, u); /* u = A*x */
201:         } else {
202:           MatMultTranspose(A, x, u); /* u = A*x */
203:         }
204:         VecAXPY(u, -1.0, b); /* u <- (-1.0)b + u */
205:         VecNorm(u, NORM_2, &resi);
206:         if (nsolve) {
207:           PetscPrintf(PETSC_COMM_SELF, "(f %" PetscInt_FMT ", s %" PetscInt_FMT ") MatSolve error: Norm of error %g, residual %g\n", nfact, nsolve, (double)norm, (double)resi);
208:         } else {
209:           PetscPrintf(PETSC_COMM_SELF, "(f %" PetscInt_FMT ", s %" PetscInt_FMT ") MatSolveTranspose error: Norm of error %g, residual %f\n", nfact, nsolve, (double)norm, (double)resi);
210:         }
211:       }
212:       VecSetRandom(xschur, rand);
213:       VecCopy(xschur, uschur);
214:       if (nsolve) {
215:         MatMult(S, xschur, bschur);
216:         MatFactorSolveSchurComplement(F, bschur, xschur);
217:       } else {
218:         MatMultTranspose(S, xschur, bschur);
219:         MatFactorSolveSchurComplementTranspose(F, bschur, xschur);
220:       }
221:       /* Check the error */
222:       VecAXPY(uschur, -1.0, xschur); /* u <- (-1.0)x + u */
223:       VecNorm(uschur, NORM_2, &norm);
224:       if (norm > tol) {
225:         PetscReal resi;
226:         if (nsolve) {
227:           MatMult(S, xschur, uschur); /* u = A*x */
228:         } else {
229:           MatMultTranspose(S, xschur, uschur); /* u = A*x */
230:         }
231:         VecAXPY(uschur, -1.0, bschur); /* u <- (-1.0)b + u */
232:         VecNorm(uschur, NORM_2, &resi);
233:         if (nsolve) {
234:           PetscPrintf(PETSC_COMM_SELF, "(f %" PetscInt_FMT ", s %" PetscInt_FMT ") MatFactorSolveSchurComplement error: Norm of error %g, residual %g\n", nfact, nsolve, (double)norm, (double)resi);
235:         } else {
236:           PetscPrintf(PETSC_COMM_SELF, "(f %" PetscInt_FMT ", s %" PetscInt_FMT ") MatFactorSolveSchurComplementTranspose error: Norm of error %g, residual %f\n", nfact, nsolve, (double)norm, (double)resi);
237:         }
238:       }
239:     }
240:     MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &AD);
241:     if (!nfact) {
242:       MatMatMult(AD, C, MAT_INITIAL_MATRIX, 2.0, &RHS);
243:     } else {
244:       MatMatMult(AD, C, MAT_REUSE_MATRIX, 2.0, &RHS);
245:     }
246:     MatDestroy(&AD);
247:     for (nsolve = 0; nsolve < 2; nsolve++) {
248:       MatMatSolve(F, RHS, X);

250:       /* Check the error */
251:       MatAXPY(X, -1.0, C, SAME_NONZERO_PATTERN);
252:       MatNorm(X, NORM_FROBENIUS, &norm);
253:       if (norm > tol) PetscPrintf(PETSC_COMM_SELF, "(f %" PetscInt_FMT ", s %" PetscInt_FMT ") MatMatSolve: Norm of error %g\n", nfact, nsolve, (double)norm);
254:     }
255:     if (isolver == 0) {
256:       Mat spRHS, spRHST, RHST;

258:       MatTranspose(RHS, MAT_INITIAL_MATRIX, &RHST);
259:       MatConvert(RHST, MATSEQAIJ, MAT_INITIAL_MATRIX, &spRHST);
260:       MatCreateTranspose(spRHST, &spRHS);
261:       for (nsolve = 0; nsolve < 2; nsolve++) {
262:         MatMatSolve(F, spRHS, X);

264:         /* Check the error */
265:         MatAXPY(X, -1.0, C, SAME_NONZERO_PATTERN);
266:         MatNorm(X, NORM_FROBENIUS, &norm);
267:         if (norm > tol) PetscPrintf(PETSC_COMM_SELF, "(f %" PetscInt_FMT ", s %" PetscInt_FMT ") sparse MatMatSolve: Norm of error %g\n", nfact, nsolve, (double)norm);
268:       }
269:       MatDestroy(&spRHST);
270:       MatDestroy(&spRHS);
271:       MatDestroy(&RHST);
272:     }
273:     MatDestroy(&S);
274:     VecDestroy(&xschur);
275:     VecDestroy(&bschur);
276:     VecDestroy(&uschur);
277:   }
278:   /* Free data structures */
279:   MatDestroy(&A);
280:   MatDestroy(&C);
281:   MatDestroy(&F);
282:   MatDestroy(&X);
283:   MatDestroy(&RHS);
284:   PetscRandomDestroy(&rand);
285:   VecDestroy(&x);
286:   VecDestroy(&b);
287:   VecDestroy(&u);
288:   PetscFinalize();
289:   return 0;
290: }

292: /*TEST

294:    testset:
295:      requires: mkl_pardiso double !complex
296:      args: -solver 1

298:      test:
299:        suffix: mkl_pardiso
300:      test:
301:        requires: cuda
302:        suffix: mkl_pardiso_cuda
303:        args: -cuda_solve
304:        output_file: output/ex192_mkl_pardiso.out
305:      test:
306:        suffix: mkl_pardiso_1
307:        args: -symmetric_solve
308:        output_file: output/ex192_mkl_pardiso_1.out
309:      test:
310:        requires: cuda
311:        suffix: mkl_pardiso_cuda_1
312:        args: -symmetric_solve -cuda_solve
313:        output_file: output/ex192_mkl_pardiso_1.out
314:      test:
315:        suffix: mkl_pardiso_3
316:        args: -symmetric_solve -hermitian_solve
317:        output_file: output/ex192_mkl_pardiso_3.out
318:      test:
319:        requires: cuda defined(PETSC_HAVE_CUSOLVERDNDPOTRI)
320:        suffix: mkl_pardiso_cuda_3
321:        args: -symmetric_solve -hermitian_solve -cuda_solve
322:        output_file: output/ex192_mkl_pardiso_3.out

324:    testset:
325:      requires: mumps double !complex
326:      args: -solver 0

328:      test:
329:        suffix: mumps
330:      test:
331:        requires: cuda
332:        suffix: mumps_cuda
333:        args: -cuda_solve
334:        output_file: output/ex192_mumps.out
335:      test:
336:        suffix: mumps_2
337:        args: -symmetric_solve
338:        output_file: output/ex192_mumps_2.out
339:      test:
340:        requires: cuda
341:        suffix: mumps_cuda_2
342:        args: -symmetric_solve -cuda_solve
343:        output_file: output/ex192_mumps_2.out
344:      test:
345:        suffix: mumps_3
346:        args: -symmetric_solve -hermitian_solve
347:        output_file: output/ex192_mumps_3.out
348:      test:
349:        requires: cuda defined(PETSC_HAVE_CUSOLVERDNDPOTRI)
350:        suffix: mumps_cuda_3
351:        args: -symmetric_solve -hermitian_solve -cuda_solve
352:        output_file: output/ex192_mumps_3.out

354: TEST*/