Actual source code: bddcschurs.c
1: #include <petsc/private/pcbddcimpl.h>
2: #include <petsc/private/pcbddcprivateimpl.h>
3: #include <../src/mat/impls/dense/seq/dense.h>
4: #include <petscblaslapack.h>
6: static inline PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt *, PetscInt, PetscBT, PetscInt *, PetscInt *, PetscInt *);
7: static PetscErrorCode PCBDDCComputeExplicitSchur(Mat, PetscBool, MatReuse, Mat *);
8: static PetscErrorCode PCBDDCReuseSolvers_Interior(PC, Vec, Vec);
9: static PetscErrorCode PCBDDCReuseSolvers_Correction(PC, Vec, Vec);
11: /* if v2 is not present, correction is done in-place */
12: PetscErrorCode PCBDDCReuseSolversBenignAdapt(PCBDDCReuseSolvers ctx, Vec v, Vec v2, PetscBool sol, PetscBool full)
13: {
14: PetscScalar *array;
15: PetscScalar *array2;
17: if (!ctx->benign_n) return 0;
18: if (sol && full) {
19: PetscInt n_I, size_schur;
21: /* get sizes */
22: MatGetSize(ctx->benign_csAIB, &size_schur, NULL);
23: VecGetSize(v, &n_I);
24: n_I = n_I - size_schur;
25: /* get schur sol from array */
26: VecGetArray(v, &array);
27: VecPlaceArray(ctx->benign_dummy_schur_vec, array + n_I);
28: VecRestoreArray(v, &array);
29: /* apply interior sol correction */
30: MatMultTranspose(ctx->benign_csAIB, ctx->benign_dummy_schur_vec, ctx->benign_corr_work);
31: VecResetArray(ctx->benign_dummy_schur_vec);
32: MatMultAdd(ctx->benign_AIIm1ones, ctx->benign_corr_work, v, v);
33: }
34: if (v2) {
35: PetscInt nl;
37: VecGetArrayRead(v, (const PetscScalar **)&array);
38: VecGetLocalSize(v2, &nl);
39: VecGetArray(v2, &array2);
40: PetscArraycpy(array2, array, nl);
41: } else {
42: VecGetArray(v, &array);
43: array2 = array;
44: }
45: if (!sol) { /* change rhs */
46: PetscInt n;
47: for (n = 0; n < ctx->benign_n; n++) {
48: PetscScalar sum = 0.;
49: const PetscInt *cols;
50: PetscInt nz, i;
52: ISGetLocalSize(ctx->benign_zerodiag_subs[n], &nz);
53: ISGetIndices(ctx->benign_zerodiag_subs[n], &cols);
54: for (i = 0; i < nz - 1; i++) sum += array[cols[i]];
55: #if defined(PETSC_USE_COMPLEX)
56: sum = -(PetscRealPart(sum) / nz + PETSC_i * (PetscImaginaryPart(sum) / nz));
57: #else
58: sum = -sum / nz;
59: #endif
60: for (i = 0; i < nz - 1; i++) array2[cols[i]] += sum;
61: ctx->benign_save_vals[n] = array2[cols[nz - 1]];
62: array2[cols[nz - 1]] = sum;
63: ISRestoreIndices(ctx->benign_zerodiag_subs[n], &cols);
64: }
65: } else {
66: PetscInt n;
67: for (n = 0; n < ctx->benign_n; n++) {
68: PetscScalar sum = 0.;
69: const PetscInt *cols;
70: PetscInt nz, i;
71: ISGetLocalSize(ctx->benign_zerodiag_subs[n], &nz);
72: ISGetIndices(ctx->benign_zerodiag_subs[n], &cols);
73: for (i = 0; i < nz - 1; i++) sum += array[cols[i]];
74: #if defined(PETSC_USE_COMPLEX)
75: sum = -(PetscRealPart(sum) / nz + PETSC_i * (PetscImaginaryPart(sum) / nz));
76: #else
77: sum = -sum / nz;
78: #endif
79: for (i = 0; i < nz - 1; i++) array2[cols[i]] += sum;
80: array2[cols[nz - 1]] = ctx->benign_save_vals[n];
81: ISRestoreIndices(ctx->benign_zerodiag_subs[n], &cols);
82: }
83: }
84: if (v2) {
85: VecRestoreArrayRead(v, (const PetscScalar **)&array);
86: VecRestoreArray(v2, &array2);
87: } else {
88: VecRestoreArray(v, &array);
89: }
90: if (!sol && full) {
91: Vec usedv;
92: PetscInt n_I, size_schur;
94: /* get sizes */
95: MatGetSize(ctx->benign_csAIB, &size_schur, NULL);
96: VecGetSize(v, &n_I);
97: n_I = n_I - size_schur;
98: /* compute schur rhs correction */
99: if (v2) {
100: usedv = v2;
101: } else {
102: usedv = v;
103: }
104: /* apply schur rhs correction */
105: MatMultTranspose(ctx->benign_AIIm1ones, usedv, ctx->benign_corr_work);
106: VecGetArrayRead(usedv, (const PetscScalar **)&array);
107: VecPlaceArray(ctx->benign_dummy_schur_vec, array + n_I);
108: VecRestoreArrayRead(usedv, (const PetscScalar **)&array);
109: MatMultAdd(ctx->benign_csAIB, ctx->benign_corr_work, ctx->benign_dummy_schur_vec, ctx->benign_dummy_schur_vec);
110: VecResetArray(ctx->benign_dummy_schur_vec);
111: }
112: return 0;
113: }
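/* Common driver for the interior and correction solves below: with full == PETSC_TRUE the whole
   factored matrix (including the Schur rows) is used, with full == PETSC_FALSE only the interior
   block is solved; the two modes are selected through solver-specific controls (MUMPS ICNTL(26),
   MKL PARDISO control 70). When needed, the rhs is first copied into the factor's workspace, and
   PCBDDCReuseSolversBenignAdapt pre/post-processes rhs and solution if the benign trick is active. */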
115: static PetscErrorCode PCBDDCReuseSolvers_Solve_Private(PC pc, Vec rhs, Vec sol, PetscBool transpose, PetscBool full)
116: {
117: PCBDDCReuseSolvers ctx;
118: PetscBool copy = PETSC_FALSE;
120: PCShellGetContext(pc, &ctx);
121: if (full) {
122: #if defined(PETSC_HAVE_MUMPS)
123: MatMumpsSetIcntl(ctx->F, 26, -1);
124: #endif
125: #if defined(PETSC_HAVE_MKL_PARDISO)
126: MatMkl_PardisoSetCntl(ctx->F, 70, 0);
127: #endif
128: copy = ctx->has_vertices;
129: } else { /* interior solver */
130: #if defined(PETSC_HAVE_MUMPS)
131: MatMumpsSetIcntl(ctx->F, 26, 0);
132: #endif
133: #if defined(PETSC_HAVE_MKL_PARDISO)
134: MatMkl_PardisoSetCntl(ctx->F, 70, 1);
135: #endif
136: copy = PETSC_TRUE;
137: }
138: /* copy rhs into factored matrix workspace */
139: if (copy) {
140: PetscInt n;
141: PetscScalar *array, *array_solver;
143: VecGetLocalSize(rhs, &n);
144: VecGetArrayRead(rhs, (const PetscScalar **)&array);
145: VecGetArray(ctx->rhs, &array_solver);
146: PetscArraycpy(array_solver, array, n);
147: VecRestoreArray(ctx->rhs, &array_solver);
148: VecRestoreArrayRead(rhs, (const PetscScalar **)&array);
150: PCBDDCReuseSolversBenignAdapt(ctx, ctx->rhs, NULL, PETSC_FALSE, full);
151: if (transpose) {
152: MatSolveTranspose(ctx->F, ctx->rhs, ctx->sol);
153: } else {
154: MatSolve(ctx->F, ctx->rhs, ctx->sol);
155: }
156: PCBDDCReuseSolversBenignAdapt(ctx, ctx->sol, NULL, PETSC_TRUE, full);
158: /* get the data back to the caller's workspace */
159: VecGetArrayRead(ctx->sol, (const PetscScalar **)&array_solver);
160: VecGetArray(sol, &array);
161: PetscArraycpy(array, array_solver, n);
162: VecRestoreArray(sol, &array);
163: VecRestoreArrayRead(ctx->sol, (const PetscScalar **)&array_solver);
164: } else {
165: if (ctx->benign_n) {
166: PCBDDCReuseSolversBenignAdapt(ctx, rhs, ctx->rhs, PETSC_FALSE, full);
167: if (transpose) {
168: MatSolveTranspose(ctx->F, ctx->rhs, sol);
169: } else {
170: MatSolve(ctx->F, ctx->rhs, sol);
171: }
172: PCBDDCReuseSolversBenignAdapt(ctx, sol, NULL, PETSC_TRUE, full);
173: } else {
174: if (transpose) {
175: MatSolveTranspose(ctx->F, rhs, sol);
176: } else {
177: MatSolve(ctx->F, rhs, sol);
178: }
179: }
180: }
181: /* restore defaults */
182: #if defined(PETSC_HAVE_MUMPS)
183: MatMumpsSetIcntl(ctx->F, 26, -1);
184: #endif
185: #if defined(PETSC_HAVE_MKL_PARDISO)
186: MatMkl_PardisoSetCntl(ctx->F, 70, 0);
187: #endif
188: return 0;
189: }
191: static PetscErrorCode PCBDDCReuseSolvers_Correction(PC pc, Vec rhs, Vec sol)
192: {
193: PCBDDCReuseSolvers_Solve_Private(pc, rhs, sol, PETSC_FALSE, PETSC_TRUE);
194: return 0;
195: }
197: static PetscErrorCode PCBDDCReuseSolvers_CorrectionTranspose(PC pc, Vec rhs, Vec sol)
198: {
199: PCBDDCReuseSolvers_Solve_Private(pc, rhs, sol, PETSC_TRUE, PETSC_TRUE);
200: return 0;
201: }
203: static PetscErrorCode PCBDDCReuseSolvers_Interior(PC pc, Vec rhs, Vec sol)
204: {
205: PCBDDCReuseSolvers_Solve_Private(pc, rhs, sol, PETSC_FALSE, PETSC_FALSE);
206: return 0;
207: }
209: static PetscErrorCode PCBDDCReuseSolvers_InteriorTranspose(PC pc, Vec rhs, Vec sol)
210: {
211: PCBDDCReuseSolvers_Solve_Private(pc, rhs, sol, PETSC_TRUE, PETSC_FALSE);
212: return 0;
213: }
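/* The four wrappers above are the PCShell apply/applyTranspose callbacks installed on the
   interior and correction shell preconditioners created in PCBDDCSubSchursSetUp. */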
215: static PetscErrorCode PCBDDCReuseSolvers_View(PC pc, PetscViewer viewer)
216: {
217: PCBDDCReuseSolvers ctx;
218: PetscBool iascii;
220: PCShellGetContext(pc, &ctx);
221: PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii);
222: if (iascii) PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO);
223: MatView(ctx->F, viewer);
224: if (iascii) PetscViewerPopFormat(viewer);
225: return 0;
226: }
228: static PetscErrorCode PCBDDCReuseSolversReset(PCBDDCReuseSolvers reuse)
229: {
230: PetscInt i;
232: MatDestroy(&reuse->F);
233: VecDestroy(&reuse->sol);
234: VecDestroy(&reuse->rhs);
235: PCDestroy(&reuse->interior_solver);
236: PCDestroy(&reuse->correction_solver);
237: ISDestroy(&reuse->is_R);
238: ISDestroy(&reuse->is_B);
239: VecScatterDestroy(&reuse->correction_scatter_B);
240: VecDestroy(&reuse->sol_B);
241: VecDestroy(&reuse->rhs_B);
242: for (i = 0; i < reuse->benign_n; i++) ISDestroy(&reuse->benign_zerodiag_subs[i]);
243: PetscFree(reuse->benign_zerodiag_subs);
244: PetscFree(reuse->benign_save_vals);
245: MatDestroy(&reuse->benign_csAIB);
246: MatDestroy(&reuse->benign_AIIm1ones);
247: VecDestroy(&reuse->benign_corr_work);
248: VecDestroy(&reuse->benign_dummy_schur_vec);
249: return 0;
250: }
252: static PetscErrorCode PCBDDCReuseSolvers_Destroy(PC pc)
253: {
254: PCBDDCReuseSolvers ctx;
256: PCShellGetContext(pc, &ctx);
257: PCBDDCReuseSolversReset(ctx);
258: PetscFree(ctx);
259: PCShellSetContext(pc, NULL);
260: return 0;
261: }
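/* Assemble the dense Schur complement of the MatSchurComplement M = [A B; C D]:

     S = D - C A^{-1} B   (S = D - B^T A^{-1} B when issym is true, S = -C A^{-1} B when D is absent)

   A^{-1} B is computed with MatMatSolve when the inner PC is a factorization (LU/ILU/Cholesky),
   and otherwise either through PCComputeOperator or with one KSP solve per column of B. */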
263: static PetscErrorCode PCBDDCComputeExplicitSchur(Mat M, PetscBool issym, MatReuse reuse, Mat *S)
264: {
265: Mat B, C, D, Bd, Cd, AinvBd;
266: KSP ksp;
267: PC pc;
268: PetscBool isLU, isILU, isCHOL, Bdense, Cdense;
269: PetscReal fill = 2.0;
270: PetscInt n_I;
271: PetscMPIInt size;
273: MPI_Comm_size(PetscObjectComm((PetscObject)M), &size);
274: if (size != 1) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not for parallel matrices");
275: if (reuse == MAT_REUSE_MATRIX) {
276: PetscBool Sdense;
278: PetscObjectTypeCompare((PetscObject)*S, MATSEQDENSE, &Sdense);
279: if (!Sdense) SETERRQ(PetscObjectComm((PetscObject)M), PETSC_ERR_SUP, "S must be dense when reusing the matrix");
280: }
281: MatSchurComplementGetSubMatrices(M, NULL, NULL, &B, &C, &D);
282: MatSchurComplementGetKSP(M, &ksp);
283: KSPGetPC(ksp, &pc);
284: PetscObjectTypeCompare((PetscObject)pc, PCLU, &isLU);
285: PetscObjectTypeCompare((PetscObject)pc, PCILU, &isILU);
286: PetscObjectTypeCompare((PetscObject)pc, PCCHOLESKY, &isCHOL);
287: PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &Bdense);
288: PetscObjectTypeCompare((PetscObject)C, MATSEQDENSE, &Cdense);
289: MatGetSize(B, &n_I, NULL);
290: if (n_I) {
291: if (!Bdense) {
292: MatConvert(B, MATSEQDENSE, MAT_INITIAL_MATRIX, &Bd);
293: } else {
294: Bd = B;
295: }
297: if (isLU || isILU || isCHOL) {
298: Mat fact;
299: KSPSetUp(ksp);
300: PCFactorGetMatrix(pc, &fact);
301: MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd);
302: MatMatSolve(fact, Bd, AinvBd);
303: } else {
304: PetscBool ex = PETSC_TRUE;
306: if (ex) {
307: Mat Ainvd;
309: PCComputeOperator(pc, MATDENSE, &Ainvd);
310: MatMatMult(Ainvd, Bd, MAT_INITIAL_MATRIX, fill, &AinvBd);
311: MatDestroy(&Ainvd);
312: } else {
313: Vec sol, rhs;
314: PetscScalar *arrayrhs, *arraysol;
315: PetscInt i, nrhs, n;
317: MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd);
318: MatGetSize(Bd, &n, &nrhs);
319: MatDenseGetArray(Bd, &arrayrhs);
320: MatDenseGetArray(AinvBd, &arraysol);
321: KSPGetSolution(ksp, &sol);
322: KSPGetRhs(ksp, &rhs);
323: for (i = 0; i < nrhs; i++) {
324: VecPlaceArray(rhs, arrayrhs + i * n);
325: VecPlaceArray(sol, arraysol + i * n);
326: KSPSolve(ksp, rhs, sol);
327: VecResetArray(rhs);
328: VecResetArray(sol);
329: }
330: MatDenseRestoreArray(Bd, &arrayrhs);
331: MatDenseRestoreArray(AinvBd, &arraysol);
332: }
333: }
334: if (!Bdense && !issym) MatDestroy(&Bd);
336: if (!issym) {
337: if (!Cdense) {
338: MatConvert(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &Cd);
339: } else {
340: Cd = C;
341: }
342: MatMatMult(Cd, AinvBd, reuse, fill, S);
343: if (!Cdense) MatDestroy(&Cd);
344: } else {
345: MatTransposeMatMult(Bd, AinvBd, reuse, fill, S);
346: if (!Bdense) MatDestroy(&Bd);
347: }
348: MatDestroy(&AinvBd);
349: }
351: if (D) {
352: Mat Dd;
353: PetscBool Ddense;
355: PetscObjectTypeCompare((PetscObject)D, MATSEQDENSE, &Ddense);
356: if (!Ddense) {
357: MatConvert(D, MATSEQDENSE, MAT_INITIAL_MATRIX, &Dd);
358: } else {
359: Dd = D;
360: }
361: if (n_I) {
362: MatAYPX(*S, -1.0, Dd, SAME_NONZERO_PATTERN);
363: } else {
364: if (reuse == MAT_INITIAL_MATRIX) {
365: MatDuplicate(Dd, MAT_COPY_VALUES, S);
366: } else {
367: MatCopy(Dd, *S, SAME_NONZERO_PATTERN);
368: }
369: }
370: if (!Ddense) MatDestroy(&Dd);
371: } else {
372: MatScale(*S, -1.0);
373: }
374: return 0;
375: }
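/* Build the local Schur complements S_Ej associated with the subsets (edges/faces) of the
   interface. In short, the routine below
     - restricts the communicator to the processes that actually own subsets,
     - selects the interior dofs (optionally only a few adjacency layers around the boundary),
     - either computes each S_Ej explicitly via PCBDDCComputeExplicitSchur, or factors the
       reordered local matrix once and extracts the Schur complement from the factorization
       (MatFactorSetSchurIS/MatFactorGetSchurComplement),
     - and, when compute_Stilda is requested, prepares the additional matrices used by deluxe
       scaling and adaptive coarse space selection. */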
377: PetscErrorCode PCBDDCSubSchursSetUp(PCBDDCSubSchurs sub_schurs, Mat Ain, Mat Sin, PetscBool exact_schur, PetscInt xadj[], PetscInt adjncy[], PetscInt nlayers, Vec scaling, PetscBool compute_Stilda, PetscBool reuse_solvers, PetscBool benign_trick, PetscInt benign_n, PetscInt benign_p0_lidx[], IS benign_zerodiag_subs[], Mat change, IS change_primal)
378: {
379: Mat F, A_II, A_IB, A_BI, A_BB, AE_II;
380: Mat S_all;
381: Vec gstash, lstash;
382: VecScatter sstash;
383: IS is_I, is_I_layer;
384: IS all_subsets, all_subsets_mult, all_subsets_n;
385: PetscScalar *stasharray, *Bwork;
386: PetscInt *nnz, *all_local_idx_N;
387: PetscInt *auxnum1, *auxnum2;
388: PetscInt i, subset_size, max_subset_size;
389: PetscInt n_B, extra, local_size, global_size;
390: PetscInt local_stash_size;
391: PetscBLASInt B_N, B_ierr, B_lwork, *pivots;
392: MPI_Comm comm_n;
393: PetscBool deluxe = PETSC_TRUE;
394: PetscBool use_potr = PETSC_FALSE, use_sytr = PETSC_FALSE;
395: PetscViewer matl_dbg_viewer = NULL;
396: PetscBool flg;
398: MatDestroy(&sub_schurs->A);
399: MatDestroy(&sub_schurs->S);
400: if (Ain) {
401: PetscObjectReference((PetscObject)Ain);
402: sub_schurs->A = Ain;
403: }
405: PetscObjectReference((PetscObject)Sin);
406: sub_schurs->S = Sin;
407: if (sub_schurs->schur_explicit) sub_schurs->schur_explicit = (PetscBool)(!!sub_schurs->A);
409: /* preliminary checks */
412: if (benign_trick) sub_schurs->is_posdef = PETSC_FALSE;
414: /* debug (MATLAB) */
415: if (sub_schurs->debug) {
416: PetscMPIInt size, rank;
417: PetscInt nr, *print_schurs_ranks, print_schurs = PETSC_FALSE;
419: MPI_Comm_size(PetscObjectComm((PetscObject)sub_schurs->l2gmap), &size);
420: MPI_Comm_rank(PetscObjectComm((PetscObject)sub_schurs->l2gmap), &rank);
421: nr = size;
422: PetscMalloc1(nr, &print_schurs_ranks);
423: PetscOptionsBegin(PetscObjectComm((PetscObject)sub_schurs->l2gmap), sub_schurs->prefix, "BDDC sub_schurs options", "PC");
424: PetscOptionsIntArray("-sub_schurs_debug_ranks", "Ranks to debug (all if the option is not used)", NULL, print_schurs_ranks, &nr, &flg);
425: if (!flg) print_schurs = PETSC_TRUE;
426: else {
427: print_schurs = PETSC_FALSE;
428: for (i = 0; i < nr; i++)
429: if (print_schurs_ranks[i] == (PetscInt)rank) {
430: print_schurs = PETSC_TRUE;
431: break;
432: }
433: }
434: PetscOptionsEnd();
435: PetscFree(print_schurs_ranks);
436: if (print_schurs) {
437: char filename[256];
439: PetscSNPrintf(filename, sizeof(filename), "sub_schurs_Schur_r%d.m", PetscGlobalRank);
440: PetscViewerASCIIOpen(PETSC_COMM_SELF, filename, &matl_dbg_viewer);
441: PetscViewerPushFormat(matl_dbg_viewer, PETSC_VIEWER_ASCII_MATLAB);
442: }
443: }
445: /* restrict work on active processes */
446: if (sub_schurs->restrict_comm) {
447: PetscSubcomm subcomm;
448: PetscMPIInt color, rank;
450: color = 0;
451: if (!sub_schurs->n_subs) color = 1; /* this can happen if we are in a multilevel case or if the subdomain is disconnected */
452: MPI_Comm_rank(PetscObjectComm((PetscObject)sub_schurs->l2gmap), &rank);
453: PetscSubcommCreate(PetscObjectComm((PetscObject)sub_schurs->l2gmap), &subcomm);
454: PetscSubcommSetNumber(subcomm, 2);
455: PetscSubcommSetTypeGeneral(subcomm, color, rank);
456: PetscCommDuplicate(PetscSubcommChild(subcomm), &comm_n, NULL);
457: PetscSubcommDestroy(&subcomm);
458: if (!sub_schurs->n_subs) {
459: PetscCommDestroy(&comm_n);
460: return 0;
461: }
462: } else {
463: PetscCommDuplicate(PetscObjectComm((PetscObject)sub_schurs->l2gmap), &comm_n, NULL);
464: }
466: /* get Schur complement matrices */
467: if (!sub_schurs->schur_explicit) {
468: Mat tA_IB, tA_BI, tA_BB;
469: PetscBool isseqsbaij;
470: MatSchurComplementGetSubMatrices(sub_schurs->S, &A_II, NULL, &tA_IB, &tA_BI, &tA_BB);
471: PetscObjectTypeCompare((PetscObject)tA_BB, MATSEQSBAIJ, &isseqsbaij);
472: if (isseqsbaij) {
473: MatConvert(tA_BB, MATSEQAIJ, MAT_INITIAL_MATRIX, &A_BB);
474: MatConvert(tA_IB, MATSEQAIJ, MAT_INITIAL_MATRIX, &A_IB);
475: MatConvert(tA_BI, MATSEQAIJ, MAT_INITIAL_MATRIX, &A_BI);
476: } else {
477: PetscObjectReference((PetscObject)tA_BB);
478: A_BB = tA_BB;
479: PetscObjectReference((PetscObject)tA_IB);
480: A_IB = tA_IB;
481: PetscObjectReference((PetscObject)tA_BI);
482: A_BI = tA_BI;
483: }
484: } else {
485: A_II = NULL;
486: A_IB = NULL;
487: A_BI = NULL;
488: A_BB = NULL;
489: }
490: S_all = NULL;
492: /* determine interior problems */
493: ISGetLocalSize(sub_schurs->is_I, &i);
494: if (nlayers >= 0 && i) { /* Interior problems can be different from the original one */
495: PetscBT touched;
496: const PetscInt *idx_B;
497: PetscInt n_I, n_B, n_local_dofs, n_prev_added, j, layer, *local_numbering;
500: /* get sizes */
501: ISGetLocalSize(sub_schurs->is_I, &n_I);
502: ISGetLocalSize(sub_schurs->is_B, &n_B);
504: PetscMalloc1(n_I + n_B, &local_numbering);
505: PetscBTCreate(n_I + n_B, &touched);
506: PetscBTMemzero(n_I + n_B, touched);
508: /* all boundary dofs must be skipped when adding layers */
509: ISGetIndices(sub_schurs->is_B, &idx_B);
510: for (j = 0; j < n_B; j++) PetscBTSet(touched, idx_B[j]);
511: PetscArraycpy(local_numbering, idx_B, n_B);
512: ISRestoreIndices(sub_schurs->is_B, &idx_B);
514: /* add prescribed number of layers of dofs */
515: n_local_dofs = n_B;
516: n_prev_added = n_B;
517: for (layer = 0; layer < nlayers; layer++) {
518: PetscInt n_added = 0;
519: if (n_local_dofs == n_I + n_B) break;
521: PCBDDCAdjGetNextLayer_Private(local_numbering + n_local_dofs, n_prev_added, touched, xadj, adjncy, &n_added);
522: n_prev_added = n_added;
523: n_local_dofs += n_added;
524: if (!n_added) break;
525: }
526: PetscBTDestroy(&touched);
528: /* IS for I layer dofs in original numbering */
529: ISCreateGeneral(PetscObjectComm((PetscObject)sub_schurs->is_I), n_local_dofs - n_B, local_numbering + n_B, PETSC_COPY_VALUES, &is_I_layer);
530: PetscFree(local_numbering);
531: ISSort(is_I_layer);
532: /* IS for I layer dofs in I numbering */
533: if (!sub_schurs->schur_explicit) {
534: ISLocalToGlobalMapping ItoNmap;
535: ISLocalToGlobalMappingCreateIS(sub_schurs->is_I, &ItoNmap);
536: ISGlobalToLocalMappingApplyIS(ItoNmap, IS_GTOLM_DROP, is_I_layer, &is_I);
537: ISLocalToGlobalMappingDestroy(&ItoNmap);
539: /* II block */
540: MatCreateSubMatrix(A_II, is_I, is_I, MAT_INITIAL_MATRIX, &AE_II);
541: }
542: } else {
543: PetscInt n_I;
545: /* IS for I dofs in original numbering */
546: PetscObjectReference((PetscObject)sub_schurs->is_I);
547: is_I_layer = sub_schurs->is_I;
549: /* IS for I dofs in I numbering (strided 1) */
550: if (!sub_schurs->schur_explicit) {
551: ISGetSize(sub_schurs->is_I, &n_I);
552: ISCreateStride(PetscObjectComm((PetscObject)sub_schurs->is_I), n_I, 0, 1, &is_I);
554: /* II block is the same */
555: PetscObjectReference((PetscObject)A_II);
556: AE_II = A_II;
557: }
558: }
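/* At this point is_I_layer contains, in the original local numbering, the interior dofs that
   enter the local Schur problems; when the factorization-based path is not used
   (schur_explicit false), is_I and AE_II hold the same set and the corresponding II block
   in interior numbering. */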
560: /* Get info on subset sizes and sum of all subsets sizes */
561: max_subset_size = 0;
562: local_size = 0;
563: for (i = 0; i < sub_schurs->n_subs; i++) {
564: ISGetLocalSize(sub_schurs->is_subs[i], &subset_size);
565: max_subset_size = PetscMax(subset_size, max_subset_size);
566: local_size += subset_size;
567: }
569: /* Work arrays for local indices */
570: extra = 0;
571: ISGetLocalSize(sub_schurs->is_B, &n_B);
572: if (sub_schurs->schur_explicit && is_I_layer) ISGetLocalSize(is_I_layer, &extra);
573: PetscMalloc1(n_B + extra, &all_local_idx_N);
574: if (extra) {
575: const PetscInt *idxs;
576: ISGetIndices(is_I_layer, &idxs);
577: PetscArraycpy(all_local_idx_N, idxs, extra);
578: ISRestoreIndices(is_I_layer, &idxs);
579: }
580: PetscMalloc1(sub_schurs->n_subs, &auxnum1);
581: PetscMalloc1(sub_schurs->n_subs, &auxnum2);
583: /* Get local indices in local numbering */
584: local_size = 0;
585: local_stash_size = 0;
586: for (i = 0; i < sub_schurs->n_subs; i++) {
587: const PetscInt *idxs;
589: ISGetLocalSize(sub_schurs->is_subs[i], &subset_size);
590: ISGetIndices(sub_schurs->is_subs[i], &idxs);
591: /* start (smallest in global ordering) and multiplicity */
592: auxnum1[i] = idxs[0];
593: auxnum2[i] = subset_size * subset_size;
594: /* subset indices in local numbering */
595: PetscArraycpy(all_local_idx_N + local_size + extra, idxs, subset_size);
596: ISRestoreIndices(sub_schurs->is_subs[i], &idxs);
597: local_size += subset_size;
598: local_stash_size += subset_size * subset_size;
599: }
601: /* allocate extra workspace needed only for GETRI or SYTRF */
602: use_potr = use_sytr = PETSC_FALSE;
603: if (benign_trick || (sub_schurs->is_hermitian && sub_schurs->is_posdef)) {
604: use_potr = PETSC_TRUE;
605: } else if (sub_schurs->is_symmetric) {
606: use_sytr = PETSC_TRUE;
607: }
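/* The lwork = -1 calls below are LAPACK's standard workspace query: no factorization is
   performed and the optimal workspace size is returned in the first entry of the work
   argument, which is then used to allocate Bwork. */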
608: if (local_size && !use_potr) {
609: PetscScalar lwork, dummyscalar = 0.;
610: PetscBLASInt dummyint = 0;
612: B_lwork = -1;
613: PetscBLASIntCast(local_size, &B_N);
614: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
615: if (use_sytr) {
616: PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &B_N, &dummyscalar, &B_N, &dummyint, &lwork, &B_lwork, &B_ierr));
618: } else {
619: PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&B_N, &dummyscalar, &B_N, &dummyint, &lwork, &B_lwork, &B_ierr));
621: }
622: PetscFPTrapPop();
623: PetscBLASIntCast((PetscInt)PetscRealPart(lwork), &B_lwork);
624: PetscMalloc2(B_lwork, &Bwork, B_N, &pivots);
625: } else {
626: Bwork = NULL;
627: pivots = NULL;
628: }
630: /* prepare data for summing up properly schurs on subsets */
631: ISCreateGeneral(comm_n, sub_schurs->n_subs, auxnum1, PETSC_OWN_POINTER, &all_subsets_n);
632: ISLocalToGlobalMappingApplyIS(sub_schurs->l2gmap, all_subsets_n, &all_subsets);
633: ISDestroy(&all_subsets_n);
634: ISCreateGeneral(comm_n, sub_schurs->n_subs, auxnum2, PETSC_OWN_POINTER, &all_subsets_mult);
635: ISRenumber(all_subsets, all_subsets_mult, &global_size, &all_subsets_n);
636: ISDestroy(&all_subsets);
637: ISDestroy(&all_subsets_mult);
638: ISGetLocalSize(all_subsets_n, &i);
639: if (i != local_stash_size) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid size of new subset numbering %" PetscInt_FMT " != %" PetscInt_FMT, i, local_stash_size);
640: VecCreateSeqWithArray(PETSC_COMM_SELF, 1, local_stash_size, NULL, &lstash);
641: VecCreateMPI(comm_n, PETSC_DECIDE, global_size, &gstash);
642: VecScatterCreate(lstash, NULL, gstash, all_subsets_n, &sstash);
643: ISDestroy(&all_subsets_n);
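/* lstash wraps the concatenated subset blocks (subset_size^2 entries each, array attached
   later), gstash is its distributed counterpart of length global_size, and sstash scatters
   the local blocks to the global numbering produced by ISRenumber so that contributions
   from all subdomains sharing a subset can be summed. */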
645: /* subset indices in local boundary numbering */
646: if (!sub_schurs->is_Ej_all) {
647: PetscInt *all_local_idx_B;
649: PetscMalloc1(local_size, &all_local_idx_B);
650: ISGlobalToLocalMappingApply(sub_schurs->BtoNmap, IS_GTOLM_DROP, local_size, all_local_idx_N + extra, &subset_size, all_local_idx_B);
652: ISCreateGeneral(PETSC_COMM_SELF, local_size, all_local_idx_B, PETSC_OWN_POINTER, &sub_schurs->is_Ej_all);
653: }
655: if (change) {
656: ISLocalToGlobalMapping BtoS;
657: IS change_primal_B;
658: IS change_primal_all;
662: PetscMalloc1(sub_schurs->n_subs, &sub_schurs->change_primal_sub);
663: for (i = 0; i < sub_schurs->n_subs; i++) {
664: ISLocalToGlobalMapping NtoS;
665: ISLocalToGlobalMappingCreateIS(sub_schurs->is_subs[i], &NtoS);
666: ISGlobalToLocalMappingApplyIS(NtoS, IS_GTOLM_DROP, change_primal, &sub_schurs->change_primal_sub[i]);
667: ISLocalToGlobalMappingDestroy(&NtoS);
668: }
669: ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap, IS_GTOLM_DROP, change_primal, &change_primal_B);
670: ISLocalToGlobalMappingCreateIS(sub_schurs->is_Ej_all, &BtoS);
671: ISGlobalToLocalMappingApplyIS(BtoS, IS_GTOLM_DROP, change_primal_B, &change_primal_all);
672: ISLocalToGlobalMappingDestroy(&BtoS);
673: ISDestroy(&change_primal_B);
674: PetscMalloc1(sub_schurs->n_subs, &sub_schurs->change);
675: for (i = 0; i < sub_schurs->n_subs; i++) {
676: Mat change_sub;
678: ISGetLocalSize(sub_schurs->is_subs[i], &subset_size);
679: KSPCreate(PETSC_COMM_SELF, &sub_schurs->change[i]);
680: KSPSetType(sub_schurs->change[i], KSPPREONLY);
681: if (!sub_schurs->change_with_qr) {
682: MatCreateSubMatrix(change, sub_schurs->is_subs[i], sub_schurs->is_subs[i], MAT_INITIAL_MATRIX, &change_sub);
683: } else {
684: Mat change_subt;
685: MatCreateSubMatrix(change, sub_schurs->is_subs[i], sub_schurs->is_subs[i], MAT_INITIAL_MATRIX, &change_subt);
686: MatConvert(change_subt, MATSEQDENSE, MAT_INITIAL_MATRIX, &change_sub);
687: MatDestroy(&change_subt);
688: }
689: KSPSetOperators(sub_schurs->change[i], change_sub, change_sub);
690: MatDestroy(&change_sub);
691: KSPSetOptionsPrefix(sub_schurs->change[i], sub_schurs->prefix);
692: KSPAppendOptionsPrefix(sub_schurs->change[i], "sub_schurs_change_");
693: }
694: ISDestroy(&change_primal_all);
695: }
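/* Each sub_schurs->change[i] is a PREONLY KSP whose operator is the restriction of the
   change-of-basis matrix to subset i (converted to dense when QR is requested), with options
   prefix "<prefix>sub_schurs_change_"; change_primal_sub[i] stores the primal dofs of the
   subset in subset-local numbering. */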
697: /* Local matrix of all local Schur on subsets (transposed) */
698: if (!sub_schurs->S_Ej_all) {
699: Mat T;
700: PetscScalar *v;
701: PetscInt *ii, *jj;
702: PetscInt cum, i, j, k;
704: /* MatSeqAIJSetPreallocation + MatSetValues is slow for these kinds of matrices (they may have large blocks);
705: properly allocate a representative matrix and duplicate it */
706: PetscMalloc3(local_size + 1, &ii, local_stash_size, &jj, local_stash_size, &v);
707: ii[0] = 0;
708: cum = 0;
709: for (i = 0; i < sub_schurs->n_subs; i++) {
710: ISGetLocalSize(sub_schurs->is_subs[i], &subset_size);
711: for (j = 0; j < subset_size; j++) {
712: const PetscInt row = cum + j;
713: PetscInt col = cum;
715: ii[row + 1] = ii[row] + subset_size;
716: for (k = ii[row]; k < ii[row + 1]; k++) {
717: jj[k] = col;
718: col++;
719: }
720: }
721: cum += subset_size;
722: }
723: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, local_size, local_size, ii, jj, v, &T);
724: MatDuplicate(T, MAT_DO_NOT_COPY_VALUES, &sub_schurs->S_Ej_all);
725: MatDestroy(&T);
726: PetscFree3(ii, jj, v);
727: }
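/* S_Ej_all is block diagonal: one dense subset_size x subset_size block per subset, stored
   contiguously; the CSR arrays built above encode exactly this pattern, so the matrix can be
   duplicated without going through MatSetValues-based preallocation. */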
728: /* matrices for deluxe scaling and adaptive selection */
729: if (compute_Stilda) {
730: if (!sub_schurs->sum_S_Ej_tilda_all) MatDuplicate(sub_schurs->S_Ej_all, MAT_DO_NOT_COPY_VALUES, &sub_schurs->sum_S_Ej_tilda_all);
731: if (!sub_schurs->sum_S_Ej_inv_all && deluxe) MatDuplicate(sub_schurs->S_Ej_all, MAT_DO_NOT_COPY_VALUES, &sub_schurs->sum_S_Ej_inv_all);
732: }
734: /* Compute Schur complements explicitly */
735: F = NULL;
736: if (!sub_schurs->schur_explicit) {
737: /* this code branch is used when MatFactor with Schur complement support is not present or when explicitly requested;
738: it is not efficient, unless the economic version of the scaling is used */
739: Mat S_Ej_expl;
740: PetscScalar *work;
741: PetscInt j, *dummy_idx;
742: PetscBool Sdense;
744: PetscMalloc2(max_subset_size, &dummy_idx, max_subset_size * max_subset_size, &work);
745: local_size = 0;
746: for (i = 0; i < sub_schurs->n_subs; i++) {
747: IS is_subset_B;
748: Mat AE_EE, AE_IE, AE_EI, S_Ej;
750: /* subsets in original and boundary numbering */
751: ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap, IS_GTOLM_DROP, sub_schurs->is_subs[i], &is_subset_B);
752: /* EE block */
753: MatCreateSubMatrix(A_BB, is_subset_B, is_subset_B, MAT_INITIAL_MATRIX, &AE_EE);
754: /* IE block */
755: MatCreateSubMatrix(A_IB, is_I, is_subset_B, MAT_INITIAL_MATRIX, &AE_IE);
756: /* EI block */
757: if (sub_schurs->is_symmetric) {
758: MatCreateTranspose(AE_IE, &AE_EI);
759: } else if (sub_schurs->is_hermitian) {
760: MatCreateHermitianTranspose(AE_IE, &AE_EI);
761: } else {
762: MatCreateSubMatrix(A_BI, is_subset_B, is_I, MAT_INITIAL_MATRIX, &AE_EI);
763: }
764: ISDestroy(&is_subset_B);
765: MatCreateSchurComplement(AE_II, AE_II, AE_IE, AE_EI, AE_EE, &S_Ej);
766: MatDestroy(&AE_EE);
767: MatDestroy(&AE_IE);
768: MatDestroy(&AE_EI);
769: if (AE_II == A_II) { /* we can reuse the same ksp */
770: KSP ksp;
771: MatSchurComplementGetKSP(sub_schurs->S, &ksp);
772: MatSchurComplementSetKSP(S_Ej, ksp);
773: } else { /* build new ksp object which inherits ksp and pc types from the original one */
774: KSP origksp, schurksp;
775: PC origpc, schurpc;
776: KSPType ksp_type;
777: PetscInt n_internal;
778: PetscBool ispcnone;
780: MatSchurComplementGetKSP(sub_schurs->S, &origksp);
781: MatSchurComplementGetKSP(S_Ej, &schurksp);
782: KSPGetType(origksp, &ksp_type);
783: KSPSetType(schurksp, ksp_type);
784: KSPGetPC(schurksp, &schurpc);
785: KSPGetPC(origksp, &origpc);
786: PetscObjectTypeCompare((PetscObject)origpc, PCNONE, &ispcnone);
787: if (!ispcnone) {
788: PCType pc_type;
789: PCGetType(origpc, &pc_type);
790: PCSetType(schurpc, pc_type);
791: } else {
792: PCSetType(schurpc, PCLU);
793: }
794: ISGetSize(is_I, &n_internal);
795: if (!n_internal) { /* UMFPACK gives error with 0 sized problems */
796: MatSolverType solver = NULL;
797: PCFactorGetMatSolverType(origpc, (MatSolverType *)&solver);
798: if (solver) PCFactorSetMatSolverType(schurpc, solver);
799: }
800: KSPSetUp(schurksp);
801: }
802: ISGetLocalSize(sub_schurs->is_subs[i], &subset_size);
803: MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, work, &S_Ej_expl);
804: PCBDDCComputeExplicitSchur(S_Ej, sub_schurs->is_symmetric, MAT_REUSE_MATRIX, &S_Ej_expl);
805: PetscObjectTypeCompare((PetscObject)S_Ej_expl, MATSEQDENSE, &Sdense);
806: if (Sdense) {
807: for (j = 0; j < subset_size; j++) dummy_idx[j] = local_size + j;
808: MatSetValues(sub_schurs->S_Ej_all, subset_size, dummy_idx, subset_size, dummy_idx, work, INSERT_VALUES);
809: } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented for sparse matrices");
810: MatDestroy(&S_Ej);
811: MatDestroy(&S_Ej_expl);
812: local_size += subset_size;
813: }
814: PetscFree2(dummy_idx, work);
815: /* free */
816: ISDestroy(&is_I);
817: MatDestroy(&AE_II);
818: PetscFree(all_local_idx_N);
819: } else {
820: Mat A, cs_AIB_mat = NULL, benign_AIIm1_ones_mat = NULL;
821: Mat *gdswA;
822: Vec Dall = NULL;
823: IS is_A_all, *is_p_r = NULL;
824: MatType Stype;
825: PetscScalar *work, *S_data, *schur_factor, infty = PETSC_MAX_REAL;
826: PetscScalar *SEj_arr = NULL, *SEjinv_arr = NULL;
827: const PetscScalar *rS_data;
828: PetscInt n, n_I, size_schur, size_active_schur, cum, cum2;
829: PetscBool economic, solver_S, S_lower_triangular = PETSC_FALSE;
830: PetscBool schur_has_vertices, factor_workaround;
831: PetscBool use_cholesky;
832: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
833: PetscBool oldpin;
834: #endif
836: /* get sizes */
837: n_I = 0;
838: if (is_I_layer) ISGetLocalSize(is_I_layer, &n_I);
839: economic = PETSC_FALSE;
840: ISGetLocalSize(sub_schurs->is_I, &cum);
841: if (cum != n_I) economic = PETSC_TRUE;
842: MatGetLocalSize(sub_schurs->A, &n, NULL);
843: size_active_schur = local_size;
845: /* import scaling vector (wrong formulation if we have 3D edges) */
846: if (scaling && compute_Stilda) {
847: const PetscScalar *array;
848: PetscScalar *array2;
849: const PetscInt *idxs;
850: PetscInt i;
852: ISGetIndices(sub_schurs->is_Ej_all, &idxs);
853: VecCreateSeq(PETSC_COMM_SELF, size_active_schur, &Dall);
854: VecGetArrayRead(scaling, &array);
855: VecGetArray(Dall, &array2);
856: for (i = 0; i < size_active_schur; i++) array2[i] = array[idxs[i]];
857: VecRestoreArray(Dall, &array2);
858: VecRestoreArrayRead(scaling, &array);
859: ISRestoreIndices(sub_schurs->is_Ej_all, &idxs);
860: deluxe = PETSC_FALSE;
861: }
863: /* size_active_schur does not count any Dirichlet or vertex dof on the interface */
864: factor_workaround = PETSC_FALSE;
865: schur_has_vertices = PETSC_FALSE;
866: cum = n_I + size_active_schur;
867: if (sub_schurs->is_dir) {
868: const PetscInt *idxs;
869: PetscInt n_dir;
871: ISGetLocalSize(sub_schurs->is_dir, &n_dir);
872: ISGetIndices(sub_schurs->is_dir, &idxs);
873: PetscArraycpy(all_local_idx_N + cum, idxs, n_dir);
874: ISRestoreIndices(sub_schurs->is_dir, &idxs);
875: cum += n_dir;
876: if (!sub_schurs->gdsw) factor_workaround = PETSC_TRUE;
877: }
878: /* include the primal vertices in the Schur complement */
879: if (exact_schur && sub_schurs->is_vertices && (compute_Stilda || benign_n)) {
880: PetscInt n_v;
882: ISGetLocalSize(sub_schurs->is_vertices, &n_v);
883: if (n_v) {
884: const PetscInt *idxs;
886: ISGetIndices(sub_schurs->is_vertices, &idxs);
887: PetscArraycpy(all_local_idx_N + cum, idxs, n_v);
888: ISRestoreIndices(sub_schurs->is_vertices, &idxs);
889: cum += n_v;
890: if (!sub_schurs->gdsw) factor_workaround = PETSC_TRUE;
891: schur_has_vertices = PETSC_TRUE;
892: }
893: }
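/* all_local_idx_N now lists, in order, the interior (layer) dofs, the active interface subsets,
   the Dirichlet interface dofs (if any) and, possibly, the primal vertices; the Schur complement
   computed next is taken on everything but the interior part. */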
894: size_schur = cum - n_I;
895: ISCreateGeneral(PETSC_COMM_SELF, cum, all_local_idx_N, PETSC_OWN_POINTER, &is_A_all);
896: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
897: oldpin = sub_schurs->A->boundtocpu;
898: MatBindToCPU(sub_schurs->A, PETSC_TRUE);
899: #endif
900: if (cum == n) {
901: ISSetPermutation(is_A_all);
902: MatPermute(sub_schurs->A, is_A_all, is_A_all, &A);
903: } else {
904: MatCreateSubMatrix(sub_schurs->A, is_A_all, is_A_all, MAT_INITIAL_MATRIX, &A);
905: }
906: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
907: MatBindToCPU(sub_schurs->A, oldpin);
908: #endif
909: MatSetOptionsPrefixFactor(A, sub_schurs->prefix);
910: MatAppendOptionsPrefixFactor(A, "sub_schurs_");
912: /* if we actually change the basis for the pressures, the LDL^T factors will use a lot of memory;
913: this is a workaround */
914: if (benign_n) {
915: Vec v, benign_AIIm1_ones;
916: ISLocalToGlobalMapping N_to_reor;
917: IS is_p0, is_p0_p;
918: PetscScalar *cs_AIB, *AIIm1_data;
919: PetscInt sizeA;
921: ISLocalToGlobalMappingCreateIS(is_A_all, &N_to_reor);
922: ISCreateGeneral(PETSC_COMM_SELF, benign_n, benign_p0_lidx, PETSC_COPY_VALUES, &is_p0);
923: ISGlobalToLocalMappingApplyIS(N_to_reor, IS_GTOLM_DROP, is_p0, &is_p0_p);
924: ISDestroy(&is_p0);
925: MatCreateVecs(A, &v, &benign_AIIm1_ones);
926: VecGetSize(v, &sizeA);
927: MatCreateSeqDense(PETSC_COMM_SELF, sizeA, benign_n, NULL, &benign_AIIm1_ones_mat);
928: MatCreateSeqDense(PETSC_COMM_SELF, size_schur, benign_n, NULL, &cs_AIB_mat);
929: MatDenseGetArray(cs_AIB_mat, &cs_AIB);
930: MatDenseGetArray(benign_AIIm1_ones_mat, &AIIm1_data);
931: PetscMalloc1(benign_n, &is_p_r);
932: /* compute colsum of A_IB restricted to pressures */
933: for (i = 0; i < benign_n; i++) {
934: const PetscScalar *array;
935: const PetscInt *idxs;
936: PetscInt j, nz;
938: ISGlobalToLocalMappingApplyIS(N_to_reor, IS_GTOLM_DROP, benign_zerodiag_subs[i], &is_p_r[i]);
939: ISGetLocalSize(is_p_r[i], &nz);
940: ISGetIndices(is_p_r[i], &idxs);
941: for (j = 0; j < nz; j++) AIIm1_data[idxs[j] + sizeA * i] = 1.;
942: ISRestoreIndices(is_p_r[i], &idxs);
943: VecPlaceArray(benign_AIIm1_ones, AIIm1_data + sizeA * i);
944: MatMult(A, benign_AIIm1_ones, v);
945: VecResetArray(benign_AIIm1_ones);
946: VecGetArrayRead(v, &array);
947: for (j = 0; j < size_schur; j++) {
948: #if defined(PETSC_USE_COMPLEX)
949: cs_AIB[i * size_schur + j] = (PetscRealPart(array[j + n_I]) / nz + PETSC_i * (PetscImaginaryPart(array[j + n_I]) / nz));
950: #else
951: cs_AIB[i * size_schur + j] = array[j + n_I] / nz;
952: #endif
953: }
954: VecRestoreArrayRead(v, &array);
955: }
956: MatDenseRestoreArray(cs_AIB_mat, &cs_AIB);
957: MatDenseRestoreArray(benign_AIIm1_ones_mat, &AIIm1_data);
958: VecDestroy(&v);
959: VecDestroy(&benign_AIIm1_ones);
960: MatSetOption(A, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE);
961: MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE);
962: MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE);
963: MatZeroRowsColumnsIS(A, is_p0_p, 1.0, NULL, NULL);
964: ISDestroy(&is_p0_p);
965: ISLocalToGlobalMappingDestroy(&N_to_reor);
966: }
967: MatSetOption(A, MAT_SYMMETRIC, sub_schurs->is_symmetric);
968: MatSetOption(A, MAT_HERMITIAN, sub_schurs->is_hermitian);
969: MatSetOption(A, MAT_SPD, sub_schurs->is_posdef);
971: /* for complex scalars, being symmetric and Hermitian at the same time implies a null imaginary part */
972: use_cholesky = (PetscBool)((use_potr || use_sytr) && sub_schurs->is_hermitian && sub_schurs->is_symmetric);
974: /* when using the benign subspace trick, the local Schur complements are SPD */
975: /* MKL PARDISO does not handle the computation of a Schur complement from a symmetric indefinite factorization well.
976: Use LU and adapt the pivoting perturbation (still, the solution is not as accurate as with MUMPS) */
977: if (benign_trick) {
978: sub_schurs->is_posdef = PETSC_TRUE;
979: PetscStrcmp(sub_schurs->mat_solver_type, MATSOLVERMKL_PARDISO, &flg);
980: if (flg) use_cholesky = PETSC_FALSE;
981: }
983: if (n_I) {
984: IS is_schur;
985: char stype[64];
986: PetscBool gpu = PETSC_FALSE;
988: if (use_cholesky) {
989: MatGetFactor(A, sub_schurs->mat_solver_type, MAT_FACTOR_CHOLESKY, &F);
990: } else {
991: MatGetFactor(A, sub_schurs->mat_solver_type, MAT_FACTOR_LU, &F);
992: }
993: MatSetErrorIfFailure(A, PETSC_TRUE);
994: #if defined(PETSC_HAVE_MKL_PARDISO)
995: if (benign_trick) MatMkl_PardisoSetCntl(F, 10, 10);
996: #endif
997: /* subsets ordered last */
998: ISCreateStride(PETSC_COMM_SELF, size_schur, n_I, 1, &is_schur);
999: MatFactorSetSchurIS(F, is_schur);
1000: ISDestroy(&is_schur);
1002: /* factorization step */
1003: if (use_cholesky) {
1004: MatCholeskyFactorSymbolic(F, A, NULL, NULL);
1005: #if defined(PETSC_HAVE_MUMPS) /* be sure that icntl 19 is not set by command line */
1006: MatMumpsSetIcntl(F, 19, 2);
1007: #endif
1008: MatCholeskyFactorNumeric(F, A, NULL);
1009: S_lower_triangular = PETSC_TRUE;
1010: } else {
1011: MatLUFactorSymbolic(F, A, NULL, NULL, NULL);
1012: #if defined(PETSC_HAVE_MUMPS) /* be sure that icntl 19 is not set by command line */
1013: MatMumpsSetIcntl(F, 19, 3);
1014: #endif
1015: MatLUFactorNumeric(F, A, NULL);
1016: }
1017: MatViewFromOptions(F, (PetscObject)A, "-mat_factor_view");
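/* F now holds the LU/Cholesky factors of the reordered local matrix together with the Schur
   complement on the dofs listed in is_schur (subsets first, then Dirichlet/vertex dofs when
   present); it is retrieved below with MatFactorGetSchurComplement and converted to the
   requested dense type. */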
1019: if (matl_dbg_viewer) {
1020: Mat S;
1021: IS is;
1023: PetscObjectSetName((PetscObject)A, "A");
1024: MatView(A, matl_dbg_viewer);
1025: MatFactorCreateSchurComplement(F, &S, NULL);
1026: PetscObjectSetName((PetscObject)S, "S");
1027: MatView(S, matl_dbg_viewer);
1028: MatDestroy(&S);
1029: ISCreateStride(PETSC_COMM_SELF, n_I, 0, 1, &is);
1030: PetscObjectSetName((PetscObject)is, "I");
1031: ISView(is, matl_dbg_viewer);
1032: ISDestroy(&is);
1033: ISCreateStride(PETSC_COMM_SELF, size_schur, n_I, 1, &is);
1034: PetscObjectSetName((PetscObject)is, "B");
1035: ISView(is, matl_dbg_viewer);
1036: ISDestroy(&is);
1037: PetscObjectSetName((PetscObject)is_A_all, "IA");
1038: ISView(is_A_all, matl_dbg_viewer);
1039: for (i = 0, cum = 0; i < sub_schurs->n_subs; i++) {
1040: IS is;
1041: char name[16];
1043: PetscSNPrintf(name, sizeof(name), "IE%" PetscInt_FMT, i);
1044: ISGetLocalSize(sub_schurs->is_subs[i], &subset_size);
1045: ISCreateStride(PETSC_COMM_SELF, subset_size, cum, 1, &is);
1046: PetscObjectSetName((PetscObject)is, name);
1047: ISView(is, matl_dbg_viewer);
1048: ISDestroy(&is);
1049: if (sub_schurs->change) {
1050: Mat T;
1052: PetscSNPrintf(name, sizeof(name), "TE%" PetscInt_FMT, i);
1053: KSPGetOperators(sub_schurs->change[i], &T, NULL);
1054: PetscObjectSetName((PetscObject)T, name);
1055: MatView(T, matl_dbg_viewer);
1056: PetscSNPrintf(name, sizeof(name), "ITE%" PetscInt_FMT, i);
1057: PetscObjectSetName((PetscObject)sub_schurs->change_primal_sub[i], name);
1058: ISView(sub_schurs->change_primal_sub[i], matl_dbg_viewer);
1059: }
1060: cum += subset_size;
1061: }
1062: PetscViewerFlush(matl_dbg_viewer);
1063: }
1065: /* get explicit Schur Complement computed during numeric factorization */
1066: MatFactorGetSchurComplement(F, &S_all, NULL);
1067: PetscStrncpy(stype, MATSEQDENSE, sizeof(stype));
1068: #if defined(PETSC_HAVE_CUDA)
1069: PetscObjectTypeCompareAny((PetscObject)A, &gpu, MATSEQAIJVIENNACL, MATSEQAIJCUSPARSE, "");
1070: #endif
1071: if (gpu) PetscStrncpy(stype, MATSEQDENSECUDA, sizeof(stype));
1072: PetscOptionsGetString(NULL, sub_schurs->prefix, "-sub_schurs_schur_mat_type", stype, sizeof(stype), NULL);
1073: MatConvert(S_all, stype, MAT_INPLACE_MATRIX, &S_all);
1074: MatSetOption(S_all, MAT_SPD, sub_schurs->is_posdef);
1075: MatSetOption(S_all, MAT_HERMITIAN, sub_schurs->is_hermitian);
1076: MatGetType(S_all, &Stype);
1078: /* we can reuse the solvers if we are not using the economic version */
1079: reuse_solvers = (PetscBool)(reuse_solvers && !economic);
1080: if (!sub_schurs->gdsw) {
1081: factor_workaround = (PetscBool)(reuse_solvers && factor_workaround);
1082: if (!sub_schurs->is_posdef && factor_workaround && compute_Stilda && size_active_schur) reuse_solvers = factor_workaround = PETSC_FALSE;
1083: }
1084: solver_S = PETSC_TRUE;
1086: /* update the Schur complement with the change of basis on the pressures */
1087: if (benign_n) {
1088: const PetscScalar *cs_AIB;
1089: PetscScalar *S_data, *AIIm1_data;
1090: Mat S2 = NULL, S3 = NULL; /* dbg */
1091: PetscScalar *S2_data, *S3_data; /* dbg */
1092: Vec v, benign_AIIm1_ones;
1093: PetscInt sizeA;
1095: MatDenseGetArray(S_all, &S_data);
1096: MatCreateVecs(A, &v, &benign_AIIm1_ones);
1097: VecGetSize(v, &sizeA);
1098: #if defined(PETSC_HAVE_MUMPS)
1099: MatMumpsSetIcntl(F, 26, 0);
1100: #endif
1101: #if defined(PETSC_HAVE_MKL_PARDISO)
1102: MatMkl_PardisoSetCntl(F, 70, 1);
1103: #endif
1104: MatDenseGetArrayRead(cs_AIB_mat, &cs_AIB);
1105: MatDenseGetArray(benign_AIIm1_ones_mat, &AIIm1_data);
1106: if (matl_dbg_viewer) {
1107: MatDuplicate(S_all, MAT_DO_NOT_COPY_VALUES, &S2);
1108: MatDuplicate(S_all, MAT_DO_NOT_COPY_VALUES, &S3);
1109: MatDenseGetArray(S2, &S2_data);
1110: MatDenseGetArray(S3, &S3_data);
1111: }
1112: for (i = 0; i < benign_n; i++) {
1113: PetscScalar *array, sum = 0., one = 1., *sums;
1114: const PetscInt *idxs;
1115: PetscInt k, j, nz;
1116: PetscBLASInt B_k, B_n;
1118: PetscCalloc1(benign_n, &sums);
1119: VecPlaceArray(benign_AIIm1_ones, AIIm1_data + sizeA * i);
1120: VecCopy(benign_AIIm1_ones, v);
1121: MatSolve(F, v, benign_AIIm1_ones);
1122: MatMult(A, benign_AIIm1_ones, v);
1123: VecResetArray(benign_AIIm1_ones);
1124: /* p0 dofs (eliminated) are excluded from the sums */
1125: for (k = 0; k < benign_n; k++) {
1126: ISGetLocalSize(is_p_r[k], &nz);
1127: ISGetIndices(is_p_r[k], &idxs);
1128: for (j = 0; j < nz - 1; j++) sums[k] -= AIIm1_data[idxs[j] + sizeA * i];
1129: ISRestoreIndices(is_p_r[k], &idxs);
1130: }
1131: VecGetArrayRead(v, (const PetscScalar **)&array);
1132: if (matl_dbg_viewer) {
1133: Vec vv;
1134: char name[16];
1136: VecCreateSeqWithArray(PETSC_COMM_SELF, 1, size_schur, array + n_I, &vv);
1137: PetscSNPrintf(name, sizeof(name), "Pvs%" PetscInt_FMT, i);
1138: PetscObjectSetName((PetscObject)vv, name);
1139: VecView(vv, matl_dbg_viewer);
1140: }
1141: /* perform sparse rank updates on symmetric Schur (TODO: move outside of the loop?) */
1142: /* cs_AIB already scaled by 1./nz */
1143: B_k = 1;
1144: for (k = 0; k < benign_n; k++) {
1145: sum = sums[k];
1146: PetscBLASIntCast(size_schur, &B_n);
1148: if (PetscAbsScalar(sum) == 0.0) continue;
1149: if (k == i) {
1150: PetscCallBLAS("BLASsyrk", BLASsyrk_("L", "N", &B_n, &B_k, &sum, cs_AIB + i * size_schur, &B_n, &one, S_data, &B_n));
1151: if (matl_dbg_viewer) PetscCallBLAS("BLASsyrk", BLASsyrk_("L", "N", &B_n, &B_k, &sum, cs_AIB + i * size_schur, &B_n, &one, S3_data, &B_n));
1152: } else { /* XXX Is it correct to use symmetric rank-2 update with half of the sum? */
1153: sum /= 2.0;
1154: PetscCallBLAS("BLASsyr2k", BLASsyr2k_("L", "N", &B_n, &B_k, &sum, cs_AIB + k * size_schur, &B_n, cs_AIB + i * size_schur, &B_n, &one, S_data, &B_n));
1155: if (matl_dbg_viewer) PetscCallBLAS("BLASsyr2k", BLASsyr2k_("L", "N", &B_n, &B_k, &sum, cs_AIB + k * size_schur, &B_n, cs_AIB + i * size_schur, &B_n, &one, S3_data, &B_n));
1156: }
1157: }
1158: sum = 1.;
1159: PetscCallBLAS("BLASsyr2k", BLASsyr2k_("L", "N", &B_n, &B_k, &sum, array + n_I, &B_n, cs_AIB + i * size_schur, &B_n, &one, S_data, &B_n));
1160: if (matl_dbg_viewer) PetscCallBLAS("BLASsyr2k", BLASsyr2k_("L", "N", &B_n, &B_k, &sum, array + n_I, &B_n, cs_AIB + i * size_schur, &B_n, &one, S2_data, &B_n));
1161: VecRestoreArrayRead(v, (const PetscScalar **)&array);
1162: /* set p0 entry of AIIm1_ones to zero */
1163: ISGetLocalSize(is_p_r[i], &nz);
1164: ISGetIndices(is_p_r[i], &idxs);
1165: for (j = 0; j < benign_n; j++) AIIm1_data[idxs[nz - 1] + sizeA * j] = 0.;
1166: ISRestoreIndices(is_p_r[i], &idxs);
1167: PetscFree(sums);
1168: }
1169: VecDestroy(&benign_AIIm1_ones);
1170: if (matl_dbg_viewer) {
1171: MatDenseRestoreArray(S2, &S2_data);
1172: MatDenseRestoreArray(S3, &S3_data);
1173: }
1174: if (!S_lower_triangular) { /* I need to expand the upper triangular data (column oriented) */
1175: PetscInt k, j;
1176: for (k = 0; k < size_schur; k++) {
1177: for (j = k; j < size_schur; j++) S_data[j * size_schur + k] = PetscConj(S_data[k * size_schur + j]);
1178: }
1179: }
1181: /* restore defaults */
1182: #if defined(PETSC_HAVE_MUMPS)
1183: MatMumpsSetIcntl(F, 26, -1);
1184: #endif
1185: #if defined(PETSC_HAVE_MKL_PARDISO)
1186: MatMkl_PardisoSetCntl(F, 70, 0);
1187: #endif
1188: MatDenseRestoreArrayRead(cs_AIB_mat, &cs_AIB);
1189: MatDenseRestoreArray(benign_AIIm1_ones_mat, &AIIm1_data);
1190: VecDestroy(&v);
1191: MatDenseRestoreArray(S_all, &S_data);
1192: if (matl_dbg_viewer) {
1193: Mat S;
1195: MatFactorRestoreSchurComplement(F, &S_all, MAT_FACTOR_SCHUR_UNFACTORED);
1196: MatFactorCreateSchurComplement(F, &S, NULL);
1197: PetscObjectSetName((PetscObject)S, "Sb");
1198: MatView(S, matl_dbg_viewer);
1199: MatDestroy(&S);
1200: PetscObjectSetName((PetscObject)S2, "S2P");
1201: MatView(S2, matl_dbg_viewer);
1202: PetscObjectSetName((PetscObject)S3, "S3P");
1203: MatView(S3, matl_dbg_viewer);
1204: PetscObjectSetName((PetscObject)cs_AIB_mat, "cs");
1205: MatView(cs_AIB_mat, matl_dbg_viewer);
1206: MatFactorGetSchurComplement(F, &S_all, NULL);
1207: }
1208: MatDestroy(&S2);
1209: MatDestroy(&S3);
1210: }
1211: if (!reuse_solvers) {
1212: for (i = 0; i < benign_n; i++) ISDestroy(&is_p_r[i]);
1213: PetscFree(is_p_r);
1214: MatDestroy(&cs_AIB_mat);
1215: MatDestroy(&benign_AIIm1_ones_mat);
1216: }
1217: } else { /* we can't use MatFactor when size_schur == size_of_the_problem */
1218: MatConvert(A, MATSEQDENSE, MAT_INITIAL_MATRIX, &S_all);
1219: MatGetType(S_all, &Stype);
1220: reuse_solvers = PETSC_FALSE; /* TODO: why can't we reuse the solvers here? */
1221: factor_workaround = PETSC_FALSE;
1222: solver_S = PETSC_FALSE;
1223: }
1225: if (reuse_solvers) {
1226: Mat A_II, pA_II, Afake;
1227: Vec vec1_B;
1228: PCBDDCReuseSolvers msolv_ctx;
1229: PetscInt n_R;
1231: if (sub_schurs->reuse_solver) {
1232: PCBDDCReuseSolversReset(sub_schurs->reuse_solver);
1233: } else {
1234: PetscNew(&sub_schurs->reuse_solver);
1235: }
1236: msolv_ctx = sub_schurs->reuse_solver;
1237: MatSchurComplementGetSubMatrices(sub_schurs->S, &A_II, &pA_II, NULL, NULL, NULL);
1238: PetscObjectReference((PetscObject)F);
1239: msolv_ctx->F = F;
1240: MatCreateVecs(F, &msolv_ctx->sol, NULL);
1241: /* currently PETSc has no support for MatSolve(F,x,x), so cheat and let rhs and sol share the same memory */
1242: {
1243: PetscScalar *array;
1244: PetscInt n;
1246: VecGetLocalSize(msolv_ctx->sol, &n);
1247: VecGetArray(msolv_ctx->sol, &array);
1248: VecCreateSeqWithArray(PetscObjectComm((PetscObject)msolv_ctx->sol), 1, n, array, &msolv_ctx->rhs);
1249: VecRestoreArray(msolv_ctx->sol, &array);
1250: }
1251: msolv_ctx->has_vertices = schur_has_vertices;
1253: /* interior solver */
1254: PCCreate(PetscObjectComm((PetscObject)A_II), &msolv_ctx->interior_solver);
1255: PCSetOperators(msolv_ctx->interior_solver, A_II, pA_II);
1256: PCSetType(msolv_ctx->interior_solver, PCSHELL);
1257: PCShellSetName(msolv_ctx->interior_solver, "Interior solver (w/o Schur factorization)");
1258: PCShellSetContext(msolv_ctx->interior_solver, msolv_ctx);
1259: PCShellSetView(msolv_ctx->interior_solver, PCBDDCReuseSolvers_View);
1260: PCShellSetApply(msolv_ctx->interior_solver, PCBDDCReuseSolvers_Interior);
1261: PCShellSetApplyTranspose(msolv_ctx->interior_solver, PCBDDCReuseSolvers_InteriorTranspose);
1262: if (sub_schurs->gdsw) PCShellSetDestroy(msolv_ctx->interior_solver, PCBDDCReuseSolvers_Destroy);
1264: /* correction solver */
1265: if (!sub_schurs->gdsw) {
1266: PCCreate(PetscObjectComm((PetscObject)A_II), &msolv_ctx->correction_solver);
1267: PCSetType(msolv_ctx->correction_solver, PCSHELL);
1268: PCShellSetName(msolv_ctx->correction_solver, "Correction solver (with Schur factorization)");
1269: PCShellSetContext(msolv_ctx->correction_solver, msolv_ctx);
1270: PCShellSetView(msolv_ctx->correction_solver, PCBDDCReuseSolvers_View);
1271: PCShellSetApply(msolv_ctx->correction_solver, PCBDDCReuseSolvers_Correction);
1272: PCShellSetApplyTranspose(msolv_ctx->correction_solver, PCBDDCReuseSolvers_CorrectionTranspose);
1274: /* scatter and vecs for Schur complement solver */
1275: MatCreateVecs(S_all, &msolv_ctx->sol_B, &msolv_ctx->rhs_B);
1276: MatCreateVecs(sub_schurs->S, &vec1_B, NULL);
1277: if (!schur_has_vertices) {
1278: ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap, IS_GTOLM_DROP, is_A_all, &msolv_ctx->is_B);
1279: VecScatterCreate(vec1_B, msolv_ctx->is_B, msolv_ctx->sol_B, NULL, &msolv_ctx->correction_scatter_B);
1280: PetscObjectReference((PetscObject)is_A_all);
1281: msolv_ctx->is_R = is_A_all;
1282: } else {
1283: IS is_B_all;
1284: const PetscInt *idxs;
1285: PetscInt dual, n_v, n;
1287: ISGetLocalSize(sub_schurs->is_vertices, &n_v);
1288: dual = size_schur - n_v;
1289: ISGetLocalSize(is_A_all, &n);
1290: ISGetIndices(is_A_all, &idxs);
1291: ISCreateGeneral(PetscObjectComm((PetscObject)is_A_all), dual, idxs + n_I, PETSC_COPY_VALUES, &is_B_all);
1292: ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap, IS_GTOLM_DROP, is_B_all, &msolv_ctx->is_B);
1293: ISDestroy(&is_B_all);
1294: ISCreateStride(PetscObjectComm((PetscObject)is_A_all), dual, 0, 1, &is_B_all);
1295: VecScatterCreate(vec1_B, msolv_ctx->is_B, msolv_ctx->sol_B, is_B_all, &msolv_ctx->correction_scatter_B);
1296: ISDestroy(&is_B_all);
1297: ISCreateGeneral(PetscObjectComm((PetscObject)is_A_all), n - n_v, idxs, PETSC_COPY_VALUES, &msolv_ctx->is_R);
1298: ISRestoreIndices(is_A_all, &idxs);
1299: }
1300: ISGetLocalSize(msolv_ctx->is_R, &n_R);
1301: MatCreateSeqAIJ(PETSC_COMM_SELF, n_R, n_R, 0, NULL, &Afake);
1302: MatAssemblyBegin(Afake, MAT_FINAL_ASSEMBLY);
1303: MatAssemblyEnd(Afake, MAT_FINAL_ASSEMBLY);
1304: PCSetOperators(msolv_ctx->correction_solver, Afake, Afake);
1305: MatDestroy(&Afake);
1306: VecDestroy(&vec1_B);
1307: }
1308: /* communicate benign info to solver context */
1309: if (benign_n) {
1310: PetscScalar *array;
1312: msolv_ctx->benign_n = benign_n;
1313: msolv_ctx->benign_zerodiag_subs = is_p_r;
1314: PetscMalloc1(benign_n, &msolv_ctx->benign_save_vals);
1315: msolv_ctx->benign_csAIB = cs_AIB_mat;
1316: MatCreateVecs(cs_AIB_mat, &msolv_ctx->benign_corr_work, NULL);
1317: VecGetArray(msolv_ctx->benign_corr_work, &array);
1318: VecCreateSeqWithArray(PETSC_COMM_SELF, 1, size_schur, array, &msolv_ctx->benign_dummy_schur_vec);
1319: VecRestoreArray(msolv_ctx->benign_corr_work, &array);
1320: msolv_ctx->benign_AIIm1ones = benign_AIIm1_ones_mat;
1321: }
1322: } else {
1323: if (sub_schurs->reuse_solver) PCBDDCReuseSolversReset(sub_schurs->reuse_solver);
1324: PetscFree(sub_schurs->reuse_solver);
1325: }
1326: MatDestroy(&A);
1327: ISDestroy(&is_A_all);
1329: /* Work arrays */
1330: PetscMalloc1(max_subset_size * max_subset_size, &work);
1332: /* S_Ej_all */
1333: cum = cum2 = 0;
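/* cum walks over the subset dofs, while cum2 points at the start of the current subset's
   diagonal block inside the column-major size_schur x size_schur array of S_all; each subset
   advances cum2 by subset_size*(size_schur+1). */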
1334: MatDenseGetArrayRead(S_all, &rS_data);
1335: MatSeqAIJGetArray(sub_schurs->S_Ej_all, &SEj_arr);
1336: if (sub_schurs->sum_S_Ej_inv_all) MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all, &SEjinv_arr);
1337: if (sub_schurs->gdsw) MatCreateSubMatrices(sub_schurs->A, sub_schurs->n_subs, sub_schurs->is_subs, sub_schurs->is_subs, MAT_INITIAL_MATRIX, &gdswA);
1338: for (i = 0; i < sub_schurs->n_subs; i++) {
1339: PetscInt j;
1341: /* get S_E (or K^i_EE for GDSW) */
1342: ISGetLocalSize(sub_schurs->is_subs[i], &subset_size);
1343: if (sub_schurs->gdsw) {
1344: Mat T;
1346: MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, work, &T);
1347: MatConvert(gdswA[i], MATDENSE, MAT_REUSE_MATRIX, &T);
1348: MatDestroy(&T);
1349: } else {
1350: if (S_lower_triangular) { /* I need to expand the upper triangular data (column oriented) */
1351: PetscInt k;
1352: for (k = 0; k < subset_size; k++) {
1353: for (j = k; j < subset_size; j++) {
1354: work[k * subset_size + j] = rS_data[cum2 + k * size_schur + j];
1355: work[j * subset_size + k] = PetscConj(rS_data[cum2 + k * size_schur + j]);
1356: }
1357: }
1358: } else { /* just copy to workspace */
1359: PetscInt k;
1360: for (k = 0; k < subset_size; k++) {
1361: for (j = 0; j < subset_size; j++) work[k * subset_size + j] = rS_data[cum2 + k * size_schur + j];
1362: }
1363: }
1364: }
1365: /* insert S_E values */
1366: if (sub_schurs->change) {
1367: Mat change_sub, SEj, T;
1369: /* change basis */
1370: KSPGetOperators(sub_schurs->change[i], &change_sub, NULL);
1371: MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, work, &SEj);
1372: if (!sub_schurs->change_with_qr) { /* currently there's no support for PtAP with P SeqAIJ */
1373: Mat T2;
1374: MatTransposeMatMult(change_sub, SEj, MAT_INITIAL_MATRIX, 1.0, &T2);
1375: MatMatMult(T2, change_sub, MAT_INITIAL_MATRIX, 1.0, &T);
1376: MatConvert(T, MATSEQDENSE, MAT_INPLACE_MATRIX, &T);
1377: MatDestroy(&T2);
1378: } else {
1379: MatPtAP(SEj, change_sub, MAT_INITIAL_MATRIX, 1.0, &T);
1380: }
1381: MatCopy(T, SEj, SAME_NONZERO_PATTERN);
1382: MatDestroy(&T);
1383: MatZeroRowsColumnsIS(SEj, sub_schurs->change_primal_sub[i], 1.0, NULL, NULL);
1384: MatDestroy(&SEj);
1385: }
1386: PetscArraycpy(SEj_arr, work, subset_size * subset_size);
1387: if (compute_Stilda) {
1388: if (deluxe) { /* if adaptivity is requested, invert S_E blocks */
1389: Mat M;
1390: const PetscScalar *vals;
1391: PetscBool isdense, isdensecuda;
1393: MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, work, &M);
1394: MatSetOption(M, MAT_SPD, sub_schurs->is_posdef);
1395: MatSetOption(M, MAT_HERMITIAN, sub_schurs->is_hermitian);
1396: if (!PetscBTLookup(sub_schurs->is_edge, i)) MatSetType(M, Stype);
1397: PetscObjectTypeCompare((PetscObject)M, MATSEQDENSE, &isdense);
1398: PetscObjectTypeCompare((PetscObject)M, MATSEQDENSECUDA, &isdensecuda);
1399: if (use_cholesky) {
1400: MatCholeskyFactor(M, NULL, NULL);
1401: } else {
1402: MatLUFactor(M, NULL, NULL, NULL);
1403: }
1404: if (isdense) {
1405: MatSeqDenseInvertFactors_Private(M);
1406: #if defined(PETSC_HAVE_CUDA)
1407: } else if (isdensecuda) {
1408: MatSeqDenseCUDAInvertFactors_Private(M);
1409: #endif
1410: } else SETERRQ(PetscObjectComm((PetscObject)M), PETSC_ERR_SUP, "Not implemented for type %s", Stype);
1411: MatDenseGetArrayRead(M, &vals);
1412: PetscArraycpy(SEjinv_arr, vals, subset_size * subset_size);
1413: MatDenseRestoreArrayRead(M, &vals);
1414: MatDestroy(&M);
1415: } else if (scaling) { /* not using deluxe */
1416: Mat SEj;
1417: Vec D;
1418: PetscScalar *array;
1420: MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, work, &SEj);
1421: VecGetArray(Dall, &array);
1422: VecCreateSeqWithArray(PETSC_COMM_SELF, 1, subset_size, array + cum, &D);
1423: VecRestoreArray(Dall, &array);
1424: VecShift(D, -1.);
1425: MatDiagonalScale(SEj, D, D);
1426: MatDestroy(&SEj);
1427: VecDestroy(&D);
1428: PetscArraycpy(SEj_arr, work, subset_size * subset_size);
1429: }
1430: }
1431: cum += subset_size;
1432: cum2 += subset_size * (size_schur + 1);
1433: SEj_arr += subset_size * subset_size;
1434: if (SEjinv_arr) SEjinv_arr += subset_size * subset_size;
1435: }
1436: if (sub_schurs->gdsw) MatDestroySubMatrices(sub_schurs->n_subs, &gdswA);
1437: MatDenseRestoreArrayRead(S_all, &rS_data);
1438: MatSeqAIJRestoreArray(sub_schurs->S_Ej_all, &SEj_arr);
1439: if (sub_schurs->sum_S_Ej_inv_all) MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all, &SEjinv_arr);
1440: if (solver_S) MatFactorRestoreSchurComplement(F, &S_all, MAT_FACTOR_SCHUR_UNFACTORED);
1442: /* may prevent unneeded copies, since MUMPS and MKL PARDISO always use CPU memory;
1443: however, preliminary tests indicate that using GPUs is still faster in the solve phase */
1444: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
1445: if (reuse_solvers) {
1446: Mat St;
1447: MatFactorSchurStatus st;
1449: flg = PETSC_FALSE;
1450: PetscOptionsGetBool(NULL, sub_schurs->prefix, "-sub_schurs_schur_pin_to_cpu", &flg, NULL);
1451: MatFactorGetSchurComplement(F, &St, &st);
1452: MatBindToCPU(St, flg);
1453: MatFactorRestoreSchurComplement(F, &St, st);
1454: }
1455: #endif
1457: schur_factor = NULL;
1458: if (compute_Stilda && size_active_schur) {
1459: if (sub_schurs->n_subs == 1 && size_schur == size_active_schur && deluxe) { /* we already computed the inverse */
1460: MatSeqAIJGetArrayWrite(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr);
1461: PetscArraycpy(SEjinv_arr, work, size_schur * size_schur);
1462: MatSeqAIJRestoreArrayWrite(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr);
1463: } else {
1464: Mat S_all_inv = NULL;
1466: if (solver_S && !sub_schurs->gdsw) {
1467: /* for adaptive selection we need S^-1; for solver reuse we need S_\Delta\Delta^-1.
1468: The latter is not a principal submatrix of S^-1. However, the factors can be reused since S_\Delta\Delta is the leading principal submatrix of S */
1469: if (factor_workaround) { /* invert without calling MatFactorInvertSchurComplement, since we are hacking */
1470: PetscScalar *data;
1471: PetscInt nd = 0;
1474: MatFactorGetSchurComplement(F, &S_all_inv, NULL);
1475: MatDenseGetArray(S_all_inv, &data);
1476: if (sub_schurs->is_dir) { /* dirichlet dofs could have different scalings */
1477: ISGetLocalSize(sub_schurs->is_dir, &nd);
1478: }
1480: /* factor and invert the active dofs and vertices (Dirichlet dofs do not contribute) */
1481: if (schur_has_vertices) {
1482: Mat M;
1483: PetscScalar *tdata;
1484: PetscInt nv = 0, news;
1486: ISGetLocalSize(sub_schurs->is_vertices, &nv);
1487: news = size_active_schur + nv;
1488: PetscCalloc1(news * news, &tdata);
1489: for (i = 0; i < size_active_schur; i++) {
1490: PetscArraycpy(tdata + i * (news + 1), data + i * (size_schur + 1), size_active_schur - i);
1491: PetscArraycpy(tdata + i * (news + 1) + size_active_schur - i, data + i * size_schur + size_active_schur + nd, nv);
1492: }
1493: for (i = 0; i < nv; i++) {
1494: PetscInt k = i + size_active_schur;
1495: PetscArraycpy(tdata + k * (news + 1), data + (k + nd) * (size_schur + 1), nv - i);
1496: }
1498: MatCreateSeqDense(PETSC_COMM_SELF, news, news, tdata, &M);
1499: MatSetOption(M, MAT_SPD, PETSC_TRUE);
1500: MatCholeskyFactor(M, NULL, NULL);
1501: /* save the factors */
1502: cum = 0;
1503: PetscMalloc1((size_active_schur * (size_active_schur + 1)) / 2 + nd, &schur_factor);
1504: for (i = 0; i < size_active_schur; i++) {
1505: PetscArraycpy(schur_factor + cum, tdata + i * (news + 1), size_active_schur - i);
1506: cum += size_active_schur - i;
1507: }
1508: for (i = 0; i < nd; i++) schur_factor[cum + i] = PetscSqrtReal(PetscRealPart(data[(i + size_active_schur) * (size_schur + 1)]));
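              /* schur_factor now holds the lower-triangular part of the factored active block, packed by columns:
                 for column i the entries from the diagonal down (size_active_schur - i of them) are stored
                 consecutively, followed by the nd square roots of the Dirichlet diagonal entries of S. For example,
                 with size_active_schur = 3 the layout is
                 [ L(0,0) L(1,0) L(2,0) | L(1,1) L(2,1) | L(2,2) | sqrt(d_0) ... sqrt(d_{nd-1}) ],
                 matching the (n*(n+1))/2 + nd allocation above. */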
1509: MatSeqDenseInvertFactors_Private(M);
1510: /* move back just the active dofs to the Schur complement */
1511: for (i = 0; i < size_active_schur; i++) PetscArraycpy(data + i * size_schur, tdata + i * news, size_active_schur);
1512: PetscFree(tdata);
1513: MatDestroy(&M);
1514: } else { /* we can factorize and invert just the active dofs */
1515: Mat M;
1516: PetscScalar *aux;
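                /* Only the leading size_active_schur x size_active_schur block is Cholesky-factored and inverted
                   in place (M wraps that block of data with leading dimension size_schur); the coupling blocks
                   between active and trailing Dirichlet dofs are then zeroed, and the Dirichlet diagonal entries
                   are replaced by the reciprocals saved in aux, so the array effectively stores a block-diagonal
                   inverse. */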
1518: PetscMalloc1(nd, &aux);
1519: for (i = 0; i < nd; i++) aux[i] = 1.0 / data[(i + size_active_schur) * (size_schur + 1)];
1520: MatCreateSeqDense(PETSC_COMM_SELF, size_active_schur, size_active_schur, data, &M);
1521: MatDenseSetLDA(M, size_schur);
1522: MatSetOption(M, MAT_SPD, PETSC_TRUE);
1523: MatCholeskyFactor(M, NULL, NULL);
1524: MatSeqDenseInvertFactors_Private(M);
1525: MatDestroy(&M);
1526: MatCreateSeqDense(PETSC_COMM_SELF, size_schur, nd, data + size_active_schur * size_schur, &M);
1527: MatZeroEntries(M);
1528: MatDestroy(&M);
1529: MatCreateSeqDense(PETSC_COMM_SELF, nd, size_schur, data + size_active_schur, &M);
1530: MatDenseSetLDA(M, size_schur);
1531: MatZeroEntries(M);
1532: MatDestroy(&M);
1533: for (i = 0; i < nd; i++) data[(i + size_active_schur) * (size_schur + 1)] = aux[i];
1534: PetscFree(aux);
1535: }
1536: MatDenseRestoreArray(S_all_inv, &data);
1537: } else { /* use MatFactor calls to invert S */
1538: MatFactorInvertSchurComplement(F);
1539: MatFactorGetSchurComplement(F, &S_all_inv, NULL);
1540: }
1541: } else if (!sub_schurs->gdsw) { /* we need to invert explicitly since we are not using MatFactor for S */
1542: PetscObjectReference((PetscObject)S_all);
1543: S_all_inv = S_all;
1544: MatDenseGetArray(S_all_inv, &S_data);
1545: PetscBLASIntCast(size_schur, &B_N);
1546: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
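              /* Invert S in place with LAPACK: potrf/potri (Cholesky) when the problem is positive definite,
                 sytrf/sytri (Bunch-Kaufman) when it is only symmetric, getrf/getri (LU) otherwise; only the lower
                 triangle is referenced in the symmetric cases, and the flop count logged below is the usual
                 O(n^3) leading-order estimate. */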
1547: if (use_potr) {
1548: PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("L", &B_N, S_data, &B_N, &B_ierr));
1550: PetscCallBLAS("LAPACKpotri", LAPACKpotri_("L", &B_N, S_data, &B_N, &B_ierr));
1552: } else if (use_sytr) {
1553: PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &B_N, S_data, &B_N, pivots, Bwork, &B_lwork, &B_ierr));
1555: PetscCallBLAS("LAPACKsytri", LAPACKsytri_("L", &B_N, S_data, &B_N, pivots, Bwork, &B_ierr));
1557: } else {
1558: PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&B_N, &B_N, S_data, &B_N, pivots, &B_ierr));
1560: PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&B_N, S_data, &B_N, pivots, Bwork, &B_lwork, &B_ierr));
1562: }
1563: PetscLogFlops(1.0 * size_schur * size_schur * size_schur);
1564: PetscFPTrapPop();
1565: MatDenseRestoreArray(S_all_inv, &S_data);
1566: } else if (sub_schurs->gdsw) {
1567: Mat tS, tX, SEj, S_II, S_IE, S_EE;
1568: KSP pS_II;
1569: PC pS_II_pc;
1570: IS EE, II;
1571: PetscInt nS;
1573: MatFactorCreateSchurComplement(F, &tS, NULL);
1574: MatGetSize(tS, &nS, NULL);
1575: MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr);
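          /* GDSW case: for each connected component E the loop below extracts S_II, S_IE and S_EE from the
             explicit Schur complement tS (I being the complement of E in the Schur space), applies a
             pseudo-inverse of S_II (KSPPREONLY + PCSVD, options under the sub_schurs prefix appended with
             "pseudo_") to S_IE, and stores S_EE - S_IE^T * pinv(S_II) * S_IE in the corresponding block of
             sum_S_Ej_tilda_all. */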
1576: for (i = 0, cum = 0; i < sub_schurs->n_subs; i++) { /* naive implementation */
1577: ISGetLocalSize(sub_schurs->is_subs[i], &subset_size);
1578: MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, SEjinv_arr, &SEj);
1580: ISCreateStride(PETSC_COMM_SELF, subset_size, cum, 1, &EE);
1581: ISComplement(EE, 0, nS, &II);
1582: MatCreateSubMatrix(tS, II, II, MAT_INITIAL_MATRIX, &S_II);
1583: MatCreateSubMatrix(tS, II, EE, MAT_INITIAL_MATRIX, &S_IE);
1584: MatCreateSubMatrix(tS, EE, EE, MAT_INITIAL_MATRIX, &S_EE);
1585: ISDestroy(&II);
1586: ISDestroy(&EE);
1588: KSPCreate(PETSC_COMM_SELF, &pS_II);
1589: KSPSetType(pS_II, KSPPREONLY);
1590: KSPGetPC(pS_II, &pS_II_pc);
1591: PCSetType(pS_II_pc, PCSVD);
1592: KSPSetOptionsPrefix(pS_II, sub_schurs->prefix);
1593: KSPAppendOptionsPrefix(pS_II, "pseudo_");
1594: KSPSetOperators(pS_II, S_II, S_II);
1595: MatDestroy(&S_II);
1596: KSPSetFromOptions(pS_II);
1597: KSPSetUp(pS_II);
1598: MatDuplicate(S_IE, MAT_DO_NOT_COPY_VALUES, &tX);
1599: KSPMatSolve(pS_II, S_IE, tX);
1600: KSPDestroy(&pS_II);
1602: MatTransposeMatMult(S_IE, tX, MAT_REUSE_MATRIX, PETSC_DEFAULT, &SEj);
1603: MatDestroy(&S_IE);
1604: MatDestroy(&tX);
1605: MatAYPX(SEj, -1, S_EE, SAME_NONZERO_PATTERN);
1606: MatDestroy(&S_EE);
1608: MatDestroy(&SEj);
1609: cum += subset_size;
1610: SEjinv_arr += subset_size * subset_size;
1611: }
1612: MatDestroy(&tS);
1613: MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr);
1614: }
1615: /* S_Ej_tilda_all */
1616: cum = cum2 = 0;
1617: rS_data = NULL;
1618: if (S_all_inv) MatDenseGetArrayRead(S_all_inv, &rS_data);
1619: MatSeqAIJGetArrayWrite(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr);
1620: for (i = 0; i < sub_schurs->n_subs; i++) {
1621: PetscInt j;
1623: ISGetLocalSize(sub_schurs->is_subs[i], &subset_size);
1624: /* get (St^-1)_E */
1625: /* Unless we are changing the variables, we don't need to expand to the upper triangular part, since St^-1
1626:    will be properly accessed later during adaptive selection */
1627: if (rS_data) {
1628: if (S_lower_triangular) {
1629: PetscInt k;
1630: if (sub_schurs->change) {
1631: for (k = 0; k < subset_size; k++) {
1632: for (j = k; j < subset_size; j++) {
1633: work[k * subset_size + j] = rS_data[cum2 + k * size_schur + j];
1634: work[j * subset_size + k] = work[k * subset_size + j];
1635: }
1636: }
1637: } else {
1638: for (k = 0; k < subset_size; k++) {
1639: for (j = k; j < subset_size; j++) work[k * subset_size + j] = rS_data[cum2 + k * size_schur + j];
1640: }
1641: }
1642: } else {
1643: PetscInt k;
1644: for (k = 0; k < subset_size; k++) {
1645: for (j = 0; j < subset_size; j++) work[k * subset_size + j] = rS_data[cum2 + k * size_schur + j];
1646: }
1647: }
1648: }
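        /* If a change of basis is attached to this subset, transform the block as C^T * SEj * C, where C is the
           operator of sub_schurs->change[i]: either via explicit transpose/mat-mat products (the SeqAIJ case,
           for which MatPtAP is not supported, see below) or via MatPtAP; rows and columns of the primal dofs of
           the subset are then zeroed with `val` placed on the diagonal. */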
1649: if (sub_schurs->change) {
1650: Mat change_sub, SEj, T;
1651: PetscScalar val = sub_schurs->gdsw ? PETSC_SMALL : 1. / PETSC_SMALL;
1653: /* change basis */
1654: KSPGetOperators(sub_schurs->change[i], &change_sub, NULL);
1655: MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, rS_data ? work : SEjinv_arr, &SEj);
1656: if (!sub_schurs->change_with_qr) { /* currently there's no support for PtAP with P SeqAIJ */
1657: Mat T2;
1658: MatTransposeMatMult(change_sub, SEj, MAT_INITIAL_MATRIX, 1.0, &T2);
1659: MatMatMult(T2, change_sub, MAT_INITIAL_MATRIX, 1.0, &T);
1660: MatDestroy(&T2);
1661: MatConvert(T, MATSEQDENSE, MAT_INPLACE_MATRIX, &T);
1662: } else {
1663: MatPtAP(SEj, change_sub, MAT_INITIAL_MATRIX, 1.0, &T);
1664: }
1665: MatCopy(T, SEj, SAME_NONZERO_PATTERN);
1666: MatDestroy(&T);
1667: MatZeroRowsColumnsIS(SEj, sub_schurs->change_primal_sub[i], val, NULL, NULL);
1668: MatDestroy(&SEj);
1669: }
1670: if (rS_data) PetscArraycpy(SEjinv_arr, work, subset_size * subset_size);
1671: cum += subset_size;
1672: cum2 += subset_size * (size_schur + 1);
1673: SEjinv_arr += subset_size * subset_size;
1674: }
1675: MatSeqAIJRestoreArrayWrite(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr);
1676: if (S_all_inv) {
1677: MatDenseRestoreArrayRead(S_all_inv, &rS_data);
1678: if (solver_S) {
1679: if (schur_has_vertices) {
1680: MatFactorRestoreSchurComplement(F, &S_all_inv, MAT_FACTOR_SCHUR_FACTORED);
1681: } else {
1682: MatFactorRestoreSchurComplement(F, &S_all_inv, MAT_FACTOR_SCHUR_INVERTED);
1683: }
1684: }
1685: }
1686: MatDestroy(&S_all_inv);
1687: }
1689: /* move back factors if needed */
1690: if (schur_has_vertices && factor_workaround && !sub_schurs->gdsw) {
1691: Mat S_tmp;
1692: PetscInt nd = 0;
1695: MatFactorGetSchurComplement(F, &S_tmp, NULL);
1696: if (use_potr) {
1697: PetscScalar *data;
1699: MatDenseGetArray(S_tmp, &data);
1700: PetscArrayzero(data, size_schur * size_schur);
1702: if (S_lower_triangular) {
1703: cum = 0;
1704: for (i = 0; i < size_active_schur; i++) {
1705: PetscArraycpy(data + i * (size_schur + 1), schur_factor + cum, size_active_schur - i);
1706: cum += size_active_schur - i;
1707: }
1708: } else {
1709: PetscArraycpy(data, schur_factor, size_schur * size_schur);
1710: }
1711: if (sub_schurs->is_dir) {
1712: ISGetLocalSize(sub_schurs->is_dir, &nd);
1713: for (i = 0; i < nd; i++) data[(i + size_active_schur) * (size_schur + 1)] = schur_factor[cum + i];
1714: }
1715: /* workaround: since we cannot modify the matrices used inside the solvers for the forward and backward substitutions,
1716:    set the diagonal entries of the Schur factor to a very large value */
1717: for (i = size_active_schur + nd; i < size_schur; i++) data[i * (size_schur + 1)] = infty;
1718: MatDenseRestoreArray(S_tmp, &data);
1719: } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor update not yet implemented for non SPD matrices");
1720: MatFactorRestoreSchurComplement(F, &S_tmp, MAT_FACTOR_SCHUR_FACTORED);
1721: }
1722: } else if (factor_workaround && !sub_schurs->gdsw) { /* we need to eliminate any unneeded coupling */
1723: PetscScalar *data;
1724: PetscInt nd = 0;
1726: if (sub_schurs->is_dir) { /* Dirichlet dofs could have different scalings */
1727: ISGetLocalSize(sub_schurs->is_dir, &nd);
1728: }
1729: MatFactorGetSchurComplement(F, &S_all, NULL);
1730: MatDenseGetArray(S_all, &data);
1731: for (i = 0; i < size_active_schur; i++) PetscArrayzero(data + i * size_schur + size_active_schur, size_schur - size_active_schur);
1732: for (i = size_active_schur + nd; i < size_schur; i++) {
1733: PetscArrayzero(data + i * size_schur + size_active_schur, size_schur - size_active_schur);
1734: data[i * (size_schur + 1)] = infty;
1735: }
1736: MatDenseRestoreArray(S_all, &data);
1737: MatFactorRestoreSchurComplement(F, &S_all, MAT_FACTOR_SCHUR_UNFACTORED);
1738: }
1739: PetscFree(work);
1740: PetscFree(schur_factor);
1741: VecDestroy(&Dall);
1742: }
1743: ISDestroy(&is_I_layer);
1744: MatDestroy(&S_all);
1745: MatDestroy(&A_BB);
1746: MatDestroy(&A_IB);
1747: MatDestroy(&A_BI);
1748: MatDestroy(&F);
1750: PetscMalloc1(sub_schurs->n_subs, &nnz);
1751: for (i = 0; i < sub_schurs->n_subs; i++) ISGetLocalSize(sub_schurs->is_subs[i], &nnz[i]);
1752: ISCreateGeneral(PETSC_COMM_SELF, sub_schurs->n_subs, nnz, PETSC_OWN_POINTER, &is_I_layer);
1753: MatSetVariableBlockSizes(sub_schurs->S_Ej_all, sub_schurs->n_subs, nnz);
1754: MatAssemblyBegin(sub_schurs->S_Ej_all, MAT_FINAL_ASSEMBLY);
1755: MatAssemblyEnd(sub_schurs->S_Ej_all, MAT_FINAL_ASSEMBLY);
1756: if (compute_Stilda) {
1757: MatSetVariableBlockSizes(sub_schurs->sum_S_Ej_tilda_all, sub_schurs->n_subs, nnz);
1758: MatAssemblyBegin(sub_schurs->sum_S_Ej_tilda_all, MAT_FINAL_ASSEMBLY);
1759: MatAssemblyEnd(sub_schurs->sum_S_Ej_tilda_all, MAT_FINAL_ASSEMBLY);
1760: if (deluxe) {
1761: MatSetVariableBlockSizes(sub_schurs->sum_S_Ej_inv_all, sub_schurs->n_subs, nnz);
1762: MatAssemblyBegin(sub_schurs->sum_S_Ej_inv_all, MAT_FINAL_ASSEMBLY);
1763: MatAssemblyEnd(sub_schurs->sum_S_Ej_inv_all, MAT_FINAL_ASSEMBLY);
1764: }
1765: }
1766: ISDestroy(&is_I_layer);
1768: /* Get local part of (\sum_j S_Ej) */
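  /* Scatter pattern used here (and for the Stilda/inv matrices below): the per-subdomain entries of S_Ej_all
     are pushed into the global stash with ADD_VALUES in the forward direction, so dofs shared by several
     subdomains accumulate the sum of their contributions; the reverse INSERT_VALUES scatter then brings the
     summed values back into the local sum_S_Ej_all storage. */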
1769: if (!sub_schurs->sum_S_Ej_all) MatDuplicate(sub_schurs->S_Ej_all, MAT_DO_NOT_COPY_VALUES, &sub_schurs->sum_S_Ej_all);
1770: VecSet(gstash, 0.0);
1771: MatSeqAIJGetArray(sub_schurs->S_Ej_all, &stasharray);
1772: VecPlaceArray(lstash, stasharray);
1773: VecScatterBegin(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD);
1774: VecScatterEnd(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD);
1775: MatSeqAIJRestoreArray(sub_schurs->S_Ej_all, &stasharray);
1776: VecResetArray(lstash);
1777: MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &stasharray);
1778: VecPlaceArray(lstash, stasharray);
1779: VecScatterBegin(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE);
1780: VecScatterEnd(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE);
1781: MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all, &stasharray);
1782: VecResetArray(lstash);
1784: /* Get local parts of (\sum_j S^-1_Ej) and (\sum_j St^-1_Ej) */
1785: if (compute_Stilda) {
1786: VecSet(gstash, 0.0);
1787: MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &stasharray);
1788: VecPlaceArray(lstash, stasharray);
1789: VecScatterBegin(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD);
1790: VecScatterEnd(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD);
1791: VecScatterBegin(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE);
1792: VecScatterEnd(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE);
1793: MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &stasharray);
1794: VecResetArray(lstash);
1795: if (deluxe) {
1796: VecSet(gstash, 0.0);
1797: MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all, &stasharray);
1798: VecPlaceArray(lstash, stasharray);
1799: VecScatterBegin(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD);
1800: VecScatterEnd(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD);
1801: VecScatterBegin(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE);
1802: VecScatterEnd(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE);
1803: MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all, &stasharray);
1804: VecResetArray(lstash);
1805: } else if (!sub_schurs->gdsw) {
1806: PetscScalar *array;
1807: PetscInt cum;
1809: MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &array);
1810: cum = 0;
1811: for (i = 0; i < sub_schurs->n_subs; i++) {
1812: ISGetLocalSize(sub_schurs->is_subs[i], &subset_size);
1813: PetscBLASIntCast(subset_size, &B_N);
1814: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
1815: if (use_potr) {
1816: PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("L", &B_N, array + cum, &B_N, &B_ierr));
1818: PetscCallBLAS("LAPACKpotri", LAPACKpotri_("L", &B_N, array + cum, &B_N, &B_ierr));
1820: } else if (use_sytr) {
1821: PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &B_N, array + cum, &B_N, pivots, Bwork, &B_lwork, &B_ierr));
1823: PetscCallBLAS("LAPACKsytri", LAPACKsytri_("L", &B_N, array + cum, &B_N, pivots, Bwork, &B_ierr));
1825: } else {
1826: PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&B_N, &B_N, array + cum, &B_N, pivots, &B_ierr));
1828: PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&B_N, array + cum, &B_N, pivots, Bwork, &B_lwork, &B_ierr));
1830: }
1831: PetscLogFlops(1.0 * subset_size * subset_size * subset_size);
1832: PetscFPTrapPop();
1833: cum += subset_size * subset_size;
1834: }
1835: MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &array);
1836: PetscObjectReference((PetscObject)sub_schurs->sum_S_Ej_all);
1837: MatDestroy(&sub_schurs->sum_S_Ej_inv_all);
1838: sub_schurs->sum_S_Ej_inv_all = sub_schurs->sum_S_Ej_all;
1839: }
1840: }
1841: VecDestroy(&lstash);
1842: VecDestroy(&gstash);
1843: VecScatterDestroy(&sstash);
1845: if (matl_dbg_viewer) {
1846: if (sub_schurs->S_Ej_all) {
1847: PetscObjectSetName((PetscObject)sub_schurs->S_Ej_all, "SE");
1848: MatView(sub_schurs->S_Ej_all, matl_dbg_viewer);
1849: }
1850: if (sub_schurs->sum_S_Ej_all) {
1851: PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_all, "SSE");
1852: MatView(sub_schurs->sum_S_Ej_all, matl_dbg_viewer);
1853: }
1854: if (sub_schurs->sum_S_Ej_inv_all) {
1855: PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_inv_all, "SSEm");
1856: MatView(sub_schurs->sum_S_Ej_inv_all, matl_dbg_viewer);
1857: }
1858: if (sub_schurs->sum_S_Ej_tilda_all) {
1859: PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_tilda_all, "SSEt");
1860: MatView(sub_schurs->sum_S_Ej_tilda_all, matl_dbg_viewer);
1861: }
1862: }
1864: /* free workspace */
1865: if (matl_dbg_viewer) PetscViewerFlush(matl_dbg_viewer);
1866: if (sub_schurs->debug) MPI_Barrier(comm_n);
1867: PetscViewerDestroy(&matl_dbg_viewer);
1868: PetscFree2(Bwork, pivots);
1869: PetscCommDestroy(&comm_n);
1870: return 0;
1871: }
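/* A rough sketch of the typical lifecycle of this object as driven from the BDDC setup code (indicative only;
   the actual call sites and the arguments of the setup routine above live in the PCBDDC sources):

     PCBDDCSubSchurs sub_schurs;
     PCBDDCSubSchursCreate(&sub_schurs);
     PCBDDCSubSchursInit(sub_schurs, prefix, is_I, is_B, graph, BtoNmap, copycc, gdsw);
     // ... the setup routine above then assembles S_Ej_all, sum_S_Ej_all and, if requested,
     //     sum_S_Ej_tilda_all / sum_S_Ej_inv_all for adaptive coarse space selection ...
     PCBDDCSubSchursReset(sub_schurs);    // clears the data so the context can be reused
     PCBDDCSubSchursDestroy(&sub_schurs); // frees the context
*/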
1873: PetscErrorCode PCBDDCSubSchursInit(PCBDDCSubSchurs sub_schurs, const char *prefix, IS is_I, IS is_B, PCBDDCGraph graph, ISLocalToGlobalMapping BtoNmap, PetscBool copycc, PetscBool gdsw)
1874: {
1875: IS *faces, *edges, *all_cc, vertices;
1876: PetscInt s, i, n_faces, n_edges, n_all_cc;
1877: PetscBool is_sorted, ispardiso, ismumps;
1879: ISSorted(is_I, &is_sorted);
1881: ISSorted(is_B, &is_sorted);
1884: /* reset any previous data */
1885: PCBDDCSubSchursReset(sub_schurs);
1887: sub_schurs->gdsw = gdsw;
1889: /* get index sets for faces and edges (already sorted by global ordering) */
1890: PCBDDCGraphGetCandidatesIS(graph, &n_faces, &faces, &n_edges, &edges, &vertices);
1891: n_all_cc = n_faces + n_edges;
1892: PetscBTCreate(n_all_cc, &sub_schurs->is_edge);
1893: PetscMalloc1(n_all_cc, &all_cc);
1894: n_all_cc = 0;
1895: for (i = 0; i < n_faces; i++) {
1896: ISGetSize(faces[i], &s);
1897: if (!s) continue;
1898: if (copycc) {
1899: ISDuplicate(faces[i], &all_cc[n_all_cc]);
1900: } else {
1901: PetscObjectReference((PetscObject)faces[i]);
1902: all_cc[n_all_cc] = faces[i];
1903: }
1904: n_all_cc++;
1905: }
1906: for (i = 0; i < n_edges; i++) {
1907: ISGetSize(edges[i], &s);
1908: if (!s) continue;
1909: if (copycc) {
1910: ISDuplicate(edges[i], &all_cc[n_all_cc]);
1911: } else {
1912: PetscObjectReference((PetscObject)edges[i]);
1913: all_cc[n_all_cc] = edges[i];
1914: }
1915: PetscBTSet(sub_schurs->is_edge, n_all_cc);
1916: n_all_cc++;
1917: }
1918: PetscObjectReference((PetscObject)vertices);
1919: sub_schurs->is_vertices = vertices;
1920: PCBDDCGraphRestoreCandidatesIS(graph, &n_faces, &faces, &n_edges, &edges, &vertices);
1921: sub_schurs->is_dir = NULL;
1922: PCBDDCGraphGetDirichletDofsB(graph, &sub_schurs->is_dir);
1924: /* Determine if MatFactor can be used */
1925: PetscStrallocpy(prefix, &sub_schurs->prefix);
1926: #if defined(PETSC_HAVE_MUMPS)
1927: PetscStrncpy(sub_schurs->mat_solver_type, MATSOLVERMUMPS, sizeof(sub_schurs->mat_solver_type));
1928: #elif defined(PETSC_HAVE_MKL_PARDISO)
1929: PetscStrncpy(sub_schurs->mat_solver_type, MATSOLVERMKL_PARDISO, sizeof(sub_schurs->mat_solver_type));
1930: #else
1931: PetscStrncpy(sub_schurs->mat_solver_type, MATSOLVERPETSC, sizeof(sub_schurs->mat_solver_type));
1932: #endif
1933: #if defined(PETSC_USE_COMPLEX)
1934: sub_schurs->is_hermitian = PETSC_FALSE; /* Hermitian Cholesky is not supported by PETSc and external packages */
1935: #else
1936: sub_schurs->is_hermitian = PETSC_TRUE;
1937: #endif
1938: sub_schurs->is_posdef = PETSC_TRUE;
1939: sub_schurs->is_symmetric = PETSC_TRUE;
1940: sub_schurs->debug = PETSC_FALSE;
1941: sub_schurs->restrict_comm = PETSC_FALSE;
1942: PetscOptionsBegin(PetscObjectComm((PetscObject)graph->l2gmap), sub_schurs->prefix, "BDDC sub_schurs options", "PC");
1943: PetscOptionsString("-sub_schurs_mat_solver_type", "Specific direct solver to use", NULL, sub_schurs->mat_solver_type, sub_schurs->mat_solver_type, sizeof(sub_schurs->mat_solver_type), NULL);
1944: PetscOptionsBool("-sub_schurs_symmetric", "Symmetric problem", NULL, sub_schurs->is_symmetric, &sub_schurs->is_symmetric, NULL);
1945: PetscOptionsBool("-sub_schurs_hermitian", "Hermitian problem", NULL, sub_schurs->is_hermitian, &sub_schurs->is_hermitian, NULL);
1946: PetscOptionsBool("-sub_schurs_posdef", "Positive definite problem", NULL, sub_schurs->is_posdef, &sub_schurs->is_posdef, NULL);
1947: PetscOptionsBool("-sub_schurs_restrictcomm", "Restrict communicator on active processes only", NULL, sub_schurs->restrict_comm, &sub_schurs->restrict_comm, NULL);
1948: PetscOptionsBool("-sub_schurs_debug", "Debug output", NULL, sub_schurs->debug, &sub_schurs->debug, NULL);
1949: PetscOptionsEnd();
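  /* Example (assuming, hypothetically, that the owning PC passes "pc_bddc_" as prefix): the options above would
     then be spelled on the command line as -pc_bddc_sub_schurs_mat_solver_type mumps,
     -pc_bddc_sub_schurs_posdef 0, -pc_bddc_sub_schurs_debug, and so on. */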
1950: PetscStrcmp(sub_schurs->mat_solver_type, MATSOLVERMUMPS, &ismumps);
1951: PetscStrcmp(sub_schurs->mat_solver_type, MATSOLVERMKL_PARDISO, &ispardiso);
1952: sub_schurs->schur_explicit = (PetscBool)(ispardiso || ismumps);
1954: /* for reals, symmetric and Hermitian are synonyms */
1955: #if !defined(PETSC_USE_COMPLEX)
1956: sub_schurs->is_symmetric = (PetscBool)(sub_schurs->is_symmetric && sub_schurs->is_hermitian);
1957: sub_schurs->is_hermitian = sub_schurs->is_symmetric;
1958: #endif
1960: PetscObjectReference((PetscObject)is_I);
1961: sub_schurs->is_I = is_I;
1962: PetscObjectReference((PetscObject)is_B);
1963: sub_schurs->is_B = is_B;
1964: PetscObjectReference((PetscObject)graph->l2gmap);
1965: sub_schurs->l2gmap = graph->l2gmap;
1966: PetscObjectReference((PetscObject)BtoNmap);
1967: sub_schurs->BtoNmap = BtoNmap;
1968: sub_schurs->n_subs = n_all_cc;
1969: sub_schurs->is_subs = all_cc;
1970: sub_schurs->S_Ej_all = NULL;
1971: sub_schurs->sum_S_Ej_all = NULL;
1972: sub_schurs->sum_S_Ej_inv_all = NULL;
1973: sub_schurs->sum_S_Ej_tilda_all = NULL;
1974: sub_schurs->is_Ej_all = NULL;
1975: return 0;
1976: }
1978: PetscErrorCode PCBDDCSubSchursCreate(PCBDDCSubSchurs *sub_schurs)
1979: {
1980: PCBDDCSubSchurs schurs_ctx;
1982: PetscNew(&schurs_ctx);
1983: schurs_ctx->n_subs = 0;
1984: *sub_schurs = schurs_ctx;
1985: return 0;
1986: }
1988: PetscErrorCode PCBDDCSubSchursReset(PCBDDCSubSchurs sub_schurs)
1989: {
1990: PetscInt i;
1992: if (!sub_schurs) return 0;
1993: PetscFree(sub_schurs->prefix);
1994: MatDestroy(&sub_schurs->A);
1995: MatDestroy(&sub_schurs->S);
1996: ISDestroy(&sub_schurs->is_I);
1997: ISDestroy(&sub_schurs->is_B);
1998: ISLocalToGlobalMappingDestroy(&sub_schurs->l2gmap);
1999: ISLocalToGlobalMappingDestroy(&sub_schurs->BtoNmap);
2000: MatDestroy(&sub_schurs->S_Ej_all);
2001: MatDestroy(&sub_schurs->sum_S_Ej_all);
2002: MatDestroy(&sub_schurs->sum_S_Ej_inv_all);
2003: MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);
2004: ISDestroy(&sub_schurs->is_Ej_all);
2005: ISDestroy(&sub_schurs->is_vertices);
2006: ISDestroy(&sub_schurs->is_dir);
2007: PetscBTDestroy(&sub_schurs->is_edge);
2008: for (i = 0; i < sub_schurs->n_subs; i++) ISDestroy(&sub_schurs->is_subs[i]);
2009: if (sub_schurs->n_subs) PetscFree(sub_schurs->is_subs);
2010: if (sub_schurs->reuse_solver) PCBDDCReuseSolversReset(sub_schurs->reuse_solver);
2011: PetscFree(sub_schurs->reuse_solver);
2012: if (sub_schurs->change) {
2013: for (i = 0; i < sub_schurs->n_subs; i++) {
2014: KSPDestroy(&sub_schurs->change[i]);
2015: ISDestroy(&sub_schurs->change_primal_sub[i]);
2016: }
2017: }
2018: PetscFree(sub_schurs->change);
2019: PetscFree(sub_schurs->change_primal_sub);
2020: sub_schurs->n_subs = 0;
2021: return 0;
2022: }
2024: PetscErrorCode PCBDDCSubSchursDestroy(PCBDDCSubSchurs *sub_schurs)
2025: {
2026: PCBDDCSubSchursReset(*sub_schurs);
2027: PetscFree(*sub_schurs);
2028: return 0;
2029: }
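/* Breadth-first layer expansion over a CSR adjacency (xadj/adjncy): queue_tip points one past the n_prev dofs
   most recently appended to the queue (hence the negative offsets below); for each of them the untouched
   neighbours are marked in the PetscBT and appended at queue_tip, and *n_added reports how many were added.
   Calling this repeatedly, advancing queue_tip by *n_added each time, visits the graph layer by layer. */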
2031: static inline PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt *queue_tip, PetscInt n_prev, PetscBT touched, PetscInt *xadj, PetscInt *adjncy, PetscInt *n_added)
2032: {
2033: PetscInt i, j, n;
2035: n = 0;
2036: for (i = -n_prev; i < 0; i++) {
2037: PetscInt start_dof = queue_tip[i];
2038: for (j = xadj[start_dof]; j < xadj[start_dof + 1]; j++) {
2039: PetscInt dof = adjncy[j];
2040: if (!PetscBTLookup(touched, dof)) {
2041: PetscBTSet(touched, dof);
2042: queue_tip[n] = dof;
2043: n++;
2044: }
2045: }
2046: }
2047: *n_added = n;
2048: return 0;
2049: }