Actual source code: mumps.c
2: /*
3: Provides an interface to the MUMPS sparse solver
4: */
5: #include <petscpkg_version.h>
6: #include <../src/mat/impls/aij/mpi/mpiaij.h>
7: #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
8: #include <../src/mat/impls/sell/mpi/mpisell.h>
10: EXTERN_C_BEGIN
11: #if defined(PETSC_USE_COMPLEX)
12: #if defined(PETSC_USE_REAL_SINGLE)
13: #include <cmumps_c.h>
14: #else
15: #include <zmumps_c.h>
16: #endif
17: #else
18: #if defined(PETSC_USE_REAL_SINGLE)
19: #include <smumps_c.h>
20: #else
21: #include <dmumps_c.h>
22: #endif
23: #endif
24: EXTERN_C_END
25: #define JOB_INIT -1
26: #define JOB_NULL 0
27: #define JOB_FACTSYMBOLIC 1
28: #define JOB_FACTNUMERIC 2
29: #define JOB_SOLVE 3
30: #define JOB_END -2
32: /* calls to MUMPS */
33: #if defined(PETSC_USE_COMPLEX)
34: #if defined(PETSC_USE_REAL_SINGLE)
35: #define MUMPS_c cmumps_c
36: #else
37: #define MUMPS_c zmumps_c
38: #endif
39: #else
40: #if defined(PETSC_USE_REAL_SINGLE)
41: #define MUMPS_c smumps_c
42: #else
43: #define MUMPS_c dmumps_c
44: #endif
45: #endif
47: /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn and irn_loc/jcn_loc, and uses int64_t for
48:    numbers of nonzeros such as nnz and nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
49:    naming convention of PetscMPIInt, PetscBLASInt, etc.
50: */
51: typedef MUMPS_INT PetscMUMPSInt;
53: #if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0)
54: #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is defined in the MUMPS headers when MUMPS is built in full 64-bit mode, so checking this macro is more reliable */
55: #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
56: #endif
57: #else
58: #if defined(INTSIZE64) /* INTSIZE64 is a command-line macro one uses to build MUMPS in full 64-bit mode */
59: #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
60: #endif
61: #endif
63: #define MPIU_MUMPSINT MPI_INT
64: #define PETSC_MUMPS_INT_MAX 2147483647
65: #define PETSC_MUMPS_INT_MIN -2147483648
67: /* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> is a row/col index or some other small integer */
68: static inline PetscErrorCode PetscMUMPSIntCast(PetscInt a, PetscMUMPSInt *b)
69: {
70: #if PetscDefined(USE_64BIT_INDICES)
71: PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
72: #endif
73: *b = (PetscMUMPSInt)(a);
74: return 0;
75: }
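/*
  Illustrative sketch (added for exposition, not part of the original source): the typical pattern
  for using PetscMUMPSIntCast() when filling the 32-bit index arrays MUMPS expects. The names n,
  aj, col and shift are placeholders.

    PetscMUMPSInt *col;
    PetscMalloc1(n, &col);
    for (PetscInt k = 0; k < n; k++) PetscMUMPSIntCast(aj[k] + shift, &col[k]);

  With 32-bit PetscInt the PetscAssert() compiles away and the cast is free; with 64-bit PetscInt
  it guards against indices that do not fit in a PetscMUMPSInt.
*/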
77: /* Put these utility routines here since they are only used in this file */
78: static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub)
79: {
80: PetscInt myval;
81: PetscBool myset;
82:   /* PetscInt's size should always be >= PetscMUMPSInt's, so it is safe to call PetscOptionsInt_Private() to read a PetscMUMPSInt */
83: PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub);
84: if (myset) PetscMUMPSIntCast(myval, value);
85: if (set) *set = myset;
86: return 0;
87: }
88: #define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX)
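/*
  Sketch of how this wrapper is intended to be used inside a PetscOptionsBegin()/PetscOptionsEnd()
  block (the option name and variables below are illustrative only):

    PetscMUMPSInt icntl;
    PetscBool     flg;
    PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing", "None", mumps->id.ICNTL(4), &icntl, &flg);
    if (flg) mumps->id.ICNTL(4) = icntl;

  The value is parsed as a PetscInt and then range-checked down to a PetscMUMPSInt.
*/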
90: /* if using PETSc OpenMP support, we only call MUMPS on the master ranks. Before/after the call, we change/restore the set of CPUs the master ranks can run on */
91: #if defined(PETSC_HAVE_OPENMP_SUPPORT)
92: #define PetscMUMPS_c(mumps) \
93: do { \
94: if (mumps->use_petsc_omp_support) { \
95: if (mumps->is_omp_master) { \
96: PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl); \
97: MUMPS_c(&mumps->id); \
98: PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl); \
99: } \
100: PetscOmpCtrlBarrier(mumps->omp_ctrl); \
101:       /* Global info is the same on all processes, so we Bcast it within omp_comm. Local info is specific \
102:          to each process, so we only Bcast info[1], the error code, and leave the others alone (they do not have \
103:          an easy translation between omp_comm and petsc_comm). See the MUMPS 5.1.2 manual, p. 82. \
104:          omp_comm is a small shared-memory communicator, hence doing multiple Bcasts as shown below is OK. \
105: */ \
106: MPI_Bcast(mumps->id.infog, 40, MPIU_MUMPSINT, 0, mumps->omp_comm); \
107: MPI_Bcast(mumps->id.rinfog, 20, MPIU_REAL, 0, mumps->omp_comm); \
108: MPI_Bcast(mumps->id.info, 1, MPIU_MUMPSINT, 0, mumps->omp_comm); \
109: } else { \
110: MUMPS_c(&mumps->id); \
111: } \
112: } while (0)
113: #else
114: #define PetscMUMPS_c(mumps) \
115: do { \
116: MUMPS_c(&mumps->id); \
117: } while (0)
118: #endif
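/*
  Illustrative calling pattern (an assumption drawn from the uses later in this file): every MUMPS
  phase is driven by setting id.job and invoking the wrapper, then inspecting the error code that
  the macro above makes available on all ranks:

    mumps->id.job = JOB_FACTSYMBOLIC;
    PetscMUMPS_c(mumps);
    if (mumps->id.INFOG(1) < 0) { ...handle the error... }

  With PETSc OpenMP support enabled, only the omp master ranks enter MUMPS; the remaining ranks
  wait at PetscOmpCtrlBarrier() and then receive infog/rinfog and info[1] through the broadcasts.
*/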
120: /* declare MumpsScalar */
121: #if defined(PETSC_USE_COMPLEX)
122: #if defined(PETSC_USE_REAL_SINGLE)
123: #define MumpsScalar mumps_complex
124: #else
125: #define MumpsScalar mumps_double_complex
126: #endif
127: #else
128: #define MumpsScalar PetscScalar
129: #endif
131: /* macros s.t. indices match MUMPS documentation */
132: #define ICNTL(I) icntl[(I)-1]
133: #define CNTL(I) cntl[(I)-1]
134: #define INFOG(I) infog[(I)-1]
135: #define INFO(I) info[(I)-1]
136: #define RINFOG(I) rinfog[(I)-1]
137: #define RINFO(I) rinfo[(I)-1]
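/*
  Example of what these macros do: the MUMPS documentation is 1-based, so mumps->id.ICNTL(24)
  expands to mumps->id.icntl[23] and mumps->id.INFOG(1) reads mumps->id.infog[0]. This keeps the
  code textually aligned with the manual.
*/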
139: typedef struct Mat_MUMPS Mat_MUMPS;
140: struct Mat_MUMPS {
141: #if defined(PETSC_USE_COMPLEX)
142: #if defined(PETSC_USE_REAL_SINGLE)
143: CMUMPS_STRUC_C id;
144: #else
145: ZMUMPS_STRUC_C id;
146: #endif
147: #else
148: #if defined(PETSC_USE_REAL_SINGLE)
149: SMUMPS_STRUC_C id;
150: #else
151: DMUMPS_STRUC_C id;
152: #endif
153: #endif
155: MatStructure matstruc;
156: PetscMPIInt myid, petsc_size;
157: PetscMUMPSInt *irn, *jcn; /* the (i,j,v) triplets passed to mumps. */
158: PetscScalar *val, *val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer, hence val_alloc. */
159: PetscInt64 nnz; /* number of nonzeros. The type is called selective 64-bit in mumps */
160: PetscMUMPSInt sym;
161: MPI_Comm mumps_comm;
162: PetscMUMPSInt *ICNTL_pre;
163: PetscReal *CNTL_pre;
164: PetscMUMPSInt ICNTL9_pre; /* check if ICNTL(9) is changed from previous MatSolve */
165: VecScatter scat_rhs, scat_sol; /* used by MatSolve() */
166: PetscMUMPSInt ICNTL20; /* use centralized (0) or distributed (10) dense RHS */
167: PetscMUMPSInt lrhs_loc, nloc_rhs, *irhs_loc;
168: #if defined(PETSC_HAVE_OPENMP_SUPPORT)
169: PetscInt *rhs_nrow, max_nrhs;
170: PetscMPIInt *rhs_recvcounts, *rhs_disps;
171: PetscScalar *rhs_loc, *rhs_recvbuf;
172: #endif
173: Vec b_seq, x_seq;
174: PetscInt ninfo, *info; /* which INFO to display */
175: PetscInt sizeredrhs;
176: PetscScalar *schur_sol;
177: PetscInt schur_sizesol;
178: PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */
179: PetscInt64 cur_ilen, cur_jlen; /* current len of ia_alloc[], ja_alloc[] */
180: PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);
182: /* stuff used by petsc/mumps OpenMP support*/
183: PetscBool use_petsc_omp_support;
184: PetscOmpCtrl omp_ctrl; /* an OpenMP controller with which blocked processes release their CPUs (MPI_Barrier does not provide this guarantee) */
185: MPI_Comm petsc_comm, omp_comm; /* petsc_comm is petsc matrix's comm */
186: PetscInt64 *recvcount; /* a collection of nnz on omp_master */
187: PetscMPIInt tag, omp_comm_size;
188: PetscBool is_omp_master; /* is this rank the master of omp_comm */
189: MPI_Request *reqs;
190: };
192: /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
193: Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices.
194: */
195: static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps)
196: {
197: PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */
199: #if defined(PETSC_USE_64BIT_INDICES)
200: {
201: PetscInt i;
202: if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
203: PetscFree(mumps->ia_alloc);
204: PetscMalloc1(nrow + 1, &mumps->ia_alloc);
205: mumps->cur_ilen = nrow + 1;
206: }
207: if (nnz > mumps->cur_jlen) {
208: PetscFree(mumps->ja_alloc);
209: PetscMalloc1(nnz, &mumps->ja_alloc);
210: mumps->cur_jlen = nnz;
211: }
212: for (i = 0; i < nrow + 1; i++) PetscMUMPSIntCast(ia[i], &(mumps->ia_alloc[i]));
213: for (i = 0; i < nnz; i++) PetscMUMPSIntCast(ja[i], &(mumps->ja_alloc[i]));
214: *ia_mumps = mumps->ia_alloc;
215: *ja_mumps = mumps->ja_alloc;
216: }
217: #else
218: *ia_mumps = ia;
219: *ja_mumps = ja;
220: #endif
221: PetscMUMPSIntCast(nnz, nnz_mumps);
222: return 0;
223: }
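/*
  Sketch of a call site (mirroring the sparse right-hand-side handling later in this file; Bt is a
  hypothetical SeqAIJ matrix holding the RHS in transposed form):

    PetscInt  spnr, *ia, *ja;
    PetscBool done;
    MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &done);
    PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs);

  With 64-bit PetscInt the row pointers and column indices are copied into ia_alloc/ja_alloc; with
  32-bit PetscInt they are passed through unchanged.
*/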
225: static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps)
226: {
227: PetscFree(mumps->id.listvar_schur);
228: PetscFree(mumps->id.redrhs);
229: PetscFree(mumps->schur_sol);
230: mumps->id.size_schur = 0;
231: mumps->id.schur_lld = 0;
232: mumps->id.ICNTL(19) = 0;
233: return 0;
234: }
236: /* solve with rhs in mumps->id.redrhs and return in the same location */
237: static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
238: {
239: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
240: Mat S, B, X;
241: MatFactorSchurStatus schurstatus;
242: PetscInt sizesol;
244: MatFactorFactorizeSchurComplement(F);
245: MatFactorGetSchurComplement(F, &S, &schurstatus);
246: MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &B);
247: MatSetType(B, ((PetscObject)S)->type_name);
248: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
249: MatBindToCPU(B, S->boundtocpu);
250: #endif
251: switch (schurstatus) {
252: case MAT_FACTOR_SCHUR_FACTORED:
253: MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &X);
254: MatSetType(X, ((PetscObject)S)->type_name);
255: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
256: MatBindToCPU(X, S->boundtocpu);
257: #endif
258: if (!mumps->id.ICNTL(9)) { /* transpose solve */
259: MatMatSolveTranspose(S, B, X);
260: } else {
261: MatMatSolve(S, B, X);
262: }
263: break;
264: case MAT_FACTOR_SCHUR_INVERTED:
265: sizesol = mumps->id.nrhs * mumps->id.size_schur;
266: if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
267: PetscFree(mumps->schur_sol);
268: PetscMalloc1(sizesol, &mumps->schur_sol);
269: mumps->schur_sizesol = sizesol;
270: }
271: MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X);
272: MatSetType(X, ((PetscObject)S)->type_name);
273: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
274: MatBindToCPU(X, S->boundtocpu);
275: #endif
276: MatProductCreateWithMat(S, B, NULL, X);
277: if (!mumps->id.ICNTL(9)) { /* transpose solve */
278: MatProductSetType(X, MATPRODUCT_AtB);
279: } else {
280: MatProductSetType(X, MATPRODUCT_AB);
281: }
282: MatProductSetFromOptions(X);
283: MatProductSymbolic(X);
284: MatProductNumeric(X);
286: MatCopy(X, B, SAME_NONZERO_PATTERN);
287: break;
288: default:
289: SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status);
290: }
291: MatFactorRestoreSchurComplement(F, &S, schurstatus);
292: MatDestroy(&B);
293: MatDestroy(&X);
294: return 0;
295: }
297: static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
298: {
299: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
301: if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
302: return 0;
303: }
304: if (!expansion) { /* prepare for the condensation step */
305: PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur;
306: /* allocate MUMPS internal array to store reduced right-hand sides */
307: if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
308: PetscFree(mumps->id.redrhs);
309: mumps->id.lredrhs = mumps->id.size_schur;
310: PetscMalloc1(mumps->id.nrhs * mumps->id.lredrhs, &mumps->id.redrhs);
311: mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs;
312: }
313: mumps->id.ICNTL(26) = 1; /* condensation phase */
314: } else { /* prepare for the expansion step */
315: /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */
316: MatMumpsSolveSchur_Private(F);
317: mumps->id.ICNTL(26) = 2; /* expansion phase */
318: PetscMUMPS_c(mumps);
320: /* restore defaults */
321: mumps->id.ICNTL(26) = -1;
322: /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
323: if (mumps->id.nrhs > 1) {
324: PetscFree(mumps->id.redrhs);
325: mumps->id.lredrhs = 0;
326: mumps->sizeredrhs = 0;
327: }
328: }
329: return 0;
330: }
332: /*
333: MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz]
335: input:
336: A - matrix in aij,baij or sbaij format
337: shift - 0: C style output triple; 1: Fortran style output triple.
338: reuse - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple
339: MAT_REUSE_MATRIX: only the values in v array are updated
340: output:
341: nnz - dim of r, c, and v (number of local nonzero entries of A)
342: r, c, v - row and col index, matrix values (matrix triples)
344:   The returned values r, c, and sometimes v are obtained in a single PetscMalloc() and are later freed in MatDestroy_MUMPS()
345:   with PetscFree(mumps->irn). This is not ideal code: the fact that v is ONLY sometimes part of mumps->irn means
346:   that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().
348: */
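/*
  Worked example (illustrative): for the 2x2 sequential AIJ matrix
      [ 10  0 ]
      [  3  4 ]
  converted with shift = 1 (Fortran-style indexing for MUMPS), the triples are
      irn = {1, 2, 2},  jcn = {1, 1, 2},  val = {10, 3, 4}
  i.e. one (i, j, a_ij) entry per stored nonzero, emitted row by row.
*/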
350: PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
351: {
352: const PetscScalar *av;
353: const PetscInt *ai, *aj, *ajj, M = A->rmap->n;
354: PetscInt64 nz, rnz, i, j, k;
355: PetscMUMPSInt *row, *col;
356: Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data;
358: MatSeqAIJGetArrayRead(A, &av);
359: mumps->val = (PetscScalar *)av;
360: if (reuse == MAT_INITIAL_MATRIX) {
361: nz = aa->nz;
362: ai = aa->i;
363: aj = aa->j;
364: PetscMalloc2(nz, &row, nz, &col);
365: for (i = k = 0; i < M; i++) {
366: rnz = ai[i + 1] - ai[i];
367: ajj = aj + ai[i];
368: for (j = 0; j < rnz; j++) {
369: PetscMUMPSIntCast(i + shift, &row[k]);
370: PetscMUMPSIntCast(ajj[j] + shift, &col[k]);
371: k++;
372: }
373: }
374: mumps->irn = row;
375: mumps->jcn = col;
376: mumps->nnz = nz;
377: }
378: MatSeqAIJRestoreArrayRead(A, &av);
379: return 0;
380: }
382: PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
383: {
384: PetscInt64 nz, i, j, k, r;
385: Mat_SeqSELL *a = (Mat_SeqSELL *)A->data;
386: PetscMUMPSInt *row, *col;
388: mumps->val = a->val;
389: if (reuse == MAT_INITIAL_MATRIX) {
390: nz = a->sliidx[a->totalslices];
391: PetscMalloc2(nz, &row, nz, &col);
392: for (i = k = 0; i < a->totalslices; i++) {
393: for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscMUMPSIntCast(8 * i + r + shift, &row[k++]);
394: }
395: for (i = 0; i < nz; i++) PetscMUMPSIntCast(a->colidx[i] + shift, &col[i]);
396: mumps->irn = row;
397: mumps->jcn = col;
398: mumps->nnz = nz;
399: }
400: return 0;
401: }
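/*
  Illustrative note on the SELL loop above: it assumes the fixed slice height of 8 used by SeqSELL,
  whose nonzeros are stored interleaved across the 8 rows of a slice. Hence the row index cycles as
  r = (r + 1) & 0x07, and the d-th nonzero of slice i (zero-based) is assigned row 8*i + (d & 7);
  for example, the nonzero at offset 2 within slice 2 gets row 18 before the shift is added.
*/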
403: PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
404: {
405: Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)A->data;
406: const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2;
407: PetscInt64 M, nz, idx = 0, rnz, i, j, k, m;
408: PetscInt bs;
409: PetscMUMPSInt *row, *col;
411: MatGetBlockSize(A, &bs);
412: M = A->rmap->N / bs;
413: mumps->val = aa->a;
414: if (reuse == MAT_INITIAL_MATRIX) {
415: ai = aa->i;
416: aj = aa->j;
417: nz = bs2 * aa->nz;
418: PetscMalloc2(nz, &row, nz, &col);
419: for (i = 0; i < M; i++) {
420: ajj = aj + ai[i];
421: rnz = ai[i + 1] - ai[i];
422: for (k = 0; k < rnz; k++) {
423: for (j = 0; j < bs; j++) {
424: for (m = 0; m < bs; m++) {
425: PetscMUMPSIntCast(i * bs + m + shift, &row[idx]);
426: PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx]);
427: idx++;
428: }
429: }
430: }
431: }
432: mumps->irn = row;
433: mumps->jcn = col;
434: mumps->nnz = nz;
435: }
436: return 0;
437: }
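/*
  Illustrative expansion of one BAIJ block with bs = 2: the block stored at block row i and block
  column ajj[k] contributes bs*bs = 4 triples. Assuming the usual column-major storage of each
  block in aa->a, the inner loops emit, in order,
      (i*bs+0, ajj[k]*bs+0), (i*bs+1, ajj[k]*bs+0), (i*bs+0, ajj[k]*bs+1), (i*bs+1, ajj[k]*bs+1)
  (plus the shift), so that mumps->val can simply alias aa->a.
*/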
439: PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
440: {
441: const PetscInt *ai, *aj, *ajj;
442: PetscInt bs;
443: PetscInt64 nz, rnz, i, j, k, m;
444: PetscMUMPSInt *row, *col;
445: PetscScalar *val;
446: Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)A->data;
447: const PetscInt bs2 = aa->bs2, mbs = aa->mbs;
448: #if defined(PETSC_USE_COMPLEX)
449: PetscBool isset, hermitian;
450: #endif
452: #if defined(PETSC_USE_COMPLEX)
453: MatIsHermitianKnown(A, &isset, &hermitian);
455: #endif
456: ai = aa->i;
457: aj = aa->j;
458: MatGetBlockSize(A, &bs);
459: if (reuse == MAT_INITIAL_MATRIX) {
460: nz = aa->nz;
461: PetscMalloc2(bs2 * nz, &row, bs2 * nz, &col);
462: if (bs > 1) {
463: PetscMalloc1(bs2 * nz, &mumps->val_alloc);
464: mumps->val = mumps->val_alloc;
465: } else {
466: mumps->val = aa->a;
467: }
468: mumps->irn = row;
469: mumps->jcn = col;
470: } else {
471: if (bs == 1) mumps->val = aa->a;
472: row = mumps->irn;
473: col = mumps->jcn;
474: }
475: val = mumps->val;
477: nz = 0;
478: if (bs > 1) {
479: for (i = 0; i < mbs; i++) {
480: rnz = ai[i + 1] - ai[i];
481: ajj = aj + ai[i];
482: for (j = 0; j < rnz; j++) {
483: for (k = 0; k < bs; k++) {
484: for (m = 0; m < bs; m++) {
485: if (ajj[j] > i || k >= m) {
486: if (reuse == MAT_INITIAL_MATRIX) {
487: PetscMUMPSIntCast(i * bs + m + shift, &row[nz]);
488: PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz]);
489: }
490: val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs];
491: }
492: }
493: }
494: }
495: }
496: } else if (reuse == MAT_INITIAL_MATRIX) {
497: for (i = 0; i < mbs; i++) {
498: rnz = ai[i + 1] - ai[i];
499: ajj = aj + ai[i];
500: for (j = 0; j < rnz; j++) {
501: PetscMUMPSIntCast(i + shift, &row[nz]);
502: PetscMUMPSIntCast(ajj[j] + shift, &col[nz]);
503: nz++;
504: }
505: }
507: }
508: if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
509: return 0;
510: }
512: PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
513: {
514: const PetscInt *ai, *aj, *ajj, *adiag, M = A->rmap->n;
515: PetscInt64 nz, rnz, i, j;
516: const PetscScalar *av, *v1;
517: PetscScalar *val;
518: PetscMUMPSInt *row, *col;
519: Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data;
520: PetscBool missing;
521: #if defined(PETSC_USE_COMPLEX)
522: PetscBool hermitian, isset;
523: #endif
525: #if defined(PETSC_USE_COMPLEX)
526: MatIsHermitianKnown(A, &isset, &hermitian);
528: #endif
529: MatSeqAIJGetArrayRead(A, &av);
530: ai = aa->i;
531: aj = aa->j;
532: adiag = aa->diag;
533: MatMissingDiagonal_SeqAIJ(A, &missing, NULL);
534: if (reuse == MAT_INITIAL_MATRIX) {
535: /* count nz in the upper triangular part of A */
536: nz = 0;
537: if (missing) {
538: for (i = 0; i < M; i++) {
539: if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
540: for (j = ai[i]; j < ai[i + 1]; j++) {
541: if (aj[j] < i) continue;
542: nz++;
543: }
544: } else {
545: nz += ai[i + 1] - adiag[i];
546: }
547: }
548: } else {
549: for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i];
550: }
551: PetscMalloc2(nz, &row, nz, &col);
552: PetscMalloc1(nz, &val);
553: mumps->nnz = nz;
554: mumps->irn = row;
555: mumps->jcn = col;
556: mumps->val = mumps->val_alloc = val;
558: nz = 0;
559: if (missing) {
560: for (i = 0; i < M; i++) {
561: if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
562: for (j = ai[i]; j < ai[i + 1]; j++) {
563: if (aj[j] < i) continue;
564: PetscMUMPSIntCast(i + shift, &row[nz]);
565: PetscMUMPSIntCast(aj[j] + shift, &col[nz]);
566: val[nz] = av[j];
567: nz++;
568: }
569: } else {
570: rnz = ai[i + 1] - adiag[i];
571: ajj = aj + adiag[i];
572: v1 = av + adiag[i];
573: for (j = 0; j < rnz; j++) {
574: PetscMUMPSIntCast(i + shift, &row[nz]);
575: PetscMUMPSIntCast(ajj[j] + shift, &col[nz]);
576: val[nz++] = v1[j];
577: }
578: }
579: }
580: } else {
581: for (i = 0; i < M; i++) {
582: rnz = ai[i + 1] - adiag[i];
583: ajj = aj + adiag[i];
584: v1 = av + adiag[i];
585: for (j = 0; j < rnz; j++) {
586: PetscMUMPSIntCast(i + shift, &row[nz]);
587: PetscMUMPSIntCast(ajj[j] + shift, &col[nz]);
588: val[nz++] = v1[j];
589: }
590: }
591: }
592: } else {
593: nz = 0;
594: val = mumps->val;
595: if (missing) {
596: for (i = 0; i < M; i++) {
597: if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
598: for (j = ai[i]; j < ai[i + 1]; j++) {
599: if (aj[j] < i) continue;
600: val[nz++] = av[j];
601: }
602: } else {
603: rnz = ai[i + 1] - adiag[i];
604: v1 = av + adiag[i];
605: for (j = 0; j < rnz; j++) val[nz++] = v1[j];
606: }
607: }
608: } else {
609: for (i = 0; i < M; i++) {
610: rnz = ai[i + 1] - adiag[i];
611: v1 = av + adiag[i];
612: for (j = 0; j < rnz; j++) val[nz++] = v1[j];
613: }
614: }
615: }
616: MatSeqAIJRestoreArrayRead(A, &av);
617: return 0;
618: }
620: PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
621: {
622: const PetscInt *ai, *aj, *bi, *bj, *garray, *ajj, *bjj;
623: PetscInt bs;
624: PetscInt64 rstart, nz, i, j, k, m, jj, irow, countA, countB;
625: PetscMUMPSInt *row, *col;
626: const PetscScalar *av, *bv, *v1, *v2;
627: PetscScalar *val;
628: Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)A->data;
629: Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)(mat->A)->data;
630: Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)(mat->B)->data;
631: const PetscInt bs2 = aa->bs2, mbs = aa->mbs;
632: #if defined(PETSC_USE_COMPLEX)
633: PetscBool hermitian, isset;
634: #endif
636: #if defined(PETSC_USE_COMPLEX)
637: MatIsHermitianKnown(A, &isset, &hermitian);
639: #endif
640: MatGetBlockSize(A, &bs);
641: rstart = A->rmap->rstart;
642: ai = aa->i;
643: aj = aa->j;
644: bi = bb->i;
645: bj = bb->j;
646: av = aa->a;
647: bv = bb->a;
649: garray = mat->garray;
651: if (reuse == MAT_INITIAL_MATRIX) {
652: nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */
653: PetscMalloc2(nz, &row, nz, &col);
654: PetscMalloc1(nz, &val);
655:     /* cannot determine the exact mumps->nnz yet because only the upper-triangular part of the SBAIJ blocks is kept */
656: mumps->irn = row;
657: mumps->jcn = col;
658: mumps->val = mumps->val_alloc = val;
659: } else {
660: val = mumps->val;
661: }
663: jj = 0;
664: irow = rstart;
665: for (i = 0; i < mbs; i++) {
666: ajj = aj + ai[i]; /* ptr to the beginning of this row */
667: countA = ai[i + 1] - ai[i];
668: countB = bi[i + 1] - bi[i];
669: bjj = bj + bi[i];
670: v1 = av + ai[i] * bs2;
671: v2 = bv + bi[i] * bs2;
673: if (bs > 1) {
674: /* A-part */
675: for (j = 0; j < countA; j++) {
676: for (k = 0; k < bs; k++) {
677: for (m = 0; m < bs; m++) {
678: if (rstart + ajj[j] * bs > irow || k >= m) {
679: if (reuse == MAT_INITIAL_MATRIX) {
680: PetscMUMPSIntCast(irow + m + shift, &row[jj]);
681: PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj]);
682: }
683: val[jj++] = v1[j * bs2 + m + k * bs];
684: }
685: }
686: }
687: }
689: /* B-part */
690: for (j = 0; j < countB; j++) {
691: for (k = 0; k < bs; k++) {
692: for (m = 0; m < bs; m++) {
693: if (reuse == MAT_INITIAL_MATRIX) {
694: PetscMUMPSIntCast(irow + m + shift, &row[jj]);
695: PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj]);
696: }
697: val[jj++] = v2[j * bs2 + m + k * bs];
698: }
699: }
700: }
701: } else {
702: /* A-part */
703: for (j = 0; j < countA; j++) {
704: if (reuse == MAT_INITIAL_MATRIX) {
705: PetscMUMPSIntCast(irow + shift, &row[jj]);
706: PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]);
707: }
708: val[jj++] = v1[j];
709: }
711: /* B-part */
712: for (j = 0; j < countB; j++) {
713: if (reuse == MAT_INITIAL_MATRIX) {
714: PetscMUMPSIntCast(irow + shift, &row[jj]);
715: PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]);
716: }
717: val[jj++] = v2[j];
718: }
719: }
720: irow += bs;
721: }
722: mumps->nnz = jj;
723: return 0;
724: }
726: PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
727: {
728: const PetscInt *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
729: PetscInt64 rstart, nz, i, j, jj, irow, countA, countB;
730: PetscMUMPSInt *row, *col;
731: const PetscScalar *av, *bv, *v1, *v2;
732: PetscScalar *val;
733: Mat Ad, Ao;
734: Mat_SeqAIJ *aa;
735: Mat_SeqAIJ *bb;
737: MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray);
738: MatSeqAIJGetArrayRead(Ad, &av);
739: MatSeqAIJGetArrayRead(Ao, &bv);
741: aa = (Mat_SeqAIJ *)(Ad)->data;
742: bb = (Mat_SeqAIJ *)(Ao)->data;
743: ai = aa->i;
744: aj = aa->j;
745: bi = bb->i;
746: bj = bb->j;
748: rstart = A->rmap->rstart;
750: if (reuse == MAT_INITIAL_MATRIX) {
751: nz = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
752: PetscMalloc2(nz, &row, nz, &col);
753: PetscMalloc1(nz, &val);
754: mumps->nnz = nz;
755: mumps->irn = row;
756: mumps->jcn = col;
757: mumps->val = mumps->val_alloc = val;
758: } else {
759: val = mumps->val;
760: }
762: jj = 0;
763: irow = rstart;
764: for (i = 0; i < m; i++) {
765: ajj = aj + ai[i]; /* ptr to the beginning of this row */
766: countA = ai[i + 1] - ai[i];
767: countB = bi[i + 1] - bi[i];
768: bjj = bj + bi[i];
769: v1 = av + ai[i];
770: v2 = bv + bi[i];
772: /* A-part */
773: for (j = 0; j < countA; j++) {
774: if (reuse == MAT_INITIAL_MATRIX) {
775: PetscMUMPSIntCast(irow + shift, &row[jj]);
776: PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]);
777: }
778: val[jj++] = v1[j];
779: }
781: /* B-part */
782: for (j = 0; j < countB; j++) {
783: if (reuse == MAT_INITIAL_MATRIX) {
784: PetscMUMPSIntCast(irow + shift, &row[jj]);
785: PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]);
786: }
787: val[jj++] = v2[j];
788: }
789: irow++;
790: }
791: MatSeqAIJRestoreArrayRead(Ad, &av);
792: MatSeqAIJRestoreArrayRead(Ao, &bv);
793: return 0;
794: }
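/*
  Illustrative example of the A-part/B-part split above: on a rank owning global rows
  [rstart, rend), the diagonal block Ad holds the columns in that same range (local column j is
  global column rstart + j), while the off-diagonal block Ao holds the remaining columns in
  compressed form, with garray[] translating a compressed index back to a global column. For
  instance, with garray = {2, 7, 11}, a nonzero stored in Ao at local column 1 lies in global
  column 7.
*/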
796: PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
797: {
798: Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)A->data;
799: Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)(mat->A)->data;
800: Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)(mat->B)->data;
801: const PetscInt *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj;
802: const PetscInt *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart;
803: const PetscInt bs2 = mat->bs2;
804: PetscInt bs;
805: PetscInt64 nz, i, j, k, n, jj, irow, countA, countB, idx;
806: PetscMUMPSInt *row, *col;
807: const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2;
808: PetscScalar *val;
810: MatGetBlockSize(A, &bs);
811: if (reuse == MAT_INITIAL_MATRIX) {
812: nz = bs2 * (aa->nz + bb->nz);
813: PetscMalloc2(nz, &row, nz, &col);
814: PetscMalloc1(nz, &val);
815: mumps->nnz = nz;
816: mumps->irn = row;
817: mumps->jcn = col;
818: mumps->val = mumps->val_alloc = val;
819: } else {
820: val = mumps->val;
821: }
823: jj = 0;
824: irow = rstart;
825: for (i = 0; i < mbs; i++) {
826: countA = ai[i + 1] - ai[i];
827: countB = bi[i + 1] - bi[i];
828: ajj = aj + ai[i];
829: bjj = bj + bi[i];
830: v1 = av + bs2 * ai[i];
831: v2 = bv + bs2 * bi[i];
833: idx = 0;
834: /* A-part */
835: for (k = 0; k < countA; k++) {
836: for (j = 0; j < bs; j++) {
837: for (n = 0; n < bs; n++) {
838: if (reuse == MAT_INITIAL_MATRIX) {
839: PetscMUMPSIntCast(irow + n + shift, &row[jj]);
840: PetscMUMPSIntCast(rstart + bs * ajj[k] + j + shift, &col[jj]);
841: }
842: val[jj++] = v1[idx++];
843: }
844: }
845: }
847: idx = 0;
848: /* B-part */
849: for (k = 0; k < countB; k++) {
850: for (j = 0; j < bs; j++) {
851: for (n = 0; n < bs; n++) {
852: if (reuse == MAT_INITIAL_MATRIX) {
853: PetscMUMPSIntCast(irow + n + shift, &row[jj]);
854: PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj]);
855: }
856: val[jj++] = v2[idx++];
857: }
858: }
859: }
860: irow += bs;
861: }
862: return 0;
863: }
865: PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
866: {
867: const PetscInt *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
868: PetscInt64 rstart, nz, nza, nzb, i, j, jj, irow, countA, countB;
869: PetscMUMPSInt *row, *col;
870: const PetscScalar *av, *bv, *v1, *v2;
871: PetscScalar *val;
872: Mat Ad, Ao;
873: Mat_SeqAIJ *aa;
874: Mat_SeqAIJ *bb;
875: #if defined(PETSC_USE_COMPLEX)
876: PetscBool hermitian, isset;
877: #endif
879: #if defined(PETSC_USE_COMPLEX)
880: MatIsHermitianKnown(A, &isset, &hermitian);
882: #endif
883: MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray);
884: MatSeqAIJGetArrayRead(Ad, &av);
885: MatSeqAIJGetArrayRead(Ao, &bv);
887: aa = (Mat_SeqAIJ *)(Ad)->data;
888: bb = (Mat_SeqAIJ *)(Ao)->data;
889: ai = aa->i;
890: aj = aa->j;
891: adiag = aa->diag;
892: bi = bb->i;
893: bj = bb->j;
895: rstart = A->rmap->rstart;
897: if (reuse == MAT_INITIAL_MATRIX) {
898: nza = 0; /* num of upper triangular entries in mat->A, including diagonals */
899: nzb = 0; /* num of upper triangular entries in mat->B */
900: for (i = 0; i < m; i++) {
901: nza += (ai[i + 1] - adiag[i]);
902: countB = bi[i + 1] - bi[i];
903: bjj = bj + bi[i];
904: for (j = 0; j < countB; j++) {
905: if (garray[bjj[j]] > rstart) nzb++;
906: }
907: }
909: nz = nza + nzb; /* total nz of upper triangular part of mat */
910: PetscMalloc2(nz, &row, nz, &col);
911: PetscMalloc1(nz, &val);
912: mumps->nnz = nz;
913: mumps->irn = row;
914: mumps->jcn = col;
915: mumps->val = mumps->val_alloc = val;
916: } else {
917: val = mumps->val;
918: }
920: jj = 0;
921: irow = rstart;
922: for (i = 0; i < m; i++) {
923: ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */
924: v1 = av + adiag[i];
925: countA = ai[i + 1] - adiag[i];
926: countB = bi[i + 1] - bi[i];
927: bjj = bj + bi[i];
928: v2 = bv + bi[i];
930: /* A-part */
931: for (j = 0; j < countA; j++) {
932: if (reuse == MAT_INITIAL_MATRIX) {
933: PetscMUMPSIntCast(irow + shift, &row[jj]);
934: PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]);
935: }
936: val[jj++] = v1[j];
937: }
939: /* B-part */
940: for (j = 0; j < countB; j++) {
941: if (garray[bjj[j]] > rstart) {
942: if (reuse == MAT_INITIAL_MATRIX) {
943: PetscMUMPSIntCast(irow + shift, &row[jj]);
944: PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]);
945: }
946: val[jj++] = v2[j];
947: }
948: }
949: irow++;
950: }
951: MatSeqAIJRestoreArrayRead(Ad, &av);
952: MatSeqAIJRestoreArrayRead(Ao, &bv);
953: return 0;
954: }
956: PetscErrorCode MatDestroy_MUMPS(Mat A)
957: {
958: Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
960: PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc);
961: VecScatterDestroy(&mumps->scat_rhs);
962: VecScatterDestroy(&mumps->scat_sol);
963: VecDestroy(&mumps->b_seq);
964: VecDestroy(&mumps->x_seq);
965: PetscFree(mumps->id.perm_in);
966: PetscFree2(mumps->irn, mumps->jcn);
967: PetscFree(mumps->val_alloc);
968: PetscFree(mumps->info);
969: PetscFree(mumps->ICNTL_pre);
970: PetscFree(mumps->CNTL_pre);
971: MatMumpsResetSchur_Private(mumps);
972: if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */
973: mumps->id.job = JOB_END;
974: PetscMUMPS_c(mumps);
976: if (mumps->mumps_comm != MPI_COMM_NULL) {
977: if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) MPI_Comm_free(&mumps->mumps_comm);
978: else PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm);
979: }
980: }
981: #if defined(PETSC_HAVE_OPENMP_SUPPORT)
982: if (mumps->use_petsc_omp_support) {
983: PetscOmpCtrlDestroy(&mumps->omp_ctrl);
984: PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf);
985: PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps);
986: }
987: #endif
988: PetscFree(mumps->ia_alloc);
989: PetscFree(mumps->ja_alloc);
990: PetscFree(mumps->recvcount);
991: PetscFree(mumps->reqs);
992: PetscFree(mumps->irhs_loc);
993: PetscFree(A->data);
995: /* clear composed functions */
996: PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL);
997: PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL);
998: PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL);
999: PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL);
1000: PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL);
1001: PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL);
1002: PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL);
1003: PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL);
1004: PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL);
1005: PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL);
1006: PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL);
1007: PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL);
1008: PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL);
1009: return 0;
1010: }
1012: /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. */
1013: static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array)
1014: {
1015: Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
1016: const PetscMPIInt ompsize = mumps->omp_comm_size;
1017: PetscInt i, m, M, rstart;
1019: MatGetSize(A, &M, NULL);
1020: MatGetLocalSize(A, &m, NULL);
1022: if (ompsize == 1) {
1023: if (!mumps->irhs_loc) {
1024: mumps->nloc_rhs = m;
1025: PetscMalloc1(m, &mumps->irhs_loc);
1026: MatGetOwnershipRange(A, &rstart, NULL);
1027: for (i = 0; i < m; i++) mumps->irhs_loc[i] = rstart + i + 1; /* use 1-based indices */
1028: }
1029: mumps->id.rhs_loc = (MumpsScalar *)array;
1030: } else {
1031: #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1032: const PetscInt *ranges;
1033: PetscMPIInt j, k, sendcount, *petsc_ranks, *omp_ranks;
1034: MPI_Group petsc_group, omp_group;
1035: PetscScalar *recvbuf = NULL;
1037: if (mumps->is_omp_master) {
1038: /* Lazily initialize the omp stuff for distributed rhs */
1039: if (!mumps->irhs_loc) {
1040: PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks);
1041: PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps);
1042: MPI_Comm_group(mumps->petsc_comm, &petsc_group);
1043: MPI_Comm_group(mumps->omp_comm, &omp_group);
1044: for (j = 0; j < ompsize; j++) omp_ranks[j] = j;
1045: MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks);
1047: /* Populate mumps->irhs_loc[], rhs_nrow[] */
1048: mumps->nloc_rhs = 0;
1049: MatGetOwnershipRanges(A, &ranges);
1050: for (j = 0; j < ompsize; j++) {
1051: mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]];
1052: mumps->nloc_rhs += mumps->rhs_nrow[j];
1053: }
1054: PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc);
1055: for (j = k = 0; j < ompsize; j++) {
1056: for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) mumps->irhs_loc[k] = i + 1; /* uses 1-based indices */
1057: }
1059: PetscFree2(omp_ranks, petsc_ranks);
1060: MPI_Group_free(&petsc_group);
1061: MPI_Group_free(&omp_group);
1062: }
1064: /* Realloc buffers when current nrhs is bigger than what we have met */
1065: if (nrhs > mumps->max_nrhs) {
1066: PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf);
1067: PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf);
1068: mumps->max_nrhs = nrhs;
1069: }
1071: /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
1072: for (j = 0; j < ompsize; j++) PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j]);
1073: mumps->rhs_disps[0] = 0;
1074: for (j = 1; j < ompsize; j++) {
1075: mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1];
1077: }
1078: recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */
1079: }
1081: PetscMPIIntCast(m * nrhs, &sendcount);
1082: MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm);
1084: if (mumps->is_omp_master) {
1085: if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
1086: PetscScalar *dst, *dstbase = mumps->rhs_loc;
1087: for (j = 0; j < ompsize; j++) {
1088: const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
1089: dst = dstbase;
1090: for (i = 0; i < nrhs; i++) {
1091: PetscArraycpy(dst, src, mumps->rhs_nrow[j]);
1092: src += mumps->rhs_nrow[j];
1093: dst += mumps->nloc_rhs;
1094: }
1095: dstbase += mumps->rhs_nrow[j];
1096: }
1097: }
1098: mumps->id.rhs_loc = (MumpsScalar *)mumps->rhs_loc;
1099: }
1100: #endif /* PETSC_HAVE_OPENMP_SUPPORT */
1101: }
1102: mumps->id.nrhs = nrhs;
1103: mumps->id.nloc_rhs = mumps->nloc_rhs;
1104: mumps->id.lrhs_loc = mumps->nloc_rhs;
1105: mumps->id.irhs_loc = mumps->irhs_loc;
1106: return 0;
1107: }
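/*
  Layout sketch for the gathered right-hand sides above (OpenMP branch, nrhs > 1, illustrative
  sizes): MUMPS wants rhs_loc stored column by column with leading dimension lrhs_loc = nloc_rhs,
  whereas MPI_Gatherv delivers the data grouped by sending rank. With two ranks owning 2 and 3
  rows and nrhs = 2, the receive buffer is
      [ rank0 col0 (2 rows) | rank0 col1 (2 rows) | rank1 col0 (3 rows) | rank1 col1 (3 rows) ]
  and the copy loop interleaves it into
      rhs_loc = [ rank0 col0, rank1 col0 | rank0 col1, rank1 col1 ]   (two columns of length 5),
  with irhs_loc[] recording, 1-based, which global rows those 5 local rows correspond to.
*/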
1109: PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x)
1110: {
1111: Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
1112: const PetscScalar *rarray = NULL;
1113: PetscScalar *array;
1114: IS is_iden, is_petsc;
1115: PetscInt i;
1116: PetscBool second_solve = PETSC_FALSE;
1117: static PetscBool cite1 = PETSC_FALSE, cite2 = PETSC_FALSE;
1119: PetscCall(PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM "
1120: "Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n",
1121: &cite1));
1122: PetscCall(PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel "
1123: "Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n",
1124: &cite2));
1126: if (A->factorerrortype) {
1127: PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2));
1128: VecSetInf(x);
1129: return 0;
1130: }
1132: mumps->id.nrhs = 1;
1133: if (mumps->petsc_size > 1) {
1134: if (mumps->ICNTL20 == 10) {
1135: mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
1136: VecGetArrayRead(b, &rarray);
1137: MatMumpsSetUpDistRHSInfo(A, 1, rarray);
1138: } else {
1139: mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential rhs vector*/
1140: VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD);
1141: VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD);
1142: if (!mumps->myid) {
1143: VecGetArray(mumps->b_seq, &array);
1144: mumps->id.rhs = (MumpsScalar *)array;
1145: }
1146: }
1147: } else { /* petsc_size == 1 */
1148: mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
1149: VecCopy(b, x);
1150: VecGetArray(x, &array);
1151: mumps->id.rhs = (MumpsScalar *)array;
1152: }
1154: /*
1155: handle condensation step of Schur complement (if any)
1156: We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
1157: According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase
1158: Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system.
1159: This requires an extra call to PetscMUMPS_c and the computation of the factors for S
1160: */
1161: if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
1163: second_solve = PETSC_TRUE;
1164: MatMumpsHandleSchur_Private(A, PETSC_FALSE);
1165: }
1166: /* solve phase */
1167: /*-------------*/
1168: mumps->id.job = JOB_SOLVE;
1169: PetscMUMPS_c(mumps);
1172: /* handle expansion step of Schur complement (if any) */
1173: if (second_solve) MatMumpsHandleSchur_Private(A, PETSC_TRUE);
1175: if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */
1176: if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
1177:       /* when id.ICNTL(9) changes, the contents of isol_loc[]/sol_loc may change (though not the size lsol_loc), so recreate scat_sol */
1178: VecScatterDestroy(&mumps->scat_sol);
1179: }
1180: if (!mumps->scat_sol) { /* create scatter scat_sol */
1181: PetscInt *isol2_loc = NULL;
1182: ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden); /* from */
1183: PetscMalloc1(mumps->id.lsol_loc, &isol2_loc);
1184: for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1; /* change Fortran style to C style */
1185: ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc); /* to */
1186: VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol);
1187: ISDestroy(&is_iden);
1188: ISDestroy(&is_petsc);
1189: mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
1190: }
1192: VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD);
1193: VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD);
1194: }
1196: if (mumps->petsc_size > 1) {
1197: if (mumps->ICNTL20 == 10) {
1198: VecRestoreArrayRead(b, &rarray);
1199: } else if (!mumps->myid) {
1200: VecRestoreArray(mumps->b_seq, &array);
1201: }
1202: } else VecRestoreArray(x, &array);
1204: PetscLogFlops(2.0 * mumps->id.RINFO(3));
1205: return 0;
1206: }
1208: PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x)
1209: {
1210: Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
1212: mumps->id.ICNTL(9) = 0;
1213: MatSolve_MUMPS(A, b, x);
1214: mumps->id.ICNTL(9) = 1;
1215: return 0;
1216: }
1218: PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X)
1219: {
1220: Mat Bt = NULL;
1221: PetscBool denseX, denseB, flg, flgT;
1222: Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
1223: PetscInt i, nrhs, M;
1224: PetscScalar *array;
1225: const PetscScalar *rbray;
1226: PetscInt lsol_loc, nlsol_loc, *idxx, iidx = 0;
1227: PetscMUMPSInt *isol_loc, *isol_loc_save;
1228: PetscScalar *bray, *sol_loc, *sol_loc_save;
1229: IS is_to, is_from;
1230: PetscInt k, proc, j, m, myrstart;
1231: const PetscInt *rstart;
1232: Vec v_mpi, msol_loc;
1233: VecScatter scat_sol;
1234: Vec b_seq;
1235: VecScatter scat_rhs;
1236: PetscScalar *aa;
1237: PetscInt spnr, *ia, *ja;
1238: Mat_MPIAIJ *b = NULL;
1240: PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL);
1243: PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL);
1244: if (denseB) {
1246: mumps->id.ICNTL(20) = 0; /* dense RHS */
1247: } else { /* sparse B */
1249: PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT);
1250: if (flgT) { /* input B is transpose of actual RHS matrix,
1251: because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
1252: MatTransposeGetMat(B, &Bt);
1253: } else SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix");
1254: mumps->id.ICNTL(20) = 1; /* sparse RHS */
1255: }
1257: MatGetSize(B, &M, &nrhs);
1258: mumps->id.nrhs = nrhs;
1259: mumps->id.lrhs = M;
1260: mumps->id.rhs = NULL;
1262: if (mumps->petsc_size == 1) {
1263: PetscScalar *aa;
1264: PetscInt spnr, *ia, *ja;
1265: PetscBool second_solve = PETSC_FALSE;
1267: MatDenseGetArray(X, &array);
1268: mumps->id.rhs = (MumpsScalar *)array;
1270: if (denseB) {
1271: /* copy B to X */
1272: MatDenseGetArrayRead(B, &rbray);
1273: PetscArraycpy(array, rbray, M * nrhs);
1274: MatDenseRestoreArrayRead(B, &rbray);
1275: } else { /* sparse B */
1276: MatSeqAIJGetArray(Bt, &aa);
1277: MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg);
1279: PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs);
1280: mumps->id.rhs_sparse = (MumpsScalar *)aa;
1281: }
1282: /* handle condensation step of Schur complement (if any) */
1283: if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
1284: second_solve = PETSC_TRUE;
1285: MatMumpsHandleSchur_Private(A, PETSC_FALSE);
1286: }
1287: /* solve phase */
1288: /*-------------*/
1289: mumps->id.job = JOB_SOLVE;
1290: PetscMUMPS_c(mumps);
1293: /* handle expansion step of Schur complement (if any) */
1294: if (second_solve) MatMumpsHandleSchur_Private(A, PETSC_TRUE);
1295: if (!denseB) { /* sparse B */
1296: MatSeqAIJRestoreArray(Bt, &aa);
1297: MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg);
1299: }
1300: MatDenseRestoreArray(X, &array);
1301: return 0;
1302: }
1304: /*--------- parallel case: MUMPS requires rhs B to be centralized on the host! --------*/
1307: /* create msol_loc to hold mumps local solution */
1308: isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */
1309: sol_loc_save = (PetscScalar *)mumps->id.sol_loc;
1311: lsol_loc = mumps->id.lsol_loc;
1312: nlsol_loc = nrhs * lsol_loc; /* length of sol_loc */
1313: PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc);
1314: mumps->id.sol_loc = (MumpsScalar *)sol_loc;
1315: mumps->id.isol_loc = isol_loc;
1317: VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc);
1319: if (denseB) {
1320: if (mumps->ICNTL20 == 10) {
1321: mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
1322: MatDenseGetArrayRead(B, &rbray);
1323: MatMumpsSetUpDistRHSInfo(A, nrhs, rbray);
1324: MatDenseRestoreArrayRead(B, &rbray);
1325: MatGetLocalSize(B, &m, NULL);
1326: VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, NULL, &v_mpi);
1327: } else {
1328: mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
1329:       /* TODO: Because of non-contiguous indices, the vecscatter scat_rhs created here cannot be realized as an MPI_Gather, resulting in
1330:          very inefficient communication. An optimization is to use VecScatterCreateToZero() to gather B to rank 0, then
1331:          re-arrange B into the desired order on rank 0, which is a local operation.
1332: */
1334: /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */
1335: /* wrap dense rhs matrix B into a vector v_mpi */
1336: MatGetLocalSize(B, &m, NULL);
1337: MatDenseGetArray(B, &bray);
1338: VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi);
1339: MatDenseRestoreArray(B, &bray);
1341: /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */
1342: if (!mumps->myid) {
1343: PetscInt *idx;
1344: /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
1345: PetscMalloc1(nrhs * M, &idx);
1346: MatGetOwnershipRanges(B, &rstart);
1347: k = 0;
1348: for (proc = 0; proc < mumps->petsc_size; proc++) {
1349: for (j = 0; j < nrhs; j++) {
1350: for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i;
1351: }
1352: }
1354: VecCreateSeq(PETSC_COMM_SELF, nrhs * M, &b_seq);
1355: ISCreateGeneral(PETSC_COMM_SELF, nrhs * M, idx, PETSC_OWN_POINTER, &is_to);
1356: ISCreateStride(PETSC_COMM_SELF, nrhs * M, 0, 1, &is_from);
1357: } else {
1358: VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq);
1359: ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to);
1360: ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from);
1361: }
1362: VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs);
1363: VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD);
1364: ISDestroy(&is_to);
1365: ISDestroy(&is_from);
1366: VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD);
1368: if (!mumps->myid) { /* define rhs on the host */
1369: VecGetArray(b_seq, &bray);
1370: mumps->id.rhs = (MumpsScalar *)bray;
1371: VecRestoreArray(b_seq, &bray);
1372: }
1373: }
1374: } else { /* sparse B */
1375: b = (Mat_MPIAIJ *)Bt->data;
1377: /* wrap dense X into a vector v_mpi */
1378: MatGetLocalSize(X, &m, NULL);
1379: MatDenseGetArray(X, &bray);
1380: VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi);
1381: MatDenseRestoreArray(X, &bray);
1383: if (!mumps->myid) {
1384: MatSeqAIJGetArray(b->A, &aa);
1385: MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg);
1387: PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs);
1388: mumps->id.rhs_sparse = (MumpsScalar *)aa;
1389: } else {
1390: mumps->id.irhs_ptr = NULL;
1391: mumps->id.irhs_sparse = NULL;
1392: mumps->id.nz_rhs = 0;
1393: mumps->id.rhs_sparse = NULL;
1394: }
1395: }
1397: /* solve phase */
1398: /*-------------*/
1399: mumps->id.job = JOB_SOLVE;
1400: PetscMUMPS_c(mumps);
1403: /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */
1404: MatDenseGetArray(X, &array);
1405: VecPlaceArray(v_mpi, array);
1407: /* create scatter scat_sol */
1408: MatGetOwnershipRanges(X, &rstart);
1409: /* iidx: index for scatter mumps solution to petsc X */
1411: ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from);
1412: PetscMalloc1(nlsol_loc, &idxx);
1413: for (i = 0; i < lsol_loc; i++) {
1414: isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */
1416: for (proc = 0; proc < mumps->petsc_size; proc++) {
1417: if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) {
1418: myrstart = rstart[proc];
1419: k = isol_loc[i] - myrstart; /* local index on 1st column of petsc vector X */
1420: iidx = k + myrstart * nrhs; /* maps mumps isol_loc[i] to petsc index in X */
1421: m = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */
1422: break;
1423: }
1424: }
1426: for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m;
1427: }
1428: ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to);
1429: VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol);
1430: VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD);
1431: ISDestroy(&is_from);
1432: ISDestroy(&is_to);
1433: VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD);
1434: MatDenseRestoreArray(X, &array);
1436: /* free spaces */
1437: mumps->id.sol_loc = (MumpsScalar *)sol_loc_save;
1438: mumps->id.isol_loc = isol_loc_save;
1440: PetscFree2(sol_loc, isol_loc);
1441: PetscFree(idxx);
1442: VecDestroy(&msol_loc);
1443: VecDestroy(&v_mpi);
1444: if (!denseB) {
1445: if (!mumps->myid) {
1446: b = (Mat_MPIAIJ *)Bt->data;
1447: MatSeqAIJRestoreArray(b->A, &aa);
1448: MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg);
1450: }
1451: } else {
1452: if (mumps->ICNTL20 == 0) {
1453: VecDestroy(&b_seq);
1454: VecScatterDestroy(&scat_rhs);
1455: }
1456: }
1457: VecScatterDestroy(&scat_sol);
1458: PetscLogFlops(2.0 * nrhs * mumps->id.RINFO(3));
1459: return 0;
1460: }
1462: PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X)
1463: {
1464: Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
1465: PetscMUMPSInt oldvalue = mumps->id.ICNTL(9);
1467: mumps->id.ICNTL(9) = 0;
1468: MatMatSolve_MUMPS(A, B, X);
1469: mumps->id.ICNTL(9) = oldvalue;
1470: return 0;
1471: }
1473: PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X)
1474: {
1475: PetscBool flg;
1476: Mat B;
1478: PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL);
1481: /* Create B=Bt^T that uses Bt's data structure */
1482: MatCreateTranspose(Bt, &B);
1484: MatMatSolve_MUMPS(A, B, X);
1485: MatDestroy(&B);
1486: return 0;
1487: }
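/*
  Usage sketch (user level, illustrative): to solve A*X = B with a sparse right-hand side B, the
  caller supplies Bt = B^T as a (Seq/MPI)AIJ matrix, since MUMPS wants the sparse RHS in compressed
  column form, together with a dense X:

    Mat F, Bt, X;                      (F: the MUMPS factor of A; X: of type MATDENSE)
    MatMatTransposeSolve(F, Bt, X);    (solves A*X = Bt^T)

  Internally Bt is wrapped with MatCreateTranspose() and forwarded to MatMatSolve_MUMPS(), which is
  why only SEQAIJ and MPIAIJ are accepted here.
*/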
1489: #if !defined(PETSC_USE_COMPLEX)
1490: /*
1491: input:
1492: F: numeric factor
1493: output:
1494: nneg: total number of negative pivots
1495: nzero: total number of zero pivots
1496: npos: (global dimension of F) - nneg - nzero
1497: */
1498: PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos)
1499: {
1500: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
1501: PetscMPIInt size;
1503: MPI_Comm_size(PetscObjectComm((PetscObject)F), &size);
1504: /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
1507: if (nneg) *nneg = mumps->id.INFOG(12);
1508: if (nzero || npos) {
1510: if (nzero) *nzero = mumps->id.INFOG(28);
1511: if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
1512: }
1513: return 0;
1514: }
1515: #endif
1517: PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps)
1518: {
1519: PetscInt i, nreqs;
1520: PetscMUMPSInt *irn, *jcn;
1521: PetscMPIInt count;
1522: PetscInt64 totnnz, remain;
1523: const PetscInt osize = mumps->omp_comm_size;
1524: PetscScalar *val;
1526: if (osize > 1) {
1527: if (reuse == MAT_INITIAL_MATRIX) {
1528: /* master first gathers counts of nonzeros to receive */
1529: if (mumps->is_omp_master) PetscMalloc1(osize, &mumps->recvcount);
1530: MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm);
1532: /* Then each computes number of send/recvs */
1533: if (mumps->is_omp_master) {
1534: /* Start from 1 since self communication is not done in MPI */
1535: nreqs = 0;
1536: for (i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX;
1537: } else {
1538: nreqs = (mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX;
1539: }
1540: PetscMalloc1(nreqs * 3, &mumps->reqs); /* Triple the requests since we send irn, jcn and val separately */
1542: /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others.
1543:        MPI_Gatherv would be enough if it supported big counts > 2^31-1. Since it does not, and mumps->nnz
1544:        might even be a prime number > 2^31-1, we have to slice the message. Since omp_comm_size
1545:        is very small, the current approach should have no extra overhead compared to MPI_Gatherv.
1546: */
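/*
  Small worked example of the slicing below (illustrative): with PETSC_MPI_INT_MAX = 2^31 - 1 and a
  contribution of 5,000,000,000 nonzeros, the loop posts three messages of 2147483647, 2147483647
  and 705032706 entries. Sender and receiver slice identically, so the messages match purely by
  order and a single tag suffices.
*/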
1547: nreqs = 0; /* counter for actual send/recvs */
1548: if (mumps->is_omp_master) {
1549: for (i = 0, totnnz = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */
1550: PetscMalloc2(totnnz, &irn, totnnz, &jcn);
1551: PetscMalloc1(totnnz, &val);
1553: /* Self communication */
1554: PetscArraycpy(irn, mumps->irn, mumps->nnz);
1555: PetscArraycpy(jcn, mumps->jcn, mumps->nnz);
1556: PetscArraycpy(val, mumps->val, mumps->nnz);
1558: /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */
1559: PetscFree2(mumps->irn, mumps->jcn);
1560: PetscFree(mumps->val_alloc);
1561: mumps->nnz = totnnz;
1562: mumps->irn = irn;
1563: mumps->jcn = jcn;
1564: mumps->val = mumps->val_alloc = val;
1566: irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */
1567: jcn += mumps->recvcount[0];
1568: val += mumps->recvcount[0];
1570: /* Remote communication */
1571: for (i = 1; i < osize; i++) {
1572: count = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX);
1573: remain = mumps->recvcount[i] - count;
1574: while (count > 0) {
1575: MPI_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]);
1576: MPI_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]);
1577: MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]);
1578: irn += count;
1579: jcn += count;
1580: val += count;
1581: count = PetscMin(remain, PETSC_MPI_INT_MAX);
1582: remain -= count;
1583: }
1584: }
1585: } else {
1586: irn = mumps->irn;
1587: jcn = mumps->jcn;
1588: val = mumps->val;
1589: count = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX);
1590: remain = mumps->nnz - count;
1591: while (count > 0) {
1592: MPI_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]);
1593: MPI_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]);
1594: MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]);
1595: irn += count;
1596: jcn += count;
1597: val += count;
1598: count = PetscMin(remain, PETSC_MPI_INT_MAX);
1599: remain -= count;
1600: }
1601: }
1602: } else {
1603: nreqs = 0;
1604: if (mumps->is_omp_master) {
1605: val = mumps->val + mumps->recvcount[0];
1606: for (i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */
1607: count = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX);
1608: remain = mumps->recvcount[i] - count;
1609: while (count > 0) {
1610: MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]);
1611: val += count;
1612: count = PetscMin(remain, PETSC_MPI_INT_MAX);
1613: remain -= count;
1614: }
1615: }
1616: } else {
1617: val = mumps->val;
1618: count = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX);
1619: remain = mumps->nnz - count;
1620: while (count > 0) {
1621: MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]);
1622: val += count;
1623: count = PetscMin(remain, PETSC_MPI_INT_MAX);
1624: remain -= count;
1625: }
1626: }
1627: }
1628: MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE);
1629:     mumps->tag++; /* It is fine for the send/recvs above to share one MPI tag */
1630: }
1631: return 0;
1632: }
1634: PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, const MatFactorInfo *info)
1635: {
1636: Mat_MUMPS *mumps = (Mat_MUMPS *)(F)->data;
1637: PetscBool isMPIAIJ;
1639:   if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0)) { /* INFOG(1) == -16 with INFO(2) == 0 just means an empty matrix, which is not an error */
1640:     if (mumps->id.INFOG(1) == -6) PetscInfo(A, "MatFactorNumeric is called with a structurally singular matrix, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2));
1641:     PetscInfo(A, "MatFactorNumeric is called after the analysis phase failed, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2));
1642: return 0;
1643: }
1645: (*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps);
1646: MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps);
1648: /* numerical factorization phase */
1649: /*-------------------------------*/
1650: mumps->id.job = JOB_FACTNUMERIC;
1651: if (!mumps->id.ICNTL(18)) { /* A is centralized */
1652: if (!mumps->myid) mumps->id.a = (MumpsScalar *)mumps->val;
1653: } else {
1654: mumps->id.a_loc = (MumpsScalar *)mumps->val;
1655: }
1656: PetscMUMPS_c(mumps);
1657: if (mumps->id.INFOG(1) < 0) {
1659: if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */
1660: PetscInfo(F, "matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2));
1661: F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1662: } else if (mumps->id.INFOG(1) == -13) {
1663: PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2));
1664: F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1665: } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
1666:       PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with work array\n", mumps->id.INFOG(1), mumps->id.INFO(2));
1667: F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1668: } else {
1669: PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2));
1670: F->factorerrortype = MAT_FACTOR_OTHER;
1671: }
1672: }
1675: F->assembled = PETSC_TRUE;
1677: if (F->schur) { /* reset Schur status to unfactored */
1678: #if defined(PETSC_HAVE_CUDA)
1679: F->schur->offloadmask = PETSC_OFFLOAD_CPU;
1680: #endif
1681: if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
1682: mumps->id.ICNTL(19) = 2;
1683: MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur);
1684: }
1685: MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED);
1686: }
1688:   /* ensure that the ICNTL(19) value returned by a later call to MatMumpsGetIcntl() is always consistent */
1689: if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;
1691: if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
1692: if (mumps->petsc_size > 1) {
1693: PetscInt lsol_loc;
1694: PetscScalar *sol_loc;
1696: PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &isMPIAIJ);
1698: /* distributed solution; Create x_seq=sol_loc for repeated use */
1699: if (mumps->x_seq) {
1700: VecScatterDestroy(&mumps->scat_sol);
1701: PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc);
1702: VecDestroy(&mumps->x_seq);
1703: }
1704: lsol_loc = mumps->id.INFO(23); /* length of sol_loc */
1705: PetscMalloc2(lsol_loc, &sol_loc, lsol_loc, &mumps->id.isol_loc);
1706: mumps->id.lsol_loc = lsol_loc;
1707: mumps->id.sol_loc = (MumpsScalar *)sol_loc;
1708: VecCreateSeqWithArray(PETSC_COMM_SELF, 1, lsol_loc, sol_loc, &mumps->x_seq);
1709: }
1710: PetscLogFlops(mumps->id.RINFO(2));
1711: return 0;
1712: }
1714: /* Sets MUMPS options from the options database */
1715: PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A)
1716: {
1717: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
1718: PetscMUMPSInt icntl = 0, size, *listvar_schur;
1719: PetscInt info[80], i, ninfo = 80, rbs, cbs;
1720: PetscBool flg = PETSC_FALSE, schur = (PetscBool)(mumps->id.ICNTL(26) == -1);
1721: MumpsScalar *arr;
1723: PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat");
1724: if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */
1725: PetscInt nthreads = 0;
1726: PetscInt nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
1727: PetscInt nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;
1729: mumps->petsc_comm = PetscObjectComm((PetscObject)A);
1730: MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size);
1731: MPI_Comm_rank(mumps->petsc_comm, &mumps->myid); /* "if (!myid)" still works even if mumps_comm is different */
1733: PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support);
1734: if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */
1735: /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */
1736: PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL);
1737: if (mumps->use_petsc_omp_support) {
1739:       PetscCheck(PetscDefined(HAVE_OPENMP_SUPPORT), PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have OpenMP support, but the -%smat_mumps_use_omp_threads option was given; reconfigure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : "");
1741: #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1742: PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl);
1743: PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master);
1744: #endif
1745: } else {
1746: mumps->omp_comm = PETSC_COMM_SELF;
1747: mumps->mumps_comm = mumps->petsc_comm;
1748: mumps->is_omp_master = PETSC_TRUE;
1749: }
1750: MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size);
1751: mumps->reqs = NULL;
1752: mumps->tag = 0;
1754: if (mumps->mumps_comm != MPI_COMM_NULL) {
1755: if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) {
1756: /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */
1757: MPI_Comm comm;
1758: MPI_Comm_dup(mumps->mumps_comm, &comm);
1759: mumps->mumps_comm = comm;
1760: } else PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm);
1761: }
1763: mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm);
1764: mumps->id.job = JOB_INIT;
1765:     mumps->id.par = 1; /* host participates in factorization and solve */
1766: mumps->id.sym = mumps->sym;
1768: size = mumps->id.size_schur;
1769: arr = mumps->id.schur;
1770: listvar_schur = mumps->id.listvar_schur;
1771: PetscMUMPS_c(mumps);
1773: /* restore cached ICNTL and CNTL values */
1774: for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl];
1775: for (icntl = 0; icntl < nCNTL_pre; ++icntl) mumps->id.CNTL((PetscInt)mumps->CNTL_pre[1 + 2 * icntl]) = mumps->CNTL_pre[2 + 2 * icntl];
1776: PetscFree(mumps->ICNTL_pre);
1777: PetscFree(mumps->CNTL_pre);
1779: if (schur) {
1780: mumps->id.size_schur = size;
1781: mumps->id.schur_lld = size;
1782: mumps->id.schur = arr;
1783: mumps->id.listvar_schur = listvar_schur;
1784: if (mumps->petsc_size > 1) {
1785: PetscBool gs; /* gs is false if any rank other than root has non-empty IS */
1787: mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */
1788: gs = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */
1789: MPI_Allreduce(MPI_IN_PLACE, &gs, 1, MPIU_BOOL, MPI_LAND, mumps->petsc_comm);
1791: } else {
1792: if (F->factortype == MAT_FACTOR_LU) {
1793: mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */
1794: } else {
1795: mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */
1796: }
1797: }
1798: mumps->id.ICNTL(26) = -1;
1799: }
1801: /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code.
1802: For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
1803: */
1804: MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm);
1805: MPI_Bcast(mumps->id.cntl, 15, MPIU_REAL, 0, mumps->omp_comm);
1807: mumps->scat_rhs = NULL;
1808: mumps->scat_sol = NULL;
1810: /* set PETSc-MUMPS default options - override MUMPS default */
1811: mumps->id.ICNTL(3) = 0;
1812: mumps->id.ICNTL(4) = 0;
1813: if (mumps->petsc_size == 1) {
1814: mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */
1815: mumps->id.ICNTL(7) = 7; /* automatic choice of ordering done by the package */
1816: } else {
1817: mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */
1818: mumps->id.ICNTL(21) = 1; /* distributed solution */
1819: }
1820: }
1821: PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg);
1822: if (flg) mumps->id.ICNTL(1) = icntl;
1823: PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg);
1824: if (flg) mumps->id.ICNTL(2) = icntl;
1825: PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg);
1826: if (flg) mumps->id.ICNTL(3) = icntl;
1828: PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg);
1829: if (flg) mumps->id.ICNTL(4) = icntl;
1830:   if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* restore the MUMPS default id.ICNTL(3) = 6 */
1832: PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg);
1833: if (flg) mumps->id.ICNTL(6) = icntl;
1835: PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg);
1836: if (flg) {
1838: mumps->id.ICNTL(7) = icntl;
1839: }
1841: PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL);
1842: /* PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL); handled by MatSolveTranspose_MUMPS() */
1843: PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL);
1844: PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL);
1845: PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL);
1846: PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL);
1847: PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL);
1848: MatGetBlockSizes(A, &rbs, &cbs);
1849: if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = -rbs;
1850: PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg);
1851: if (flg) {
1854: }
1855: PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL);
1856: if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */
1857: MatDestroy(&F->schur);
1858: MatMumpsResetSchur_Private(mumps);
1859: }
1861: /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps",
1862: and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran
1863: and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...'
1864: This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug,
1865: see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590.
1866: In short, we could not use distributed RHS with MPICH until v4.0b1.
1867: */
1868: #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101))
1869: mumps->ICNTL20 = 0; /* Centralized dense RHS*/
1870: #else
1871: mumps->ICNTL20 = 10; /* Distributed dense RHS*/
1872: #endif
1873: PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg);
1875: #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0)
1877: #endif
1878: /* PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL); we only use distributed solution vector */
1880: PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL);
1881:   PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that can be allocated per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL);
1882: PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL);
1883: if (mumps->id.ICNTL(24)) { mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */ }
1885: PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL);
1886:   PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix has been computed", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL);
1887: PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL);
1888:   PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL);
1889: PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL);
1890: /* PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL); */ /* call MatMumpsGetInverse() directly */
1891: PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL);
1892:   /* PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL);  -- not supported by PETSc API */
1893: PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL);
1894: PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL);
1895: PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL);
1896: PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL);
1898: PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", mumps->id.CNTL(1), &mumps->id.CNTL(1), NULL);
1899: PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", mumps->id.CNTL(2), &mumps->id.CNTL(2), NULL);
1900: PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", mumps->id.CNTL(3), &mumps->id.CNTL(3), NULL);
1901: PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", mumps->id.CNTL(4), &mumps->id.CNTL(4), NULL);
1902: PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", mumps->id.CNTL(5), &mumps->id.CNTL(5), NULL);
1903: PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", mumps->id.CNTL(7), &mumps->id.CNTL(7), NULL);
1905: PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL);
1907: PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL);
1908: if (ninfo) {
1910: PetscMalloc1(ninfo, &mumps->info);
1911: mumps->ninfo = ninfo;
1912: for (i = 0; i < ninfo; i++) {
1914: mumps->info[i] = info[i];
1915: }
1916: }
1917: PetscOptionsEnd();
1918: return 0;
1919: }
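/*
   Example (illustrative sketch, not part of this file): the options registered above are normally given on the
   command line together with a factorization-based preconditioner, e.g. for a hypothetical application "./myapp":

     ./myapp -pc_type lu -pc_factor_mat_solver_type mumps -mat_mumps_icntl_14 50 -mat_mumps_icntl_24 1 -mat_mumps_cntl_3 1e-12

   Each -mat_mumps_icntl_xx / -mat_mumps_cntl_xx value is read by MatSetFromOptions_MUMPS() and copied into
   id.ICNTL()/id.CNTL() before the analysis phase.
*/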
1921: PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, const MatFactorInfo *info, Mat_MUMPS *mumps)
1922: {
1923: if (mumps->id.INFOG(1) < 0) {
1925: if (mumps->id.INFOG(1) == -6) {
1926: PetscInfo(F, "matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2));
1927: F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT;
1928: } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) {
1929:       PetscInfo(F, "workspace problem, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2));
1930: F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1931:     } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0) { /* INFO(2) holds N; an empty matrix is not treated as an error */
1932: PetscInfo(F, "Empty matrix\n");
1933: } else {
1934: PetscInfo(F, "Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2));
1935: F->factorerrortype = MAT_FACTOR_OTHER;
1936: }
1937: }
1938: return 0;
1939: }
1941: PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
1942: {
1943: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
1944: Vec b;
1945: const PetscInt M = A->rmap->N;
1947: if (mumps->matstruc == SAME_NONZERO_PATTERN) {
1948: /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */
1949: return 0;
1950: }
1952: /* Set MUMPS options from the options database */
1953: MatSetFromOptions_MUMPS(F, A);
1955: (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);
1956: MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps);
1958: /* analysis phase */
1959: /*----------------*/
1960: mumps->id.job = JOB_FACTSYMBOLIC;
1961: mumps->id.n = M;
1962: switch (mumps->id.ICNTL(18)) {
1963: case 0: /* centralized assembled matrix input */
1964: if (!mumps->myid) {
1965: mumps->id.nnz = mumps->nnz;
1966: mumps->id.irn = mumps->irn;
1967: mumps->id.jcn = mumps->jcn;
1968: if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val;
1969: if (r) {
1970: mumps->id.ICNTL(7) = 1;
1971: if (!mumps->myid) {
1972: const PetscInt *idx;
1973: PetscInt i;
1975: PetscMalloc1(M, &mumps->id.perm_in);
1976: ISGetIndices(r, &idx);
1977: for (i = 0; i < M; i++) PetscMUMPSIntCast(idx[i] + 1, &(mumps->id.perm_in[i])); /* perm_in[]: start from 1, not 0! */
1978: ISRestoreIndices(r, &idx);
1979: }
1980: }
1981: }
1982: break;
1983: case 3: /* distributed assembled matrix input (size>1) */
1984: mumps->id.nnz_loc = mumps->nnz;
1985: mumps->id.irn_loc = mumps->irn;
1986: mumps->id.jcn_loc = mumps->jcn;
1987: if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val;
1988: if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
1989: MatCreateVecs(A, NULL, &b);
1990: VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq);
1991: VecDestroy(&b);
1992: }
1993: break;
1994: }
1995: PetscMUMPS_c(mumps);
1996: MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps);
1998: F->ops->lufactornumeric = MatFactorNumeric_MUMPS;
1999: F->ops->solve = MatSolve_MUMPS;
2000: F->ops->solvetranspose = MatSolveTranspose_MUMPS;
2001: F->ops->matsolve = MatMatSolve_MUMPS;
2002: F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
2003: F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;
2005: mumps->matstruc = SAME_NONZERO_PATTERN;
2006: return 0;
2007: }
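/*
   Example (a minimal sketch, assuming an assembled matrix A and conforming vectors b, x): the symbolic and
   numeric phases above are usually reached through the KSP/PC interface rather than called directly:

     KSP ksp;
     PC  pc;

     KSPCreate(PETSC_COMM_WORLD, &ksp);
     KSPSetOperators(ksp, A, A);
     KSPGetPC(ksp, &pc);
     PCSetType(pc, PCLU);
     PCFactorSetMatSolverType(pc, MATSOLVERMUMPS); // selects this interface; PCSetUp() then reaches MatLUFactorSymbolic_AIJMUMPS()/MatFactorNumeric_MUMPS()
     KSPSetFromOptions(ksp);
     KSPSolve(ksp, b, x);
     KSPDestroy(&ksp);
*/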
2009: /* Note the Petsc r and c permutations are ignored */
2010: PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
2011: {
2012: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2013: Vec b;
2014: const PetscInt M = A->rmap->N;
2016: if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2017:     /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */
2018: return 0;
2019: }
2021: /* Set MUMPS options from the options database */
2022: MatSetFromOptions_MUMPS(F, A);
2024: (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);
2025: MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps);
2027: /* analysis phase */
2028: /*----------------*/
2029: mumps->id.job = JOB_FACTSYMBOLIC;
2030: mumps->id.n = M;
2031: switch (mumps->id.ICNTL(18)) {
2032: case 0: /* centralized assembled matrix input */
2033: if (!mumps->myid) {
2034: mumps->id.nnz = mumps->nnz;
2035: mumps->id.irn = mumps->irn;
2036: mumps->id.jcn = mumps->jcn;
2037: if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val;
2038: }
2039: break;
2040: case 3: /* distributed assembled matrix input (size>1) */
2041: mumps->id.nnz_loc = mumps->nnz;
2042: mumps->id.irn_loc = mumps->irn;
2043: mumps->id.jcn_loc = mumps->jcn;
2044: if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val;
2045: if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2046: MatCreateVecs(A, NULL, &b);
2047: VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq);
2048: VecDestroy(&b);
2049: }
2050: break;
2051: }
2052: PetscMUMPS_c(mumps);
2053: MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps);
2055: F->ops->lufactornumeric = MatFactorNumeric_MUMPS;
2056: F->ops->solve = MatSolve_MUMPS;
2057: F->ops->solvetranspose = MatSolveTranspose_MUMPS;
2058: F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;
2060: mumps->matstruc = SAME_NONZERO_PATTERN;
2061: return 0;
2062: }
2064: /* Note the Petsc r permutation and factor info are ignored */
2065: PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, IS r, const MatFactorInfo *info)
2066: {
2067: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2068: Vec b;
2069: const PetscInt M = A->rmap->N;
2071: if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2072:     /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */
2073: return 0;
2074: }
2076: /* Set MUMPS options from the options database */
2077: MatSetFromOptions_MUMPS(F, A);
2079: (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);
2080: MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps);
2082: /* analysis phase */
2083: /*----------------*/
2084: mumps->id.job = JOB_FACTSYMBOLIC;
2085: mumps->id.n = M;
2086: switch (mumps->id.ICNTL(18)) {
2087: case 0: /* centralized assembled matrix input */
2088: if (!mumps->myid) {
2089: mumps->id.nnz = mumps->nnz;
2090: mumps->id.irn = mumps->irn;
2091: mumps->id.jcn = mumps->jcn;
2092: if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val;
2093: }
2094: break;
2095: case 3: /* distributed assembled matrix input (size>1) */
2096: mumps->id.nnz_loc = mumps->nnz;
2097: mumps->id.irn_loc = mumps->irn;
2098: mumps->id.jcn_loc = mumps->jcn;
2099: if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val;
2100: if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2101: MatCreateVecs(A, NULL, &b);
2102: VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq);
2103: VecDestroy(&b);
2104: }
2105: break;
2106: }
2107: PetscMUMPS_c(mumps);
2108: MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps);
2110: F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
2111: F->ops->solve = MatSolve_MUMPS;
2112: F->ops->solvetranspose = MatSolve_MUMPS;
2113: F->ops->matsolve = MatMatSolve_MUMPS;
2114: F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
2115: F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;
2116: #if defined(PETSC_USE_COMPLEX)
2117: F->ops->getinertia = NULL;
2118: #else
2119: F->ops->getinertia = MatGetInertia_SBAIJMUMPS;
2120: #endif
2122: mumps->matstruc = SAME_NONZERO_PATTERN;
2123: return 0;
2124: }
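/*
   Example (a minimal sketch, assuming a symmetric assembled matrix A; real-scalar builds only, since
   getinertia is disabled for complex scalars above): the Cholesky path can also be driven directly to obtain
   the matrix inertia:

     Mat           F;
     MatFactorInfo info;
     IS            rperm, cperm;
     PetscInt      nneg, nzero, npos;

     MatFactorInfoInitialize(&info);
     MatGetFactor(A, MATSOLVERMUMPS, MAT_FACTOR_CHOLESKY, &F);
     MatGetOrdering(A, MATORDERINGNATURAL, &rperm, &cperm); // the permutation is ignored by this interface, see the note above
     MatCholeskyFactorSymbolic(F, A, rperm, &info);
     MatCholeskyFactorNumeric(F, A, &info);
     MatGetInertia(F, &nneg, &nzero, &npos);
     ISDestroy(&rperm);
     ISDestroy(&cperm);
     MatDestroy(&F);
*/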
2126: PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer)
2127: {
2128: PetscBool iascii;
2129: PetscViewerFormat format;
2130: Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
2132: /* check if matrix is mumps type */
2133: if (A->ops->solve != MatSolve_MUMPS) return 0;
2135: PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii);
2136: if (iascii) {
2137: PetscViewerGetFormat(viewer, &format);
2138: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
2139: PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n");
2140: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
2141: PetscViewerASCIIPrintf(viewer, " SYM (matrix type): %d\n", mumps->id.sym);
2142: PetscViewerASCIIPrintf(viewer, " PAR (host participation): %d\n", mumps->id.par);
2143: PetscViewerASCIIPrintf(viewer, " ICNTL(1) (output for error): %d\n", mumps->id.ICNTL(1));
2144: PetscViewerASCIIPrintf(viewer, " ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2));
2145: PetscViewerASCIIPrintf(viewer, " ICNTL(3) (output for global info): %d\n", mumps->id.ICNTL(3));
2146: PetscViewerASCIIPrintf(viewer, " ICNTL(4) (level of printing): %d\n", mumps->id.ICNTL(4));
2147: PetscViewerASCIIPrintf(viewer, " ICNTL(5) (input mat struct): %d\n", mumps->id.ICNTL(5));
2148: PetscViewerASCIIPrintf(viewer, " ICNTL(6) (matrix prescaling): %d\n", mumps->id.ICNTL(6));
2149: PetscViewerASCIIPrintf(viewer, " ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7));
2150: PetscViewerASCIIPrintf(viewer, " ICNTL(8) (scaling strategy): %d\n", mumps->id.ICNTL(8));
2151: PetscViewerASCIIPrintf(viewer, " ICNTL(10) (max num of refinements): %d\n", mumps->id.ICNTL(10));
2152: PetscViewerASCIIPrintf(viewer, " ICNTL(11) (error analysis): %d\n", mumps->id.ICNTL(11));
2153: if (mumps->id.ICNTL(11) > 0) {
2154: PetscViewerASCIIPrintf(viewer, " RINFOG(4) (inf norm of input mat): %g\n", mumps->id.RINFOG(4));
2155: PetscViewerASCIIPrintf(viewer, " RINFOG(5) (inf norm of solution): %g\n", mumps->id.RINFOG(5));
2156: PetscViewerASCIIPrintf(viewer, " RINFOG(6) (inf norm of residual): %g\n", mumps->id.RINFOG(6));
2157: PetscViewerASCIIPrintf(viewer, " RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", mumps->id.RINFOG(7), mumps->id.RINFOG(8));
2158: PetscViewerASCIIPrintf(viewer, " RINFOG(9) (error estimate): %g\n", mumps->id.RINFOG(9));
2159: PetscViewerASCIIPrintf(viewer, " RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", mumps->id.RINFOG(10), mumps->id.RINFOG(11));
2160: }
2161: PetscViewerASCIIPrintf(viewer, " ICNTL(12) (efficiency control): %d\n", mumps->id.ICNTL(12));
2162: PetscViewerASCIIPrintf(viewer, " ICNTL(13) (sequential factorization of the root node): %d\n", mumps->id.ICNTL(13));
2163: PetscViewerASCIIPrintf(viewer, " ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14));
2164: PetscViewerASCIIPrintf(viewer, " ICNTL(15) (compression of the input matrix): %d\n", mumps->id.ICNTL(15));
2165:       /* ICNTL(16-17) not used */
2166: PetscViewerASCIIPrintf(viewer, " ICNTL(18) (input mat struct): %d\n", mumps->id.ICNTL(18));
2167: PetscViewerASCIIPrintf(viewer, " ICNTL(19) (Schur complement info): %d\n", mumps->id.ICNTL(19));
2168: PetscViewerASCIIPrintf(viewer, " ICNTL(20) (RHS sparse pattern): %d\n", mumps->id.ICNTL(20));
2169: PetscViewerASCIIPrintf(viewer, " ICNTL(21) (solution struct): %d\n", mumps->id.ICNTL(21));
2170: PetscViewerASCIIPrintf(viewer, " ICNTL(22) (in-core/out-of-core facility): %d\n", mumps->id.ICNTL(22));
2171:       PetscViewerASCIIPrintf(viewer, "  ICNTL(23) (max size of memory that can be allocated locally):%d\n", mumps->id.ICNTL(23));
2173: PetscViewerASCIIPrintf(viewer, " ICNTL(24) (detection of null pivot rows): %d\n", mumps->id.ICNTL(24));
2174: PetscViewerASCIIPrintf(viewer, " ICNTL(25) (computation of a null space basis): %d\n", mumps->id.ICNTL(25));
2175: PetscViewerASCIIPrintf(viewer, " ICNTL(26) (Schur options for RHS or solution): %d\n", mumps->id.ICNTL(26));
2176: PetscViewerASCIIPrintf(viewer, " ICNTL(27) (blocking size for multiple RHS): %d\n", mumps->id.ICNTL(27));
2177: PetscViewerASCIIPrintf(viewer, " ICNTL(28) (use parallel or sequential ordering): %d\n", mumps->id.ICNTL(28));
2178: PetscViewerASCIIPrintf(viewer, " ICNTL(29) (parallel ordering): %d\n", mumps->id.ICNTL(29));
2180: PetscViewerASCIIPrintf(viewer, " ICNTL(30) (user-specified set of entries in inv(A)): %d\n", mumps->id.ICNTL(30));
2181:       PetscViewerASCIIPrintf(viewer, "  ICNTL(31) (which factors may be discarded during factorization): %d\n", mumps->id.ICNTL(31));
2182: PetscViewerASCIIPrintf(viewer, " ICNTL(33) (compute determinant): %d\n", mumps->id.ICNTL(33));
2183: PetscViewerASCIIPrintf(viewer, " ICNTL(35) (activate BLR based factorization): %d\n", mumps->id.ICNTL(35));
2184: PetscViewerASCIIPrintf(viewer, " ICNTL(36) (choice of BLR factorization variant): %d\n", mumps->id.ICNTL(36));
2185: PetscViewerASCIIPrintf(viewer, " ICNTL(38) (estimated compression rate of LU factors): %d\n", mumps->id.ICNTL(38));
2187: PetscViewerASCIIPrintf(viewer, " CNTL(1) (relative pivoting threshold): %g\n", mumps->id.CNTL(1));
2188: PetscViewerASCIIPrintf(viewer, " CNTL(2) (stopping criterion of refinement): %g\n", mumps->id.CNTL(2));
2189: PetscViewerASCIIPrintf(viewer, " CNTL(3) (absolute pivoting threshold): %g\n", mumps->id.CNTL(3));
2190: PetscViewerASCIIPrintf(viewer, " CNTL(4) (value of static pivoting): %g\n", mumps->id.CNTL(4));
2191: PetscViewerASCIIPrintf(viewer, " CNTL(5) (fixation for null pivots): %g\n", mumps->id.CNTL(5));
2192: PetscViewerASCIIPrintf(viewer, " CNTL(7) (dropping parameter for BLR): %g\n", mumps->id.CNTL(7));
2194: /* information local to each processor */
2195: PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis):\n");
2196: PetscViewerASCIIPushSynchronized(viewer);
2197: PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(1));
2198: PetscViewerFlush(viewer);
2199: PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization):\n");
2200: PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(2));
2201: PetscViewerFlush(viewer);
2202: PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization):\n");
2203: PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, mumps->id.RINFO(3));
2204: PetscViewerFlush(viewer);
2206:       PetscViewerASCIIPrintf(viewer, "  RINFO... placeholder");
2207: PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(15));
2208: PetscViewerFlush(viewer);
2210:       PetscViewerASCIIPrintf(viewer, "  INFO(16) (size (in MB) of MUMPS internal data used during numerical factorization):\n");
2211: PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(16));
2212: PetscViewerFlush(viewer);
2214: PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization):\n");
2215: PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(23));
2216: PetscViewerFlush(viewer);
2218: if (mumps->ninfo && mumps->ninfo <= 80) {
2219: PetscInt i;
2220: for (i = 0; i < mumps->ninfo; i++) {
2221: PetscViewerASCIIPrintf(viewer, " INFO(%" PetscInt_FMT "):\n", mumps->info[i]);
2222: PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i]));
2223: PetscViewerFlush(viewer);
2224: }
2225: }
2226: PetscViewerASCIIPopSynchronized(viewer);
2227: } else PetscViewerASCIIPrintf(viewer, " Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? ((PetscObject)A)->prefix : "");
2229: if (mumps->myid == 0) { /* information from the host */
2230: PetscViewerASCIIPrintf(viewer, " RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", mumps->id.RINFOG(1));
2231: PetscViewerASCIIPrintf(viewer, " RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", mumps->id.RINFOG(2));
2232: PetscViewerASCIIPrintf(viewer, " RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", mumps->id.RINFOG(3));
2233: PetscViewerASCIIPrintf(viewer, " (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", mumps->id.RINFOG(12), mumps->id.RINFOG(13), mumps->id.INFOG(34));
2235: PetscViewerASCIIPrintf(viewer, " INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3));
2236: PetscViewerASCIIPrintf(viewer, " INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4));
2237: PetscViewerASCIIPrintf(viewer, " INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5));
2238: PetscViewerASCIIPrintf(viewer, " INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6));
2239: PetscViewerASCIIPrintf(viewer, " INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7));
2240: PetscViewerASCIIPrintf(viewer, " INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8));
2241: PetscViewerASCIIPrintf(viewer, " INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9));
2242:         PetscViewerASCIIPrintf(viewer, "  INFOG(10) (total integer space to store the matrix factors after factorization): %d\n", mumps->id.INFOG(10));
2243: PetscViewerASCIIPrintf(viewer, " INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11));
2244: PetscViewerASCIIPrintf(viewer, " INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12));
2245: PetscViewerASCIIPrintf(viewer, " INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13));
2246:         PetscViewerASCIIPrintf(viewer, "  INFOG(14) (number of memory compresses after factorization): %d\n", mumps->id.INFOG(14));
2247: PetscViewerASCIIPrintf(viewer, " INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15));
2248: PetscViewerASCIIPrintf(viewer, " INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16));
2249: PetscViewerASCIIPrintf(viewer, " INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17));
2250: PetscViewerASCIIPrintf(viewer, " INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18));
2251: PetscViewerASCIIPrintf(viewer, " INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19));
2252: PetscViewerASCIIPrintf(viewer, " INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20));
2253: PetscViewerASCIIPrintf(viewer, " INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21));
2254: PetscViewerASCIIPrintf(viewer, " INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22));
2255: PetscViewerASCIIPrintf(viewer, " INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23));
2256: PetscViewerASCIIPrintf(viewer, " INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24));
2257: PetscViewerASCIIPrintf(viewer, " INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25));
2258: PetscViewerASCIIPrintf(viewer, " INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28));
2259: PetscViewerASCIIPrintf(viewer, " INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29));
2260: PetscViewerASCIIPrintf(viewer, " INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31));
2261: PetscViewerASCIIPrintf(viewer, " INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32));
2262: PetscViewerASCIIPrintf(viewer, " INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33));
2263: PetscViewerASCIIPrintf(viewer, " INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34));
2264: PetscViewerASCIIPrintf(viewer, " INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35));
2265: PetscViewerASCIIPrintf(viewer, " INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36));
2266: PetscViewerASCIIPrintf(viewer, " INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37));
2267: PetscViewerASCIIPrintf(viewer, " INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38));
2268: PetscViewerASCIIPrintf(viewer, " INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39));
2269: }
2270: }
2271: }
2272: return 0;
2273: }
2275: PetscErrorCode MatGetInfo_MUMPS(Mat A, MatInfoType flag, MatInfo *info)
2276: {
2277: Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
2279: info->block_size = 1.0;
2280: info->nz_allocated = mumps->id.INFOG(20);
2281: info->nz_used = mumps->id.INFOG(20);
2282: info->nz_unneeded = 0.0;
2283: info->assemblies = 0.0;
2284: info->mallocs = 0.0;
2285: info->memory = 0.0;
2286: info->fill_ratio_given = 0;
2287: info->fill_ratio_needed = 0;
2288: info->factor_mallocs = 0;
2289: return 0;
2290: }
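/*
   Example (a sketch, assuming F is a MUMPS factor matrix): the generic matrix-info query dispatches to
   MatGetInfo_MUMPS() above, so the factor fill can be read as

     MatInfo minfo;
     MatGetInfo(F, MAT_LOCAL, &minfo); // minfo.nz_used is filled from INFOG(20)
*/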
2292: /* -------------------------------------------------------------------------------------------*/
2293: PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
2294: {
2295: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2296: const PetscScalar *arr;
2297: const PetscInt *idxs;
2298: PetscInt size, i;
2300: ISGetLocalSize(is, &size);
2301: /* Schur complement matrix */
2302: MatDestroy(&F->schur);
2303: MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur);
2304: MatDenseGetArrayRead(F->schur, &arr);
2305: mumps->id.schur = (MumpsScalar *)arr;
2306: mumps->id.size_schur = size;
2307: mumps->id.schur_lld = size;
2308: MatDenseRestoreArrayRead(F->schur, &arr);
2309: if (mumps->sym == 1) MatSetOption(F->schur, MAT_SPD, PETSC_TRUE);
2311: /* MUMPS expects Fortran style indices */
2312: PetscFree(mumps->id.listvar_schur);
2313: PetscMalloc1(size, &mumps->id.listvar_schur);
2314: ISGetIndices(is, &idxs);
2315: for (i = 0; i < size; i++) PetscMUMPSIntCast(idxs[i] + 1, &(mumps->id.listvar_schur[i]));
2316: ISRestoreIndices(is, &idxs);
2317:   /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */
2318: mumps->id.ICNTL(26) = -1;
2319: return 0;
2320: }
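/*
   Example (a sketch for a sequential run, where n is the size of A and the hypothetical nschur trailing
   unknowns form the Schur block; in parallel only the host rank may pass a non-empty index set, cf.
   MatSetFromOptions_MUMPS()): the Schur indices must be supplied before the symbolic factorization:

     IS is;
     ISCreateStride(PETSC_COMM_SELF, nschur, n - nschur, 1, &is);
     MatFactorSetSchurIS(F, is); // dispatches to MatFactorSetSchurIS_MUMPS() above
     ISDestroy(&is);
*/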
2322: /* -------------------------------------------------------------------------------------------*/
2323: PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S)
2324: {
2325: Mat St;
2326: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2327: PetscScalar *array;
2328: #if defined(PETSC_USE_COMPLEX)
2329: PetscScalar im = PetscSqrtScalar((PetscScalar)-1.0);
2330: #endif
2333: MatCreate(PETSC_COMM_SELF, &St);
2334: MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur);
2335: MatSetType(St, MATDENSE);
2336: MatSetUp(St);
2337: MatDenseGetArray(St, &array);
2338:   if (!mumps->sym) { /* MUMPS always returns a full matrix */
2339: if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
2340: PetscInt i, j, N = mumps->id.size_schur;
2341: for (i = 0; i < N; i++) {
2342: for (j = 0; j < N; j++) {
2343: #if !defined(PETSC_USE_COMPLEX)
2344: PetscScalar val = mumps->id.schur[i * N + j];
2345: #else
2346: PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i;
2347: #endif
2348: array[j * N + i] = val;
2349: }
2350: }
2351: } else { /* stored by columns */
2352: PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur);
2353: }
2354: } else { /* either full or lower-triangular (not packed) */
2355: if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */
2356: PetscInt i, j, N = mumps->id.size_schur;
2357: for (i = 0; i < N; i++) {
2358: for (j = i; j < N; j++) {
2359: #if !defined(PETSC_USE_COMPLEX)
2360: PetscScalar val = mumps->id.schur[i * N + j];
2361: #else
2362: PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i;
2363: #endif
2364: array[i * N + j] = val;
2365: array[j * N + i] = val;
2366: }
2367: }
2368: } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
2369: PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur);
2370: } else { /* ICNTL(19) == 1 lower triangular stored by rows */
2371: PetscInt i, j, N = mumps->id.size_schur;
2372: for (i = 0; i < N; i++) {
2373: for (j = 0; j < i + 1; j++) {
2374: #if !defined(PETSC_USE_COMPLEX)
2375: PetscScalar val = mumps->id.schur[i * N + j];
2376: #else
2377: PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i;
2378: #endif
2379: array[i * N + j] = val;
2380: array[j * N + i] = val;
2381: }
2382: }
2383: }
2384: }
2385: MatDenseRestoreArray(St, &array);
2386: *S = St;
2387: return 0;
2388: }
2390: /* -------------------------------------------------------------------------------------------*/
2391: PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival)
2392: {
2393: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2395: if (mumps->id.job == JOB_NULL) { /* need to cache icntl and ival since PetscMUMPS_c() has never been called */
2396: PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */
2397: for (i = 0; i < nICNTL_pre; ++i)
2398: if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */
2399: if (i == nICNTL_pre) { /* not already cached */
2400: if (i > 0) PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre);
2401: else PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre);
2402: mumps->ICNTL_pre[0]++;
2403: }
2404: mumps->ICNTL_pre[1 + 2 * i] = icntl;
2405: PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i);
2406: } else PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl));
2407: return 0;
2408: }
2410: PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival)
2411: {
2412: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2414: *ival = mumps->id.ICNTL(icntl);
2415: return 0;
2416: }
2418: /*@
2419: MatMumpsSetIcntl - Set MUMPS parameter ICNTL()
2421: Logically Collective
2423: Input Parameters:
2424: + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2425: . icntl - index of MUMPS parameter array ICNTL()
2426: - ival - value of MUMPS ICNTL(icntl)
2428: Options Database Key:
2429: . -mat_mumps_icntl_<icntl> <ival> - change the option numbered icntl to ival
2431: Level: beginner
2433: References:
2434: . * - MUMPS Users' Guide
2436: .seealso: `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2437: @*/
2438: PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival)
2439: {
2445: PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival));
2446: return 0;
2447: }
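/*
   Example (a sketch, assuming a matrix A): values set before the first call into MUMPS are cached in
   ICNTL_pre and replayed right after JOB_INIT, so it is safe to call this immediately after MatGetFactor():

     Mat F;
     MatGetFactor(A, MATSOLVERMUMPS, MAT_FACTOR_LU, &F);
     MatMumpsSetIcntl(F, 7, 2);  // ICNTL(7) = 2: AMF sequential ordering
     MatMumpsSetIcntl(F, 24, 1); // ICNTL(24) = 1: enable null-pivot detection
*/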
2449: /*@
2450: MatMumpsGetIcntl - Get MUMPS parameter ICNTL()
2452: Logically Collective
2454: Input Parameters:
2455: + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2456: - icntl - index of MUMPS parameter array ICNTL()
2458: Output Parameter:
2459: . ival - value of MUMPS ICNTL(icntl)
2461: Level: beginner
2463: References:
2464: . * - MUMPS Users' Guide
2466: .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2467: @*/
2468: PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival)
2469: {
2475: PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
2476: return 0;
2477: }
2479: /* -------------------------------------------------------------------------------------------*/
2480: PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val)
2481: {
2482: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2484: if (mumps->id.job == JOB_NULL) {
2485: PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
2486: for (i = 0; i < nCNTL_pre; ++i)
2487: if (mumps->CNTL_pre[1 + 2 * i] == icntl) break;
2488: if (i == nCNTL_pre) {
2489: if (i > 0) PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre);
2490: else PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre);
2491: mumps->CNTL_pre[0]++;
2492: }
2493: mumps->CNTL_pre[1 + 2 * i] = icntl;
2494: mumps->CNTL_pre[2 + 2 * i] = val;
2495: } else mumps->id.CNTL(icntl) = val;
2496: return 0;
2497: }
2499: PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val)
2500: {
2501: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2503: *val = mumps->id.CNTL(icntl);
2504: return 0;
2505: }
2507: /*@
2508: MatMumpsSetCntl - Set MUMPS parameter CNTL()
2510: Logically Collective
2512: Input Parameters:
2513: + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2514: . icntl - index of MUMPS parameter array CNTL()
2515: - val - value of MUMPS CNTL(icntl)
2517: Options Database Key:
2518: . -mat_mumps_cntl_<icntl> <val> - change the option numbered icntl to ival
2520: Level: beginner
2522: References:
2523: . * - MUMPS Users' Guide
2525: .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2526: @*/
2527: PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val)
2528: {
2534: PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val));
2535: return 0;
2536: }
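/*
   Example (a sketch): the real-valued controls follow the same caching scheme as the integer ones, e.g.
   raising the relative pivoting threshold CNTL(1):

     MatMumpsSetCntl(F, 1, 0.1);
*/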
2538: /*@
2539: MatMumpsGetCntl - Get MUMPS parameter CNTL()
2541: Logically Collective
2543: Input Parameters:
2544: + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2545: - icntl - index of MUMPS parameter array CNTL()
2547: Output Parameter:
2548: . val - value of MUMPS CNTL(icntl)
2550: Level: beginner
2552: References:
2553: . * - MUMPS Users' Guide
2555: .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2556: @*/
2557: PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val)
2558: {
2564: PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
2565: return 0;
2566: }
2568: PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info)
2569: {
2570: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2572: *info = mumps->id.INFO(icntl);
2573: return 0;
2574: }
2576: PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog)
2577: {
2578: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2580: *infog = mumps->id.INFOG(icntl);
2581: return 0;
2582: }
2584: PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo)
2585: {
2586: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2588: *rinfo = mumps->id.RINFO(icntl);
2589: return 0;
2590: }
2592: PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog)
2593: {
2594: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2596: *rinfog = mumps->id.RINFOG(icntl);
2597: return 0;
2598: }
2600: PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS)
2601: {
2602: Mat Bt = NULL, Btseq = NULL;
2603: PetscBool flg;
2604: Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2605: PetscScalar *aa;
2606: PetscInt spnr, *ia, *ja, M, nrhs;
2609: PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg);
2610: if (flg) {
2611: MatTransposeGetMat(spRHS, &Bt);
2612:   } else SETERRQ(PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be of type MATTRANSPOSEVIRTUAL");
2614: MatMumpsSetIcntl(F, 30, 1);
2616: if (mumps->petsc_size > 1) {
2617: Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data;
2618: Btseq = b->A;
2619: } else {
2620: Btseq = Bt;
2621: }
2623: MatGetSize(spRHS, &M, &nrhs);
2624: mumps->id.nrhs = nrhs;
2625: mumps->id.lrhs = M;
2626: mumps->id.rhs = NULL;
2628: if (!mumps->myid) {
2629: MatSeqAIJGetArray(Btseq, &aa);
2630: MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg);
2632: PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs);
2633: mumps->id.rhs_sparse = (MumpsScalar *)aa;
2634: } else {
2635: mumps->id.irhs_ptr = NULL;
2636: mumps->id.irhs_sparse = NULL;
2637: mumps->id.nz_rhs = 0;
2638: mumps->id.rhs_sparse = NULL;
2639: }
2640: mumps->id.ICNTL(20) = 1; /* rhs is sparse */
2641: mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */
2643: /* solve phase */
2644: /*-------------*/
2645: mumps->id.job = JOB_SOLVE;
2646: PetscMUMPS_c(mumps);
2649: if (!mumps->myid) {
2650: MatSeqAIJRestoreArray(Btseq, &aa);
2651: MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg);
2653: }
2654: return 0;
2655: }
2657: /*@
2658: MatMumpsGetInverse - Get user-specified set of entries in inverse of A
2660: Logically Collective
2662: Input Parameters:
2663: + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2664: - spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format holding specified indices in processor[0]
2666: Output Parameter:
2667: . spRHS - requested entries of inverse of A
2669: Level: beginner
2671: References:
2672: . * - MUMPS Users' Guide
2674: .seealso: `MatGetFactor()`, `MatCreateTranspose()`
2675: @*/
2676: PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS)
2677: {
2680: PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS));
2681: return 0;
2682: }
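/*
   Example (a sketch, assuming spRHST is a sparse MATAIJ matrix held on the first process whose nonzero
   pattern marks the requested entries, as described in the manual page above): the MATTRANSPOSEVIRTUAL
   wrapper required by MatMumpsGetInverse() is built with MatCreateTranspose():

     Mat spRHS;
     MatCreateTranspose(spRHST, &spRHS);
     MatMumpsGetInverse(F, spRHS); // the requested entries of inv(A) are returned in place, see the Output Parameter above
     MatDestroy(&spRHS);
*/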
2684: PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST)
2685: {
2686: Mat spRHS;
2688: MatCreateTranspose(spRHST, &spRHS);
2689: MatMumpsGetInverse_MUMPS(F, spRHS);
2690: MatDestroy(&spRHS);
2691: return 0;
2692: }
2694: /*@
2695: MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix A^T
2697: Logically Collective
2699: Input Parameters:
2700: + F - the factored matrix of A obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2701: - spRHST - sequential sparse matrix in `MATAIJ` format holding specified indices of A^T in processor[0]
2703: Output Parameter:
2704: . spRHST - requested entries of inverse of A^T
2706: Level: beginner
2708: References:
2709: . * - MUMPS Users' Guide
2711: .seealso: `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()`
2712: @*/
2713: PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST)
2714: {
2715: PetscBool flg;
2719: PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL);
2722: PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST));
2723: return 0;
2724: }
2726: /*@
2727: MatMumpsGetInfo - Get MUMPS parameter INFO()
2729: Logically Collective
2731: Input Parameters:
2732: + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2733: - icntl - index of MUMPS parameter array INFO()
2735: Output Parameter:
2736: . ival - value of MUMPS INFO(icntl)
2738: Level: beginner
2740: References:
2741: . * - MUMPS Users' Guide
2743: .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2744: @*/
2745: PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival)
2746: {
2750: PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
2751: return 0;
2752: }
2754: /*@
2755: MatMumpsGetInfog - Get MUMPS parameter INFOG()
2757: Logically Collective
2759: Input Parameters:
2760: + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2761: - icntl - index of MUMPS parameter array INFOG()
2763: Output Parameter:
2764: . ival - value of MUMPS INFOG(icntl)
2766: Level: beginner
2768: References:
2769: . * - MUMPS Users' Guide
2771: .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2772: @*/
2773: PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival)
2774: {
2778: PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
2779: return 0;
2780: }
2782: /*@
2783: MatMumpsGetRinfo - Get MUMPS parameter RINFO()
2785: Logically Collective
2787: Input Parameters:
2788: + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2789: - icntl - index of MUMPS parameter array RINFO()
2791: Output Parameter:
2792: . val - value of MUMPS RINFO(icntl)
2794: Level: beginner
2796: References:
2797: . * - MUMPS Users' Guide
2799: .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()`
2800: @*/
2801: PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val)
2802: {
2806: PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
2807: return 0;
2808: }
2810: /*@
2811: MatMumpsGetRinfog - Get MUMPS parameter RINFOG()
2813: Logically Collective
2815: Input Parameters:
2816: + F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2817: - icntl - index of MUMPS parameter array RINFOG()
2819: Output Parameter:
2820: . val - value of MUMPS RINFOG(icntl)
2822: Level: beginner
2824: References:
2825: . * - MUMPS Users' Guide
2827: .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
2828: @*/
2829: PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val)
2830: {
2834: PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
2835: return 0;
2836: }
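/*
   Example (a sketch, assuming the factored matrix F): with ICNTL(33) = 1 requested before the numerical
   factorization, the determinant is (RINFOG(12) + i*RINFOG(13)) * 2^INFOG(34), cf. MatView_MUMPS():

     PetscReal det_r, det_i;
     PetscInt  det_exp;

     MatMumpsSetIcntl(F, 33, 1);        // ask MUMPS to compute the determinant
     // ... numerical factorization of F ...
     MatMumpsGetRinfog(F, 12, &det_r);  // mantissa, real part
     MatMumpsGetRinfog(F, 13, &det_i);  // mantissa, imaginary part (relevant only for complex matrices)
     MatMumpsGetInfog(F, 34, &det_exp); // base-2 exponent
*/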
2838: /*MC
2839: MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for
2840: distributed and sequential matrices via the external package MUMPS.
2842: Works with `MATAIJ` and `MATSBAIJ` matrices
2844: Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS
2846: Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. See details below.
2848:   Use -pc_type lu (or -pc_type cholesky) together with -pc_factor_mat_solver_type mumps to use this direct solver, for example as in the command line sketched below
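  An illustrative invocation (the executable name ./app and the ICNTL(14) value are placeholders chosen for this example):
$    mpiexec -n 4 ./app -ksp_type preonly -pc_type lu -pc_factor_mat_solver_type mumps -mat_mumps_icntl_14 50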
2850: Options Database Keys:
2851: + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages
2852: . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning
2853: . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host
2854: . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4)
2855: .  -mat_mumps_icntl_6 - ICNTL(6): permutes the matrix to a zero-free diagonal and/or scales it (0 to 7)
2856: . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto
2857: Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only)
2858: . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77)
2859: .  -mat_mumps_icntl_10 - ICNTL(10): maximum number of iterative refinement steps
2860: . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view)
2861: . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)
2862: . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting
2863: . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space
2864: . -mat_mumps_icntl_15 - ICNTL(15): compression of the input matrix resulting from a block format
2865: . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement
2866: . -mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS
2867: . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
2868: .  -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that MUMPS can allocate per processor
2869: . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1)
2870: . -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis
2871: .  -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix has been computed
2872: .  -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering
2873: . -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
2874: . -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
2875: . -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
2876: . -mat_mumps_icntl_33 - ICNTL(33): compute determinant
2877: . -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
2878: . -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
2879: . -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
2880: . -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold
2881: . -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement
2882: . -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold
2883: . -mat_mumps_cntl_4 - CNTL(4): value for static pivoting
2884: . -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots
2885: . -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization
2886: -  -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) were called before calling MUMPS.
2887:                                    If m is not given, the default is the number of cores per CPU package (socket) as reported by hwloc, following the suggestion in the MUMPS manual.
2889: Level: beginner
2891: Notes:
2892:   MUMPS Cholesky does not handle (complex) Hermitian matrices (see the MUMPS Users' Guide at https://mumps-solver.org/index.php?page=doc), so using it will produce an error if the matrix is Hermitian.
2894:   When used within a `KSP`/`PC` solve, the options are prefixed with the options prefix of the `PC`. Otherwise one can set the options prefix by calling
2895: `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix.
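  For example (an illustrative sketch; the prefix "sub_" and the variable names A and F are placeholders),
$    MatSetOptionsPrefixFactor(A,"sub_");
$    MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_LU,&F);
  after which a runtime option such as -sub_mat_mumps_icntl_24 1 applies to this factored matrix.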
2897: When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about the failure by calling
2898: $ KSPGetPC(ksp,&pc);
2899: $ PCFactorGetMatrix(pc,&mat);
2900: $ MatMumpsGetInfo(mat,....);
2901: $ MatMumpsGetInfog(mat,....); etc.
2902:   Alternatively, run with -ksp_error_if_not_converged and the program will stop and print the information in the error message.
2904: Using MUMPS with 64-bit integers
2905: MUMPS provides 64-bit integer support in two build modes:
2906: full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and
2907: requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI).
2909: selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices,
2910: MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and
2911: columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit
2912: integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS.
2914:   With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used with both the --with-64-bit-indices=0 and --with-64-bit-indices=1 variants of PETSc.
2916: Two modes to run MUMPS/PETSc with OpenMP
2917: $ Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
2918: $ threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".
2920: $ -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example,
2921: $ if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"
2923: To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
2924: (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc
2925: (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
2926: libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
2927:   (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided).
2929: If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI
2930: processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of
2931: size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm
2932:   are called slave ranks (or slaves). Only master ranks are visible to MUMPS; slaves are not. PETSc frees the CPUs assigned to slaves (which may have been set
2933:   by CPU binding policies in job scripts) and makes them available to the master, so that the OpenMP threads spawned by MUMPS can run on them.
2934:   On a multi-socket compute node, MPI rank mapping matters. Continuing the example above, suppose the compute node has two sockets.
2935:   If MPI ranks are interleaved across the two sockets (even ranks on socket 0, odd ranks on socket 1) and bound
2936:   to cores, then with -mat_mumps_use_omp_threads 16 a master rank (and the threads it spawns) will use half of the cores on socket 0 and half of the
2937:   cores on socket 1, which hurts locality. If instead MPI ranks are mapped consecutively onto the two sockets, the
2938:   problem does not arise. Therefore, when using -mat_mumps_use_omp_threads, keep an eye on your MPI rank mapping and CPU binding.
2939: For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and
2940: examine the mapping result.
2942:   PETSc does not control thread binding in MUMPS, so to get the best performance one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts,
2943:   for example, export OMP_PLACES=threads and export OMP_PROC_BIND=spread. One does not need to export OMP_NUM_THREADS=m in job scripts as PETSc
2944:   calls `omp_set_num_threads(m)` internally before calling MUMPS, as sketched below.
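  A job-script sketch combining the settings above (Slurm shown; the executable name ./app and the thread count are placeholders):
$    export OMP_PLACES=threads
$    export OMP_PROC_BIND=spread
$    srun --cpu-bind=verbose -m block:block ./app -mat_mumps_use_omp_threads 16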
2946: References:
2947: + * - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011).
2948: - * - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017.
2950: .seealso: `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `KSPGetPC()`, `PCFactorGetMatrix()`
2951: M*/
2953: static PetscErrorCode MatFactorGetSolverType_mumps(Mat A, MatSolverType *type)
2954: {
2955: *type = MATSOLVERMUMPS;
2956: return 0;
2957: }
2959: /* MatGetFactor for Seq and MPI AIJ matrices */
2960: static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F)
2961: {
2962: Mat B;
2963: Mat_MUMPS *mumps;
2964: PetscBool isSeqAIJ;
2965: PetscMPIInt size;
2967: #if defined(PETSC_USE_COMPLEX)
2969: #endif
2970: /* Create the factorization matrix */
2971: PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ);
2972: MatCreate(PetscObjectComm((PetscObject)A), &B);
2973: MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N);
2974: PetscStrallocpy("mumps", &((PetscObject)B)->type_name);
2975: MatSetUp(B);
2977: PetscNew(&mumps);
2979: B->ops->view = MatView_MUMPS;
2980: B->ops->getinfo = MatGetInfo_MUMPS;
2982: PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps);
2983: PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS);
2984: PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS);
2985: PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS);
2986: PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS);
2987: PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS);
2988: PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS);
2989: PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS);
2990: PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS);
2991: PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS);
2992: PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS);
2993: PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS);
2994: PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS);
2996: if (ftype == MAT_FACTOR_LU) {
2997: B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
2998: B->factortype = MAT_FACTOR_LU;
2999: if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij;
3000: else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;
3001: PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]);
3002: mumps->sym = 0;
3003: } else {
3004: B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3005: B->factortype = MAT_FACTOR_CHOLESKY;
3006: if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij;
3007: else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij;
3008: PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]);
3009: #if defined(PETSC_USE_COMPLEX)
3010: mumps->sym = 2;
3011: #else
3012: if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
3013: else mumps->sym = 2;
3014: #endif
3015: }
3017: /* set solvertype */
3018: PetscFree(B->solvertype);
3019: PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype);
3020: MPI_Comm_size(PetscObjectComm((PetscObject)A), &size);
3021: if (size == 1) {
3022: /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3023: B->canuseordering = PETSC_TRUE;
3024: }
3025: B->ops->destroy = MatDestroy_MUMPS;
3026: B->data = (void *)mumps;
3028: *F = B;
3029: mumps->id.job = JOB_NULL;
3030: mumps->ICNTL_pre = NULL;
3031: mumps->CNTL_pre = NULL;
3032: mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
3033: return 0;
3034: }
3036: /* MatGetFactor for Seq and MPI SBAIJ matrices */
3037: static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, MatFactorType ftype, Mat *F)
3038: {
3039: Mat B;
3040: Mat_MUMPS *mumps;
3041: PetscBool isSeqSBAIJ;
3042: PetscMPIInt size;
3044: #if defined(PETSC_USE_COMPLEX)
3046: #endif
3047: MatCreate(PetscObjectComm((PetscObject)A), &B);
3048: MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N);
3049: PetscStrallocpy("mumps", &((PetscObject)B)->type_name);
3050: MatSetUp(B);
3052: PetscNew(&mumps);
3053: PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ);
3054: if (isSeqSBAIJ) {
3055: mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
3056: } else {
3057: mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
3058: }
3060: B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3061: B->ops->view = MatView_MUMPS;
3062: B->ops->getinfo = MatGetInfo_MUMPS;
3064: PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps);
3065: PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS);
3066: PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS);
3067: PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS);
3068: PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS);
3069: PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS);
3070: PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS);
3071: PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS);
3072: PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS);
3073: PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS);
3074: PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS);
3075: PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS);
3076: PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS);
3078: B->factortype = MAT_FACTOR_CHOLESKY;
3079: #if defined(PETSC_USE_COMPLEX)
3080: mumps->sym = 2;
3081: #else
3082: if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
3083: else mumps->sym = 2;
3084: #endif
3086: /* set solvertype */
3087: PetscFree(B->solvertype);
3088: PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype);
3089: MPI_Comm_size(PetscObjectComm((PetscObject)A), &size);
3090: if (size == 1) {
3091: /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3092: B->canuseordering = PETSC_TRUE;
3093: }
3094: PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]);
3095: B->ops->destroy = MatDestroy_MUMPS;
3096: B->data = (void *)mumps;
3098: *F = B;
3099: mumps->id.job = JOB_NULL;
3100: mumps->ICNTL_pre = NULL;
3101: mumps->CNTL_pre = NULL;
3102: mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
3103: return 0;
3104: }
3106: static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F)
3107: {
3108: Mat B;
3109: Mat_MUMPS *mumps;
3110: PetscBool isSeqBAIJ;
3111: PetscMPIInt size;
3113: /* Create the factorization matrix */
3114: PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ);
3115: MatCreate(PetscObjectComm((PetscObject)A), &B);
3116: MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N);
3117: PetscStrallocpy("mumps", &((PetscObject)B)->type_name);
3118: MatSetUp(B);
3120: PetscNew(&mumps);
3121: if (ftype == MAT_FACTOR_LU) {
3122: B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
3123: B->factortype = MAT_FACTOR_LU;
3124: if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
3125: else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
3126: mumps->sym = 0;
3127: PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]);
3128: } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead");
3130: B->ops->view = MatView_MUMPS;
3131: B->ops->getinfo = MatGetInfo_MUMPS;
3133: PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps);
3134: PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS);
3135: PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS);
3136: PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS);
3137: PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS);
3138: PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS);
3139: PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS);
3140: PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS);
3141: PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS);
3142: PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS);
3143: PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS);
3144: PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS);
3145: PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS);
3147: /* set solvertype */
3148: PetscFree(B->solvertype);
3149: PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype);
3150: MPI_Comm_size(PetscObjectComm((PetscObject)A), &size);
3151: if (size == 1) {
3152: /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3153: B->canuseordering = PETSC_TRUE;
3154: }
3155: B->ops->destroy = MatDestroy_MUMPS;
3156: B->data = (void *)mumps;
3158: *F = B;
3159: mumps->id.job = JOB_NULL;
3160: mumps->ICNTL_pre = NULL;
3161: mumps->CNTL_pre = NULL;
3162: mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
3163: return 0;
3164: }
3166: /* MatGetFactor for Seq and MPI SELL matrices */
3167: static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F)
3168: {
3169: Mat B;
3170: Mat_MUMPS *mumps;
3171: PetscBool isSeqSELL;
3172: PetscMPIInt size;
3174: /* Create the factorization matrix */
3175: PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL);
3176: MatCreate(PetscObjectComm((PetscObject)A), &B);
3177: MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N);
3178: PetscStrallocpy("mumps", &((PetscObject)B)->type_name);
3179: MatSetUp(B);
3181: PetscNew(&mumps);
3183: B->ops->view = MatView_MUMPS;
3184: B->ops->getinfo = MatGetInfo_MUMPS;
3186: PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps);
3187: PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS);
3188: PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS);
3189: PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS);
3190: PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS);
3191: PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS);
3192: PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS);
3193: PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS);
3194: PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS);
3195: PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS);
3196: PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS);
3198: if (ftype == MAT_FACTOR_LU) {
3199: B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3200: B->factortype = MAT_FACTOR_LU;
3201: if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
3202: else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented");
3203: mumps->sym = 0;
3204: PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]);
3205: } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented");
3207: /* set solvertype */
3208: PetscFree(B->solvertype);
3209: PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype);
3210: MPI_Comm_size(PetscObjectComm((PetscObject)A), &size);
3211: if (size == 1) {
3212: /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3213: B->canuseordering = PETSC_TRUE;
3214: }
3215: B->ops->destroy = MatDestroy_MUMPS;
3216: B->data = (void *)mumps;
3218: *F = B;
3219: mumps->id.job = JOB_NULL;
3220: mumps->ICNTL_pre = NULL;
3221: mumps->CNTL_pre = NULL;
3222: mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
3223: return 0;
3224: }
3226: PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
3227: {
3228: MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps);
3229: MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps);
3230: MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps);
3231: MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps);
3232: MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps);
3233: MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps);
3234: MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps);
3235: MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps);
3236: MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps);
3237: MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps);
3238: MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps);
3239: return 0;
3240: }