Actual source code: superlu_dist.c
1: /*
2: Provides an interface to the SuperLU_DIST sparse solver
3: */
5: #include <../src/mat/impls/aij/seq/aij.h>
6: #include <../src/mat/impls/aij/mpi/mpiaij.h>
7: #include <petscpkg_version.h>
9: EXTERN_C_BEGIN
10: #if defined(PETSC_USE_COMPLEX)
11: #define CASTDOUBLECOMPLEX (doublecomplex *)
12: #define CASTDOUBLECOMPLEXSTAR (doublecomplex **)
13: #include <superlu_zdefs.h>
14: #define LUstructInit zLUstructInit
15: #define ScalePermstructInit zScalePermstructInit
16: #define ScalePermstructFree zScalePermstructFree
17: #define LUstructFree zLUstructFree
18: #define Destroy_LU zDestroy_LU
19: #define ScalePermstruct_t zScalePermstruct_t
20: #define LUstruct_t zLUstruct_t
21: #define SOLVEstruct_t zSOLVEstruct_t
22: #define SolveFinalize zSolveFinalize
23: #define pGetDiagU pzGetDiagU
24: #define pgssvx pzgssvx
25: #define allocateA_dist zallocateA_dist
26: #define Create_CompRowLoc_Matrix_dist zCreate_CompRowLoc_Matrix_dist
27: #define SLU SLU_Z
28: #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
29: #define DeAllocLlu_3d zDeAllocLlu_3d
30: #define DeAllocGlu_3d zDeAllocGlu_3d
31: #define Destroy_A3d_gathered_on_2d zDestroy_A3d_gathered_on_2d
32: #define pgssvx3d pzgssvx3d
33: #endif
34: #elif defined(PETSC_USE_REAL_SINGLE)
35: #define CASTDOUBLECOMPLEX
36: #define CASTDOUBLECOMPLEXSTAR
37: #include <superlu_sdefs.h>
38: #define LUstructInit sLUstructInit
39: #define ScalePermstructInit sScalePermstructInit
40: #define ScalePermstructFree sScalePermstructFree
41: #define LUstructFree sLUstructFree
42: #define Destroy_LU sDestroy_LU
43: #define ScalePermstruct_t sScalePermstruct_t
44: #define LUstruct_t sLUstruct_t
45: #define SOLVEstruct_t sSOLVEstruct_t
46: #define SolveFinalize sSolveFinalize
47: #define pGetDiagU psGetDiagU
48: #define pgssvx psgssvx
49: #define allocateA_dist sallocateA_dist
50: #define Create_CompRowLoc_Matrix_dist sCreate_CompRowLoc_Matrix_dist
51: #define SLU SLU_S
52: #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
53: #define DeAllocLlu_3d sDeAllocLlu_3d
54: #define DeAllocGlu_3d sDeAllocGlu_3d
55: #define Destroy_A3d_gathered_on_2d sDestroy_A3d_gathered_on_2d
56: #define pgssvx3d psgssvx3d
57: #endif
58: #else
59: #define CASTDOUBLECOMPLEX
60: #define CASTDOUBLECOMPLEXSTAR
61: #include <superlu_ddefs.h>
62: #define LUstructInit dLUstructInit
63: #define ScalePermstructInit dScalePermstructInit
64: #define ScalePermstructFree dScalePermstructFree
65: #define LUstructFree dLUstructFree
66: #define Destroy_LU dDestroy_LU
67: #define ScalePermstruct_t dScalePermstruct_t
68: #define LUstruct_t dLUstruct_t
69: #define SOLVEstruct_t dSOLVEstruct_t
70: #define SolveFinalize dSolveFinalize
71: #define pGetDiagU pdGetDiagU
72: #define pgssvx pdgssvx
73: #define allocateA_dist dallocateA_dist
74: #define Create_CompRowLoc_Matrix_dist dCreate_CompRowLoc_Matrix_dist
75: #define SLU SLU_D
76: #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
77: #define DeAllocLlu_3d dDeAllocLlu_3d
78: #define DeAllocGlu_3d dDeAllocGlu_3d
79: #define Destroy_A3d_gathered_on_2d dDestroy_A3d_gathered_on_2d
80: #define pgssvx3d pdgssvx3d
81: #endif
82: #endif
83: EXTERN_C_END
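/*
   Overview of the solver context defined below: A_sup, ScalePermstruct, LUstruct and SOLVEstruct are
   the SuperLU_DIST-side objects whose lifetimes this interface manages; grid (or grid3d with
   SuperLU_DIST >= 7.2.0) describes the process grid; val, row and col hold the copy of the AIJ matrix
   that is handed to SuperLU_DIST; comm_superlu is a separately obtained communicator used only when
   the grid cannot be cached as an attribute on the matrix's communicator (see PetscSuperLU_DIST below).
*/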
85: typedef struct {
86: int_t nprow, npcol, *row, *col;
87: gridinfo_t grid;
88: #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
89: PetscBool use3d;
90: int_t npdep; /* replication factor, must be power of two */
91: gridinfo3d_t grid3d;
92: #endif
93: superlu_dist_options_t options;
94: SuperMatrix A_sup;
95: ScalePermstruct_t ScalePermstruct;
96: LUstruct_t LUstruct;
97: int StatPrint;
98: SOLVEstruct_t SOLVEstruct;
99: fact_t FactPattern;
100: MPI_Comm comm_superlu;
101: PetscScalar *val;
102: PetscBool matsolve_iscalled, matmatsolve_iscalled;
103: PetscBool CleanUpSuperLU_Dist; /* Flag to clean up (non-global) SuperLU objects during Destroy */
104: } Mat_SuperLU_DIST;
106: PetscErrorCode MatSuperluDistGetDiagU_SuperLU_DIST(Mat F, PetscScalar *diagU)
107: {
108: Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)F->data;
110: PetscStackCallExternalVoid("SuperLU_DIST:pGetDiagU", pGetDiagU(F->rmap->N, &lu->LUstruct, &lu->grid, CASTDOUBLECOMPLEX diagU));
111: return 0;
112: }
114: PetscErrorCode MatSuperluDistGetDiagU(Mat F, PetscScalar *diagU)
115: {
117: PetscTryMethod(F, "MatSuperluDistGetDiagU_C", (Mat, PetscScalar *), (F, diagU));
118: return 0;
119: }
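/*
   Illustrative sketch (not part of the PETSc sources): how a caller might retrieve the diagonal of U
   from a factor matrix F produced by this solver, for example one obtained from PCFactorGetMatrix()
   or from MatGetFactor()/MatLUFactorSymbolic()/MatLUFactorNumeric(). The setup of F is assumed.

     PetscScalar *diagU;
     PetscInt     N;

     PetscCall(MatGetSize(F, &N, NULL));
     PetscCall(PetscMalloc1(N, &diagU));
     PetscCall(MatSuperluDistGetDiagU(F, diagU)); // PetscTryMethod above makes this a no-op for other factor types
     // ... inspect diagU, e.g. count the signs of the pivots as MatGetInertia_SuperLU_DIST() does ...
     PetscCall(PetscFree(diagU));
*/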
121: /* This allows reusing the SuperLU_DIST communicator and grid when only a single SuperLU_DIST matrix is used at a time */
122: typedef struct {
123: MPI_Comm comm;
124: PetscBool busy;
125: gridinfo_t grid;
126: #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
127: PetscBool use3d;
128: gridinfo3d_t grid3d;
129: #endif
130: } PetscSuperLU_DIST;
131: static PetscMPIInt Petsc_Superlu_dist_keyval = MPI_KEYVAL_INVALID;
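/*
   Minimal sketch (not part of the PETSc sources) of the plain-MPI attribute-caching pattern used below:
   a keyval is created once, a context is attached to a communicator with MPI_Comm_set_attr(), looked up
   again with MPI_Comm_get_attr(), and the delete callback runs when the communicator is freed. The names
   my_keyval and MyContext are hypothetical; assumes <mpi.h> and <stdlib.h>.

     static int my_keyval = MPI_KEYVAL_INVALID;
     typedef struct { int busy; } MyContext;

     static int MyDeleteFn(MPI_Comm comm, int keyval, void *attr_val, void *extra_state)
     {
       free(attr_val); // release the cached context when the communicator is destroyed
       return MPI_SUCCESS;
     }

     // first use: create the keyval and attach a context to the communicator
     if (my_keyval == MPI_KEYVAL_INVALID) MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, MyDeleteFn, &my_keyval, NULL);
     MyContext *ctx = (MyContext *)malloc(sizeof(*ctx));
     ctx->busy = 1;
     MPI_Comm_set_attr(comm, my_keyval, ctx);

     // later: look the cached context up again
     MyContext *found;
     int        flag;
     MPI_Comm_get_attr(comm, my_keyval, &found, &flag); // flag == 1 if the attribute is present
*/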
133: PETSC_EXTERN PetscMPIInt MPIAPI Petsc_Superlu_dist_keyval_Delete_Fn(MPI_Comm comm, PetscMPIInt keyval, void *attr_val, void *extra_state)
134: {
135: PetscSuperLU_DIST *context = (PetscSuperLU_DIST *)attr_val;
137: if (keyval != Petsc_Superlu_dist_keyval) SETERRMPI(PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "Unexpected keyval");
138: PetscInfo(NULL, "Removing Petsc_Superlu_dist_keyval attribute from communicator that is being freed\n");
139: #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
140: if (context->use3d) {
141: PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit3d", superlu_gridexit3d(&context->grid3d));
142: } else
143: #endif
144: PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit", superlu_gridexit(&context->grid));
145: MPI_Comm_free(&context->comm);
146: PetscFree(context);
147: return MPI_SUCCESS;
148: }
150: /*
151: Performs MPI_Comm_free_keyval() on Petsc_Superlu_dist_keyval but keeps the global variable for
152: users who do not destroy all PETSc objects before PetscFinalize().
154: The value Petsc_Superlu_dist_keyval is retained so that Petsc_Superlu_dist_keyval_Delete_Fn()
155: can still check that the keyval associated with the MPI communicator is correct when the MPI
156: communicator is destroyed.
158: This is called in PetscFinalize()
159: */
160: static PetscErrorCode Petsc_Superlu_dist_keyval_free(void)
161: {
162: PetscMPIInt Petsc_Superlu_dist_keyval_temp = Petsc_Superlu_dist_keyval;
164: PetscInfo(NULL, "Freeing Petsc_Superlu_dist_keyval\n");
165: MPI_Comm_free_keyval(&Petsc_Superlu_dist_keyval_temp);
166: return 0;
167: }
169: static PetscErrorCode MatDestroy_SuperLU_DIST(Mat A)
170: {
171: Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data;
173: if (lu->CleanUpSuperLU_Dist) {
174: /* Deallocate SuperLU_DIST storage */
175: PetscStackCallExternalVoid("SuperLU_DIST:Destroy_CompRowLoc_Matrix_dist", Destroy_CompRowLoc_Matrix_dist(&lu->A_sup));
176: if (lu->options.SolveInitialized) PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct));
177: #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
178: if (lu->use3d) {
179: if (lu->grid3d.zscp.Iam == 0) {
180: PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->LUstruct));
181: } else {
182: PetscStackCallExternalVoid("SuperLU_DIST:DeAllocLlu_3d", DeAllocLlu_3d(lu->A_sup.ncol, &lu->LUstruct, &lu->grid3d));
183: PetscStackCallExternalVoid("SuperLU_DIST:DeAllocGlu_3d", DeAllocGlu_3d(&lu->LUstruct));
184: }
185: PetscStackCallExternalVoid("SuperLU_DIST:Destroy_A3d_gathered_on_2d", Destroy_A3d_gathered_on_2d(&lu->SOLVEstruct, &lu->grid3d));
186: } else
187: #endif
188: PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->cmap->N, &lu->grid, &lu->LUstruct));
189: PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructFree", ScalePermstructFree(&lu->ScalePermstruct));
190: PetscStackCallExternalVoid("SuperLU_DIST:LUstructFree", LUstructFree(&lu->LUstruct));
192: /* Release the SuperLU_DIST process grid only if the matrix has its own copy, that is, it is not the grid cached in the communicator attribute */
193: if (lu->comm_superlu) {
194: #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
195: if (lu->use3d) {
196: PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit3d", superlu_gridexit3d(&lu->grid3d));
197: } else
198: #endif
199: PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit", superlu_gridexit(&lu->grid));
200: }
201: }
202: /*
203: * We always need to release the communicator that was created in MatGetFactor_aij_superlu_dist.
204: * lu->CleanUpSuperLU_Dist is turned on in MatLUFactorSymbolic_SuperLU_DIST. In some use cases a
205: * factor matrix is created but never used to solve, so lu->CleanUpSuperLU_Dist stays off and the
206: * old code neither released the communicator nor marked it as "not busy".
207: * Here we release the communicator regardless.
208: */
209: if (lu->comm_superlu) {
210: PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &lu->comm_superlu);
211: } else {
212: PetscSuperLU_DIST *context;
213: MPI_Comm comm;
214: PetscMPIInt flg;
216: PetscObjectGetComm((PetscObject)A, &comm);
217: MPI_Comm_get_attr(comm, Petsc_Superlu_dist_keyval, &context, &flg);
218: PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Communicator does not have expected Petsc_Superlu_dist_keyval attribute");
219: context->busy = PETSC_FALSE;
220: }
222: PetscFree(A->data);
223: /* clear composed functions */
224: PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL);
225: PetscObjectComposeFunction((PetscObject)A, "MatSuperluDistGetDiagU_C", NULL);
227: return 0;
228: }
230: static PetscErrorCode MatSolve_SuperLU_DIST(Mat A, Vec b_mpi, Vec x)
231: {
232: Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data;
233: PetscInt m = A->rmap->n;
234: SuperLUStat_t stat;
235: PetscReal berr[1];
236: PetscScalar *bptr = NULL;
237: int info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */
238: static PetscBool cite = PETSC_FALSE;
241: PetscCall(PetscCitationsRegister("@article{lidemmel03,\n author = {Xiaoye S. Li and James W. Demmel},\n title = {{SuperLU_DIST}: A Scalable Distributed-Memory Sparse Direct\n Solver for Unsymmetric Linear Systems},\n journal = {ACM "
242: "Trans. Mathematical Software},\n volume = {29},\n number = {2},\n pages = {110-140},\n year = 2003\n}\n",
243: &cite));
245: if (lu->options.SolveInitialized && !lu->matsolve_iscalled) {
246: /* see comments in MatMatSolve() */
247: PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct));
248: lu->options.SolveInitialized = NO;
249: }
250: VecCopy(b_mpi, x);
251: VecGetArray(x, &bptr);
253: PetscStackCallExternalVoid("SuperLU_DIST:PStatInit", PStatInit(&stat)); /* Initialize the statistics variables. */
254: #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) && !PetscDefined(MISSING_GETLINE)
255: if (lu->use3d) PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", pgssvx3d(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, m, 1, &lu->grid3d, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info));
256: else
257: #endif
258: PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", pgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, m, 1, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info));
260: PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "pgssvx fails, info: %d", info);
261: if (lu->options.PrintStat) PetscStackCallExternalVoid("SuperLU_DIST:PStatPrint", PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */
262: PetscStackCallExternalVoid("SuperLU_DIST:PStatFree", PStatFree(&stat));
264: VecRestoreArray(x, &bptr);
265: lu->matsolve_iscalled = PETSC_TRUE;
266: lu->matmatsolve_iscalled = PETSC_FALSE;
267: return 0;
268: }
270: static PetscErrorCode MatMatSolve_SuperLU_DIST(Mat A, Mat B_mpi, Mat X)
271: {
272: Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data;
273: PetscInt m = A->rmap->n, nrhs;
274: SuperLUStat_t stat;
275: PetscReal berr[1];
276: PetscScalar *bptr;
277: int info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */
278: PetscBool flg;
281: PetscObjectTypeCompareAny((PetscObject)B_mpi, &flg, MATSEQDENSE, MATMPIDENSE, NULL);
282: PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix B must be MATDENSE matrix");
283: if (X != B_mpi) {
284: PetscObjectTypeCompareAny((PetscObject)X, &flg, MATSEQDENSE, MATMPIDENSE, NULL);
285: PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix");
286: }
288: if (lu->options.SolveInitialized && !lu->matmatsolve_iscalled) {
289: /* the communication pattern in SOLVEstruct was likely not created for MatMatSolve(),
290: so destroy it and create a new SOLVEstruct;
291: otherwise it may result in memory corruption or an incorrect solution.
292: See src/mat/tests/ex125.c */
293: PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct));
294: lu->options.SolveInitialized = NO;
295: }
296: if (X != B_mpi) MatCopy(B_mpi, X, SAME_NONZERO_PATTERN);
298: MatGetSize(B_mpi, NULL, &nrhs);
300: PetscStackCallExternalVoid("SuperLU_DIST:PStatInit", PStatInit(&stat)); /* Initialize the statistics variables. */
301: MatDenseGetArray(X, &bptr);
303: #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) && !PetscDefined(MISSING_GETLINE)
304: if (lu->use3d) PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", pgssvx3d(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, m, nrhs, &lu->grid3d, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info));
305: else
306: #endif
307: PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", pgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, m, nrhs, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info));
309: PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "pgssvx fails, info: %d", info);
310: MatDenseRestoreArray(X, &bptr);
312: if (lu->options.PrintStat) PetscStackCallExternalVoid("SuperLU_DIST:PStatPrint", PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */
313: PetscStackCallExternalVoid("SuperLU_DIST:PStatFree", PStatFree(&stat));
314: lu->matsolve_iscalled = PETSC_FALSE;
315: lu->matmatsolve_iscalled = PETSC_TRUE;
316: return 0;
317: }
319: /*
320: input:
321: F: numeric Cholesky factor
322: output:
323: nneg: total number of negative pivots
324: nzero: total number of zero pivots
325: npos: (global dimension of F) - nneg - nzero
326: */
327: static PetscErrorCode MatGetInertia_SuperLU_DIST(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos)
328: {
329: Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)F->data;
330: PetscScalar *diagU = NULL;
331: PetscInt M, i, neg = 0, zero = 0, pos = 0;
332: PetscReal r;
336: MatGetSize(F, &M, NULL);
337: PetscMalloc1(M, &diagU);
338: MatSuperluDistGetDiagU(F, diagU);
339: for (i = 0; i < M; i++) {
340: #if defined(PETSC_USE_COMPLEX)
341: r = PetscImaginaryPart(diagU[i]) / 10.0;
342: PetscCheck(r > -PETSC_MACHINE_EPSILON && r < PETSC_MACHINE_EPSILON, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "diagU[%" PetscInt_FMT "]=%g + i %g is non-real", i, (double)PetscRealPart(diagU[i]), (double)(r * 10.0));
343: r = PetscRealPart(diagU[i]);
344: #else
345: r = diagU[i];
346: #endif
347: if (r > 0) {
348: pos++;
349: } else if (r < 0) {
350: neg++;
351: } else zero++;
352: }
354: PetscFree(diagU);
355: if (nneg) *nneg = neg;
356: if (nzero) *nzero = zero;
357: if (npos) *npos = pos;
358: return 0;
359: }
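/*
   Illustrative sketch (not part of the PETSc sources): querying inertia through the public API with this
   package. An assembled MATAIJ matrix A is assumed; error handling and cleanup of A are omitted. Note the
   getinertia hook is only installed below when A is marked symmetric or Hermitian.

     Mat           F;
     IS            rperm, cperm;
     MatFactorInfo info;
     PetscInt      nneg, nzero, npos;

     PetscCall(MatSetOption(A, MAT_SYMMETRIC, PETSC_TRUE));
     PetscCall(MatGetFactor(A, MATSOLVERSUPERLU_DIST, MAT_FACTOR_CHOLESKY, &F));
     PetscCall(MatFactorInfoInitialize(&info));
     PetscCall(MatGetOrdering(A, MATORDERINGNATURAL, &rperm, &cperm)); // orderings are ignored by this package
     PetscCall(MatCholeskyFactorSymbolic(F, A, rperm, &info));
     PetscCall(MatCholeskyFactorNumeric(F, A, &info));
     PetscCall(MatGetInertia(F, &nneg, &nzero, &npos));
     PetscCall(ISDestroy(&rperm));
     PetscCall(ISDestroy(&cperm));
     PetscCall(MatDestroy(&F));
*/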
361: static PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat F, Mat A, const MatFactorInfo *info)
362: {
363: Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)F->data;
364: Mat Aloc;
365: const PetscScalar *av;
366: const PetscInt *ai = NULL, *aj = NULL;
367: PetscInt nz, dummy;
368: int sinfo; /* SuperLU_Dist info flag is always an int even with long long indices */
369: SuperLUStat_t stat;
370: PetscReal *berr = 0;
371: PetscBool ismpiaij, isseqaij, flg;
373: PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isseqaij);
374: PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &ismpiaij);
375: if (ismpiaij) {
376: MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &Aloc);
377: } else if (isseqaij) {
378: PetscObjectReference((PetscObject)A);
379: Aloc = A;
380: } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name);
382: MatGetRowIJ(Aloc, 0, PETSC_FALSE, PETSC_FALSE, &dummy, &ai, &aj, &flg);
383: PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "GetRowIJ failed");
384: MatSeqAIJGetArrayRead(Aloc, &av);
385: nz = ai[Aloc->rmap->n];
387: /* Allocations for A_sup */
388: if (lu->options.Fact == DOFACT) { /* first numeric factorization */
389: PetscStackCallExternalVoid("SuperLU_DIST:allocateA_dist", allocateA_dist(Aloc->rmap->n, nz, CASTDOUBLECOMPLEXSTAR & lu->val, &lu->col, &lu->row));
390: } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
391: if (lu->FactPattern == SamePattern_SameRowPerm) {
392: lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
393: } else if (lu->FactPattern == SamePattern) {
394: #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
395: if (lu->use3d) {
396: if (lu->grid3d.zscp.Iam == 0) {
397: PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->LUstruct));
398: PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct));
399: } else {
400: PetscStackCallExternalVoid("SuperLU_DIST:DeAllocLlu_3d", DeAllocLlu_3d(lu->A_sup.ncol, &lu->LUstruct, &lu->grid3d));
401: PetscStackCallExternalVoid("SuperLU_DIST:DeAllocGlu_3d", DeAllocGlu_3d(&lu->LUstruct));
402: }
403: } else
404: #endif
405: PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->rmap->N, &lu->grid, &lu->LUstruct));
406: lu->options.Fact = SamePattern;
407: } else if (lu->FactPattern == DOFACT) {
408: PetscStackCallExternalVoid("SuperLU_DIST:Destroy_CompRowLoc_Matrix_dist", Destroy_CompRowLoc_Matrix_dist(&lu->A_sup));
409: PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->rmap->N, &lu->grid, &lu->LUstruct));
410: lu->options.Fact = DOFACT;
411: PetscStackCallExternalVoid("SuperLU_DIST:allocateA_dist", allocateA_dist(Aloc->rmap->n, nz, CASTDOUBLECOMPLEXSTAR & lu->val, &lu->col, &lu->row));
412: } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "options.Fact must be one of SamePattern SamePattern_SameRowPerm DOFACT");
413: }
415: /* Copy AIJ matrix to superlu_dist matrix */
416: PetscArraycpy(lu->row, ai, Aloc->rmap->n + 1);
417: PetscArraycpy(lu->col, aj, nz);
418: PetscArraycpy(lu->val, av, nz);
419: MatRestoreRowIJ(Aloc, 0, PETSC_FALSE, PETSC_FALSE, &dummy, &ai, &aj, &flg);
421: MatSeqAIJRestoreArrayRead(Aloc, &av);
422: MatDestroy(&Aloc);
424: /* Create and setup A_sup */
425: if (lu->options.Fact == DOFACT) {
426: PetscStackCallExternalVoid("SuperLU_DIST:Create_CompRowLoc_Matrix_dist", Create_CompRowLoc_Matrix_dist(&lu->A_sup, A->rmap->N, A->cmap->N, nz, A->rmap->n, A->rmap->rstart, CASTDOUBLECOMPLEX lu->val, lu->col, lu->row, SLU_NR_loc, SLU, SLU_GE));
427: }
429: /* Factor the matrix. */
430: PetscStackCallExternalVoid("SuperLU_DIST:PStatInit", PStatInit(&stat)); /* Initialize the statistics variables. */
431: #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) && !PetscDefined(MISSING_GETLINE)
432: if (lu->use3d) {
433: PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", pgssvx3d(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, A->rmap->n, 0, &lu->grid3d, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo));
434: } else
435: #endif
436: PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", pgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, A->rmap->n, 0, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo));
437: if (sinfo > 0) {
439: if (sinfo <= lu->A_sup.ncol) {
440: F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
441: PetscInfo(F, "U(i,i) is exactly zero, i= %d\n", sinfo);
442: } else if (sinfo > lu->A_sup.ncol) {
443: /*
444: number of bytes allocated when memory allocation
445: failure occurred, plus A->ncol.
446: */
447: F->factorerrortype = MAT_FACTOR_OUTMEMORY;
448: PetscInfo(F, "Number of bytes allocated when memory allocation fails %d\n", sinfo);
449: }
450: }
452: if (lu->options.PrintStat) { PetscStackCallExternalVoid("SuperLU_DIST:PStatPrint", PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */ }
453: PetscStackCallExternalVoid("SuperLU_DIST:PStatFree", PStatFree(&stat));
454: F->assembled = PETSC_TRUE;
455: F->preallocated = PETSC_TRUE;
456: lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */
457: return 0;
458: }
460: /* Note the Petsc r and c permutations are ignored */
461: static PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
462: {
463: Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)F->data;
464: PetscInt M = A->rmap->N, N = A->cmap->N, indx;
465: PetscMPIInt size, mpiflg;
466: PetscBool flg, set;
467: const char *colperm[] = {"NATURAL", "MMD_AT_PLUS_A", "MMD_ATA", "METIS_AT_PLUS_A", "PARMETIS"};
468: const char *rowperm[] = {"NOROWPERM", "LargeDiag_MC64", "LargeDiag_AWPM", "MY_PERMR"};
469: const char *factPattern[] = {"SamePattern", "SamePattern_SameRowPerm", "DOFACT"};
470: MPI_Comm comm;
471: PetscSuperLU_DIST *context = NULL;
473: /* Set options to F */
474: PetscObjectGetComm((PetscObject)F, &comm);
475: MPI_Comm_size(comm, &size);
477: PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "SuperLU_Dist Options", "Mat");
478: PetscOptionsBool("-mat_superlu_dist_equil", "Equilibrate matrix", "None", lu->options.Equil ? PETSC_TRUE : PETSC_FALSE, &flg, &set);
479: if (set && !flg) lu->options.Equil = NO;
481: PetscOptionsEList("-mat_superlu_dist_rowperm", "Row permutation", "None", rowperm, 4, rowperm[1], &indx, &flg);
482: if (flg) {
483: switch (indx) {
484: case 0:
485: lu->options.RowPerm = NOROWPERM;
486: break;
487: case 1:
488: lu->options.RowPerm = LargeDiag_MC64;
489: break;
490: case 2:
491: lu->options.RowPerm = LargeDiag_AWPM;
492: break;
493: case 3:
494: lu->options.RowPerm = MY_PERMR;
495: break;
496: default:
497: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown row permutation");
498: }
499: }
501: PetscOptionsEList("-mat_superlu_dist_colperm", "Column permutation", "None", colperm, 5, colperm[3], &indx, &flg);
502: if (flg) {
503: switch (indx) {
504: case 0:
505: lu->options.ColPerm = NATURAL;
506: break;
507: case 1:
508: lu->options.ColPerm = MMD_AT_PLUS_A;
509: break;
510: case 2:
511: lu->options.ColPerm = MMD_ATA;
512: break;
513: case 3:
514: lu->options.ColPerm = METIS_AT_PLUS_A;
515: break;
516: case 4:
517: lu->options.ColPerm = PARMETIS; /* only works for np>1 */
518: break;
519: default:
520: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown column permutation");
521: }
522: }
524: lu->options.ReplaceTinyPivot = NO;
525: PetscOptionsBool("-mat_superlu_dist_replacetinypivot", "Replace tiny pivots", "None", lu->options.ReplaceTinyPivot ? PETSC_TRUE : PETSC_FALSE, &flg, &set);
526: if (set && flg) lu->options.ReplaceTinyPivot = YES;
528: lu->options.ParSymbFact = NO;
529: PetscOptionsBool("-mat_superlu_dist_parsymbfact", "Parallel symbolic factorization", "None", PETSC_FALSE, &flg, &set);
530: if (set && flg && size > 1) {
531: #if defined(PETSC_HAVE_PARMETIS)
532: lu->options.ParSymbFact = YES;
533: lu->options.ColPerm = PARMETIS; /* in v2.2, PARMETIS is forced for ParSymbFact regardless of user ordering setting */
534: #else
535: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "parsymbfact needs PARMETIS");
536: #endif
537: }
539: lu->FactPattern = SamePattern;
540: PetscOptionsEList("-mat_superlu_dist_fact", "Sparsity pattern for repeated matrix factorization", "None", factPattern, 3, factPattern[0], &indx, &flg);
541: if (flg) {
542: switch (indx) {
543: case 0:
544: lu->FactPattern = SamePattern;
545: break;
546: case 1:
547: lu->FactPattern = SamePattern_SameRowPerm;
548: break;
549: case 2:
550: lu->FactPattern = DOFACT;
551: break;
552: }
553: }
555: lu->options.IterRefine = NOREFINE;
556: PetscOptionsBool("-mat_superlu_dist_iterrefine", "Use iterative refinement", "None", lu->options.IterRefine == NOREFINE ? PETSC_FALSE : PETSC_TRUE, &flg, &set);
557: if (set) {
558: if (flg) lu->options.IterRefine = SLU_DOUBLE;
559: else lu->options.IterRefine = NOREFINE;
560: }
562: if (PetscLogPrintInfo) lu->options.PrintStat = YES;
563: else lu->options.PrintStat = NO;
564: PetscOptionsBool("-mat_superlu_dist_statprint", "Print factorization information", "None", (PetscBool)lu->options.PrintStat, (PetscBool *)&lu->options.PrintStat, NULL);
566: /* Additional options for special cases */
567: if (Petsc_Superlu_dist_keyval == MPI_KEYVAL_INVALID) {
568: MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, Petsc_Superlu_dist_keyval_Delete_Fn, &Petsc_Superlu_dist_keyval, (void *)0);
569: PetscRegisterFinalize(Petsc_Superlu_dist_keyval_free);
570: }
571: MPI_Comm_get_attr(comm, Petsc_Superlu_dist_keyval, &context, &mpiflg);
572: if (!mpiflg || context->busy) { /* additional options */
573: if (!mpiflg) {
574: PetscNew(&context);
575: context->busy = PETSC_TRUE;
576: MPI_Comm_dup(comm, &context->comm);
577: MPI_Comm_set_attr(comm, Petsc_Superlu_dist_keyval, context);
578: } else {
579: PetscCommGetComm(PetscObjectComm((PetscObject)A), &lu->comm_superlu);
580: }
582: /* Default number of process columns and rows */
583: lu->nprow = (int_t)(0.5 + PetscSqrtReal((PetscReal)size));
584: if (!lu->nprow) lu->nprow = 1;
585: while (lu->nprow > 0) {
586: lu->npcol = (int_t)(size / lu->nprow);
587: if (size == lu->nprow * lu->npcol) break;
588: lu->nprow--;
589: }
590: #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
591: lu->use3d = PETSC_FALSE;
592: lu->npdep = 1;
593: #endif
595: #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
596: PetscOptionsBool("-mat_superlu_dist_3d", "Use SuperLU_DIST 3D distribution", "None", lu->use3d, &lu->use3d, NULL);
598: if (lu->use3d) {
599: PetscInt t;
600: PetscOptionsInt("-mat_superlu_dist_d", "Number of z entries in processor partition", "None", lu->npdep, (PetscInt *)&lu->npdep, NULL);
601: t = (PetscInt)PetscLog2Real((PetscReal)lu->npdep);
602: PetscCheck(PetscPowInt(2, t) == lu->npdep, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_superlu_dist_d %lld must be a power of 2", (long long)lu->npdep);
603: if (lu->npdep > 1) {
604: lu->nprow = (int_t)(0.5 + PetscSqrtReal((PetscReal)(size / lu->npdep)));
605: if (!lu->nprow) lu->nprow = 1;
606: while (lu->nprow > 0) {
607: lu->npcol = (int_t)(size / (lu->npdep * lu->nprow));
608: if (size == lu->nprow * lu->npcol * lu->npdep) break;
609: lu->nprow--;
610: }
611: }
612: }
613: #endif
614: PetscOptionsInt("-mat_superlu_dist_r", "Number rows in processor partition", "None", lu->nprow, (PetscInt *)&lu->nprow, NULL);
615: PetscOptionsInt("-mat_superlu_dist_c", "Number columns in processor partition", "None", lu->npcol, (PetscInt *)&lu->npcol, NULL);
616: #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
617: PetscCheck(size == lu->nprow * lu->npcol * lu->npdep, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of processes %d must equal nprow %lld * npcol %lld * npdep %lld", size, (long long)lu->nprow, (long long)lu->npcol, (long long)lu->npdep);
618: #else
619: PetscCheck(size == lu->nprow * lu->npcol, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of processes %d must equal nprow %lld * npcol %lld", size, (long long)lu->nprow, (long long)lu->npcol);
620: #endif
621: /* end of adding additional options */
623: #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
624: if (lu->use3d) {
625: PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridinit3d", superlu_gridinit3d(context ? context->comm : lu->comm_superlu, lu->nprow, lu->npcol, lu->npdep, &lu->grid3d));
626: if (context) {
627: context->grid3d = lu->grid3d;
628: context->use3d = lu->use3d;
629: }
630: } else {
631: #endif
632: PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridinit", superlu_gridinit(context ? context->comm : lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid));
633: if (context) context->grid = lu->grid;
634: #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
635: }
636: #endif
637: PetscInfo(NULL, "Duplicating a communicator for SuperLU_DIST and calling superlu_gridinit()\n");
638: if (mpiflg) {
639: PetscInfo(NULL, "Communicator attribute already in use so not saving communicator and SuperLU_DIST grid in communicator attribute \n");
640: } else {
641: PetscInfo(NULL, "Storing communicator and SuperLU_DIST grid in communicator attribute\n");
642: }
643: } else { /* (mpiflg && !context->busy) */
644: PetscInfo(NULL, "Reusing communicator and superlu_gridinit() for SuperLU_DIST from communicator attribute\n");
645: context->busy = PETSC_TRUE;
646: lu->grid = context->grid;
647: }
648: PetscOptionsEnd();
650: /* Initialize ScalePermstruct and LUstruct. */
651: PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructInit", ScalePermstructInit(M, N, &lu->ScalePermstruct));
652: PetscStackCallExternalVoid("SuperLU_DIST:LUstructInit", LUstructInit(N, &lu->LUstruct));
653: F->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST;
654: F->ops->solve = MatSolve_SuperLU_DIST;
655: F->ops->matsolve = MatMatSolve_SuperLU_DIST;
656: F->ops->getinertia = NULL;
658: if (A->symmetric == PETSC_BOOL3_TRUE || A->hermitian == PETSC_BOOL3_TRUE) F->ops->getinertia = MatGetInertia_SuperLU_DIST;
659: lu->CleanUpSuperLU_Dist = PETSC_TRUE;
660: return 0;
661: }
663: static PetscErrorCode MatCholeskyFactorSymbolic_SuperLU_DIST(Mat F, Mat A, IS r, const MatFactorInfo *info)
664: {
665: MatLUFactorSymbolic_SuperLU_DIST(F, A, r, r, info);
666: F->ops->choleskyfactornumeric = MatLUFactorNumeric_SuperLU_DIST;
667: return 0;
668: }
670: static PetscErrorCode MatFactorGetSolverType_aij_superlu_dist(Mat A, MatSolverType *type)
671: {
672: *type = MATSOLVERSUPERLU_DIST;
673: return 0;
674: }
676: static PetscErrorCode MatView_Info_SuperLU_DIST(Mat A, PetscViewer viewer)
677: {
678: Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data;
679: superlu_dist_options_t options;
681: /* check if matrix is superlu_dist type */
682: if (A->ops->solve != MatSolve_SuperLU_DIST) return 0;
684: options = lu->options;
685: PetscViewerASCIIPrintf(viewer, "SuperLU_DIST run parameters:\n");
686: /* would love to use superlu 'IFMT' macro but it looks like it's inconsistently applied, the
687: * format spec for int64_t is set to %d for whatever reason */
688: PetscViewerASCIIPrintf(viewer, " Process grid nprow %lld x npcol %lld \n", (long long)lu->nprow, (long long)lu->npcol);
689: #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
690: if (lu->use3d) PetscViewerASCIIPrintf(viewer, " Using 3d decomposition with npdep %lld \n", (long long)lu->npdep);
691: #endif
693: PetscViewerASCIIPrintf(viewer, " Equilibrate matrix %s \n", PetscBools[options.Equil != NO]);
694: PetscViewerASCIIPrintf(viewer, " Replace tiny pivots %s \n", PetscBools[options.ReplaceTinyPivot != NO]);
695: PetscViewerASCIIPrintf(viewer, " Use iterative refinement %s \n", PetscBools[options.IterRefine == SLU_DOUBLE]);
696: PetscViewerASCIIPrintf(viewer, " Processors in row %lld col partition %lld \n", (long long)lu->nprow, (long long)lu->npcol);
698: switch (options.RowPerm) {
699: case NOROWPERM:
700: PetscViewerASCIIPrintf(viewer, " Row permutation NOROWPERM\n");
701: break;
702: case LargeDiag_MC64:
703: PetscViewerASCIIPrintf(viewer, " Row permutation LargeDiag_MC64\n");
704: break;
705: case LargeDiag_AWPM:
706: PetscViewerASCIIPrintf(viewer, " Row permutation LargeDiag_AWPM\n");
707: break;
708: case MY_PERMR:
709: PetscViewerASCIIPrintf(viewer, " Row permutation MY_PERMR\n");
710: break;
711: default:
712: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown row permutation");
713: }
715: switch (options.ColPerm) {
716: case NATURAL:
717: PetscViewerASCIIPrintf(viewer, " Column permutation NATURAL\n");
718: break;
719: case MMD_AT_PLUS_A:
720: PetscViewerASCIIPrintf(viewer, " Column permutation MMD_AT_PLUS_A\n");
721: break;
722: case MMD_ATA:
723: PetscViewerASCIIPrintf(viewer, " Column permutation MMD_ATA\n");
724: break;
725: /* Even though this is called METIS, the SuperLU_DIST code sets this by default if PARMETIS is defined, not METIS */
726: case METIS_AT_PLUS_A:
727: PetscViewerASCIIPrintf(viewer, " Column permutation METIS_AT_PLUS_A\n");
728: break;
729: case PARMETIS:
730: PetscViewerASCIIPrintf(viewer, " Column permutation PARMETIS\n");
731: break;
732: default:
733: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown column permutation");
734: }
736: PetscViewerASCIIPrintf(viewer, " Parallel symbolic factorization %s \n", PetscBools[options.ParSymbFact != NO]);
738: if (lu->FactPattern == SamePattern) {
739: PetscViewerASCIIPrintf(viewer, " Repeated factorization SamePattern\n");
740: } else if (lu->FactPattern == SamePattern_SameRowPerm) {
741: PetscViewerASCIIPrintf(viewer, " Repeated factorization SamePattern_SameRowPerm\n");
742: } else if (lu->FactPattern == DOFACT) {
743: PetscViewerASCIIPrintf(viewer, " Repeated factorization DOFACT\n");
744: } else {
745: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown factorization pattern");
746: }
747: return 0;
748: }
750: static PetscErrorCode MatView_SuperLU_DIST(Mat A, PetscViewer viewer)
751: {
752: PetscBool iascii;
753: PetscViewerFormat format;
755: PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii);
756: if (iascii) {
757: PetscViewerGetFormat(viewer, &format);
758: if (format == PETSC_VIEWER_ASCII_INFO) MatView_Info_SuperLU_DIST(A, viewer);
759: }
760: return 0;
761: }
763: static PetscErrorCode MatGetFactor_aij_superlu_dist(Mat A, MatFactorType ftype, Mat *F)
764: {
765: Mat B;
766: Mat_SuperLU_DIST *lu;
767: PetscInt M = A->rmap->N, N = A->cmap->N;
768: PetscMPIInt size;
769: superlu_dist_options_t options;
771: /* Create the factorization matrix */
772: MatCreate(PetscObjectComm((PetscObject)A), &B);
773: MatSetSizes(B, A->rmap->n, A->cmap->n, M, N);
774: PetscStrallocpy("superlu_dist", &((PetscObject)B)->type_name);
775: MatSetUp(B);
776: B->ops->getinfo = MatGetInfo_External;
777: B->ops->view = MatView_SuperLU_DIST;
778: B->ops->destroy = MatDestroy_SuperLU_DIST;
780: /* Set the default input options:
781: options.Fact = DOFACT;
782: options.Equil = YES;
783: options.ParSymbFact = NO;
784: options.ColPerm = METIS_AT_PLUS_A;
785: options.RowPerm = LargeDiag_MC64;
786: options.ReplaceTinyPivot = YES;
787: options.IterRefine = DOUBLE;
788: options.Trans = NOTRANS;
789: options.SolveInitialized = NO; - holds the communication pattern used by MatSolve() and MatMatSolve()
790: options.RefineInitialized = NO;
791: options.PrintStat = YES;
792: options.SymPattern = NO;
793: */
794: set_default_options_dist(&options);
796: B->trivialsymbolic = PETSC_TRUE;
797: if (ftype == MAT_FACTOR_LU) {
798: B->factortype = MAT_FACTOR_LU;
799: B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
800: } else {
801: B->factortype = MAT_FACTOR_CHOLESKY;
802: B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SuperLU_DIST;
803: options.SymPattern = YES;
804: }
806: /* set solvertype */
807: PetscFree(B->solvertype);
808: PetscStrallocpy(MATSOLVERSUPERLU_DIST, &B->solvertype);
810: PetscNew(&lu);
811: B->data = lu;
812: MPI_Comm_size(PetscObjectComm((PetscObject)A), &size);
814: lu->options = options;
815: lu->options.Fact = DOFACT;
816: lu->matsolve_iscalled = PETSC_FALSE;
817: lu->matmatsolve_iscalled = PETSC_FALSE;
819: PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_aij_superlu_dist);
820: PetscObjectComposeFunction((PetscObject)B, "MatSuperluDistGetDiagU_C", MatSuperluDistGetDiagU_SuperLU_DIST);
822: *F = B;
823: return 0;
824: }
826: PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_SuperLU_DIST(void)
827: {
828: MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_superlu_dist);
829: MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_superlu_dist);
830: MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_superlu_dist);
831: MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_superlu_dist);
832: return 0;
833: }
835: /*MC
836: MATSOLVERSUPERLU_DIST - Parallel direct solver package for LU factorization
838: Use ./configure --download-superlu_dist --download-parmetis --download-metis --download-ptscotch to have PETSc installed with SuperLU_DIST
840: Use -pc_type lu -pc_factor_mat_solver_type superlu_dist to use this direct solver
842: Works with `MATAIJ` matrices
844: Options Database Keys:
845: + -mat_superlu_dist_r <n> - number of rows in processor partition
846: . -mat_superlu_dist_c <n> - number of columns in processor partition
847: . -mat_superlu_dist_3d - use 3d partition, requires SuperLU_DIST 7.2 or later
848: . -mat_superlu_dist_d <n> - depth in 3d partition (valid only if -mat_superlu_dist_3d is provided)
849: . -mat_superlu_dist_equil - equilibrate the matrix
850: . -mat_superlu_dist_rowperm <NOROWPERM,LargeDiag_MC64,LargeDiag_AWPM,MY_PERMR> - row permutation
851: . -mat_superlu_dist_colperm <NATURAL,MMD_AT_PLUS_A,MMD_ATA,METIS_AT_PLUS_A,PARMETIS> - column permutation
852: . -mat_superlu_dist_replacetinypivot - replace tiny pivots
853: . -mat_superlu_dist_fact <SamePattern> - (choose one of) SamePattern SamePattern_SameRowPerm DOFACT
854: . -mat_superlu_dist_iterrefine - use iterative refinement
855: - -mat_superlu_dist_statprint - print factorization information
857: Note:
858: If PETSc was configured with --with-cuda then this solver will automatically use the GPUs.
860: Level: beginner
862: .seealso: `PCLU`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatGetFactor()`
863: M*/
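/*
   Illustrative sketch (not part of the PETSc sources): selecting this solver from application code rather
   than the command line. An assembled MATAIJ matrix A and vectors b, x are assumed; error handling is left
   to PetscCall().

     KSP ksp;
     PC  pc;

     PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));
     PetscCall(KSPSetOperators(ksp, A, A));
     PetscCall(KSPSetType(ksp, KSPPREONLY)); // pure direct solve, no Krylov iterations
     PetscCall(KSPGetPC(ksp, &pc));
     PetscCall(PCSetType(pc, PCLU));
     PetscCall(PCFactorSetMatSolverType(pc, MATSOLVERSUPERLU_DIST));
     PetscCall(KSPSetFromOptions(ksp)); // picks up the -mat_superlu_dist_* options listed above
     PetscCall(KSPSolve(ksp, b, x));
     PetscCall(KSPDestroy(&ksp));

   The equivalent command-line selection is: -ksp_type preonly -pc_type lu -pc_factor_mat_solver_type superlu_dist
*/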