Actual source code: mkl_cpardiso.c
2: #include <petscsys.h>
3: #include <../src/mat/impls/aij/mpi/mpiaij.h>
4: #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
6: #if defined(PETSC_HAVE_MKL_INTEL_ILP64)
7: #define MKL_ILP64
8: #endif
9: #include <mkl.h>
10: #include <mkl_cluster_sparse_solver.h>
12: /*
13: * Possible mkl_cpardiso phases that controls the execution of the solver.
14: * For more information check mkl_cpardiso manual.
15: */
16: #define JOB_ANALYSIS 11
17: #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION 12
18: #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 13
19: #define JOB_NUMERICAL_FACTORIZATION 22
20: #define JOB_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 23
21: #define JOB_SOLVE_ITERATIVE_REFINEMENT 33
22: #define JOB_SOLVE_FORWARD_SUBSTITUTION 331
23: #define JOB_SOLVE_DIAGONAL_SUBSTITUTION 332
24: #define JOB_SOLVE_BACKWARD_SUBSTITUTION 333
25: #define JOB_RELEASE_OF_LU_MEMORY 0
26: #define JOB_RELEASE_OF_ALL_MEMORY -1
28: #define IPARM_SIZE 64
29: #define INT_TYPE MKL_INT
31: static const char *Err_MSG_CPardiso(int errNo)
32: {
33: switch (errNo) {
34: case -1:
35: return "input inconsistent";
36: break;
37: case -2:
38: return "not enough memory";
39: break;
40: case -3:
41: return "reordering problem";
42: break;
43: case -4:
44: return "zero pivot, numerical factorization or iterative refinement problem";
45: break;
46: case -5:
47: return "unclassified (internal) error";
48: break;
49: case -6:
50: return "preordering failed (matrix types 11, 13 only)";
51: break;
52: case -7:
53: return "diagonal matrix problem";
54: break;
55: case -8:
56: return "32-bit integer overflow problem";
57: break;
58: case -9:
59: return "not enough memory for OOC";
60: break;
61: case -10:
62: return "problems with opening OOC temporary files";
63: break;
64: case -11:
65: return "read/write problems with the OOC data file";
66: break;
67: default:
68: return "unknown error";
69: }
70: }
72: /*
73: * Internal data structure.
74: * For more information check mkl_cpardiso manual.
75: */
77: typedef struct {
78: /* Configuration vector */
79: INT_TYPE iparm[IPARM_SIZE];
81: /*
82: * Internal mkl_cpardiso memory location.
83: * After the first call to mkl_cpardiso do not modify pt, as that could cause a serious memory leak.
84: */
85: void *pt[IPARM_SIZE];
87: MPI_Fint comm_mkl_cpardiso;
89: /* Basic mkl_cpardiso info*/
90: INT_TYPE phase, maxfct, mnum, mtype, n, nrhs, msglvl, err;
92: /* Matrix structure */
93: PetscScalar *a;
95: INT_TYPE *ia, *ja;
97: /* Number of non-zero elements */
98: INT_TYPE nz;
100: /* Row permutaton vector*/
101: INT_TYPE *perm;
103: /* Define is matrix preserve sparce structure. */
104: MatStructure matstruc;
106: PetscErrorCode (*ConvertToTriples)(Mat, MatReuse, PetscInt *, PetscInt **, PetscInt **, PetscScalar **);
108: /* True if mkl_cpardiso function have been used. */
109: PetscBool CleanUp;
110: } Mat_MKL_CPARDISO;
112: /*
113: * Copy the elements of matrix A.
114: * Input:
115: * - Mat A: MATSEQAIJ matrix
116: * - int shift: matrix index.
117: * - 0 for c representation
118: * - 1 for fortran representation
119: * - MatReuse reuse:
120: * - MAT_INITIAL_MATRIX: Create a new aij representation
121: * - MAT_REUSE_MATRIX: Reuse all aij representation and just change values
122: * Output:
123: * - int *nnz: Number of nonzero-elements.
124: * - int **r pointer to i index
125: * - int **c pointer to j elements
126: * - MATRIXTYPE **v: Non-zero elements
127: */
128: PetscErrorCode MatCopy_seqaij_seqaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v)
129: {
130: Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data;
132: *v = aa->a;
133: if (reuse == MAT_INITIAL_MATRIX) {
134: *r = (INT_TYPE *)aa->i;
135: *c = (INT_TYPE *)aa->j;
136: *nnz = aa->nz;
137: }
138: return 0;
139: }
141: PetscErrorCode MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v)
142: {
143: const PetscInt *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
144: PetscInt rstart, nz, i, j, countA, countB;
145: PetscInt *row, *col;
146: const PetscScalar *av, *bv;
147: PetscScalar *val;
148: Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data;
149: Mat_SeqAIJ *aa = (Mat_SeqAIJ *)(mat->A)->data;
150: Mat_SeqAIJ *bb = (Mat_SeqAIJ *)(mat->B)->data;
151: PetscInt colA_start, jB, jcol;
153: ai = aa->i;
154: aj = aa->j;
155: bi = bb->i;
156: bj = bb->j;
157: rstart = A->rmap->rstart;
158: av = aa->a;
159: bv = bb->a;
161: garray = mat->garray;
163: if (reuse == MAT_INITIAL_MATRIX) {
164: nz = aa->nz + bb->nz;
165: *nnz = nz;
166: PetscMalloc3(m + 1, &row, nz, &col, nz, &val);
167: *r = row;
168: *c = col;
169: *v = val;
170: } else {
171: row = *r;
172: col = *c;
173: val = *v;
174: }
176: nz = 0;
177: for (i = 0; i < m; i++) {
178: row[i] = nz;
179: countA = ai[i + 1] - ai[i];
180: countB = bi[i + 1] - bi[i];
181: ajj = aj + ai[i]; /* ptr to the beginning of this row */
182: bjj = bj + bi[i];
184: /* B part, smaller col index */
185: colA_start = rstart + ajj[0]; /* the smallest global col index of A */
186: jB = 0;
187: for (j = 0; j < countB; j++) {
188: jcol = garray[bjj[j]];
189: if (jcol > colA_start) break;
190: col[nz] = jcol;
191: val[nz++] = *bv++;
192: }
193: jB = j;
195: /* A part */
196: for (j = 0; j < countA; j++) {
197: col[nz] = rstart + ajj[j];
198: val[nz++] = *av++;
199: }
201: /* B part, larger col index */
202: for (j = jB; j < countB; j++) {
203: col[nz] = garray[bjj[j]];
204: val[nz++] = *bv++;
205: }
206: }
207: row[m] = nz;
209: return 0;
210: }
212: PetscErrorCode MatConvertToTriples_mpibaij_mpibaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v)
213: {
214: const PetscInt *ai, *aj, *bi, *bj, *garray, bs = A->rmap->bs, bs2 = bs * bs, m = A->rmap->n / bs, *ajj, *bjj;
215: PetscInt rstart, nz, i, j, countA, countB;
216: PetscInt *row, *col;
217: const PetscScalar *av, *bv;
218: PetscScalar *val;
219: Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)A->data;
220: Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)(mat->A)->data;
221: Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)(mat->B)->data;
222: PetscInt colA_start, jB, jcol;
224: ai = aa->i;
225: aj = aa->j;
226: bi = bb->i;
227: bj = bb->j;
228: rstart = A->rmap->rstart / bs;
229: av = aa->a;
230: bv = bb->a;
232: garray = mat->garray;
234: if (reuse == MAT_INITIAL_MATRIX) {
235: nz = aa->nz + bb->nz;
236: *nnz = nz;
237: PetscMalloc3(m + 1, &row, nz, &col, nz * bs2, &val);
238: *r = row;
239: *c = col;
240: *v = val;
241: } else {
242: row = *r;
243: col = *c;
244: val = *v;
245: }
247: nz = 0;
248: for (i = 0; i < m; i++) {
249: row[i] = nz + 1;
250: countA = ai[i + 1] - ai[i];
251: countB = bi[i + 1] - bi[i];
252: ajj = aj + ai[i]; /* ptr to the beginning of this row */
253: bjj = bj + bi[i];
255: /* B part, smaller col index */
256: colA_start = rstart + (countA > 0 ? ajj[0] : 0); /* the smallest global col index of A */
257: jB = 0;
258: for (j = 0; j < countB; j++) {
259: jcol = garray[bjj[j]];
260: if (jcol > colA_start) break;
261: col[nz++] = jcol + 1;
262: }
263: jB = j;
264: PetscArraycpy(val, bv, jB * bs2);
265: val += jB * bs2;
266: bv += jB * bs2;
268: /* A part */
269: for (j = 0; j < countA; j++) col[nz++] = rstart + ajj[j] + 1;
270: PetscArraycpy(val, av, countA * bs2);
271: val += countA * bs2;
272: av += countA * bs2;
274: /* B part, larger col index */
275: for (j = jB; j < countB; j++) col[nz++] = garray[bjj[j]] + 1;
276: PetscArraycpy(val, bv, (countB - jB) * bs2);
277: val += (countB - jB) * bs2;
278: bv += (countB - jB) * bs2;
279: }
280: row[m] = nz + 1;
282: return 0;
283: }
285: PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v)
286: {
287: const PetscInt *ai, *aj, *bi, *bj, *garray, bs = A->rmap->bs, bs2 = bs * bs, m = A->rmap->n / bs, *ajj, *bjj;
288: PetscInt rstart, nz, i, j, countA, countB;
289: PetscInt *row, *col;
290: const PetscScalar *av, *bv;
291: PetscScalar *val;
292: Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)A->data;
293: Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)(mat->A)->data;
294: Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)(mat->B)->data;
296: ai = aa->i;
297: aj = aa->j;
298: bi = bb->i;
299: bj = bb->j;
300: rstart = A->rmap->rstart / bs;
301: av = aa->a;
302: bv = bb->a;
304: garray = mat->garray;
306: if (reuse == MAT_INITIAL_MATRIX) {
307: nz = aa->nz + bb->nz;
308: *nnz = nz;
309: PetscMalloc3(m + 1, &row, nz, &col, nz * bs2, &val);
310: *r = row;
311: *c = col;
312: *v = val;
313: } else {
314: row = *r;
315: col = *c;
316: val = *v;
317: }
319: nz = 0;
320: for (i = 0; i < m; i++) {
321: row[i] = nz + 1;
322: countA = ai[i + 1] - ai[i];
323: countB = bi[i + 1] - bi[i];
324: ajj = aj + ai[i]; /* ptr to the beginning of this row */
325: bjj = bj + bi[i];
327: /* A part */
328: for (j = 0; j < countA; j++) col[nz++] = rstart + ajj[j] + 1;
329: PetscArraycpy(val, av, countA * bs2);
330: val += countA * bs2;
331: av += countA * bs2;
333: /* B part, larger col index */
334: for (j = 0; j < countB; j++) col[nz++] = garray[bjj[j]] + 1;
335: PetscArraycpy(val, bv, countB * bs2);
336: val += countB * bs2;
337: bv += countB * bs2;
338: }
339: row[m] = nz + 1;
341: return 0;
342: }
344: /*
345: * Free memory for Mat_MKL_CPARDISO structure and pointers to objects.
346: */
347: PetscErrorCode MatDestroy_MKL_CPARDISO(Mat A)
348: {
349: Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO *)A->data;
350: MPI_Comm comm;
352: /* Terminate instance, deallocate memories */
353: if (mat_mkl_cpardiso->CleanUp) {
354: mat_mkl_cpardiso->phase = JOB_RELEASE_OF_ALL_MEMORY;
356: cluster_sparse_solver(mat_mkl_cpardiso->pt, &mat_mkl_cpardiso->maxfct, &mat_mkl_cpardiso->mnum, &mat_mkl_cpardiso->mtype, &mat_mkl_cpardiso->phase, &mat_mkl_cpardiso->n, NULL, NULL, NULL, mat_mkl_cpardiso->perm, &mat_mkl_cpardiso->nrhs,
357: mat_mkl_cpardiso->iparm, &mat_mkl_cpardiso->msglvl, NULL, NULL, &mat_mkl_cpardiso->comm_mkl_cpardiso, (PetscInt *)&mat_mkl_cpardiso->err);
358: }
360: if (mat_mkl_cpardiso->ConvertToTriples != MatCopy_seqaij_seqaij_MKL_CPARDISO) PetscFree3(mat_mkl_cpardiso->ia, mat_mkl_cpardiso->ja, mat_mkl_cpardiso->a);
361: comm = MPI_Comm_f2c(mat_mkl_cpardiso->comm_mkl_cpardiso);
362: MPI_Comm_free(&comm);
363: PetscFree(A->data);
365: /* clear composed functions */
366: PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL);
367: PetscObjectComposeFunction((PetscObject)A, "MatMkl_CPardisoSetCntl_C", NULL);
368: return 0;
369: }
371: /*
372: * Computes Ax = b
373: */
374: PetscErrorCode MatSolve_MKL_CPARDISO(Mat A, Vec b, Vec x)
375: {
376: Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO *)(A)->data;
377: PetscScalar *xarray;
378: const PetscScalar *barray;
380: mat_mkl_cpardiso->nrhs = 1;
381: VecGetArray(x, &xarray);
382: VecGetArrayRead(b, &barray);
384: /* solve phase */
385: /*-------------*/
386: mat_mkl_cpardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT;
387: cluster_sparse_solver(mat_mkl_cpardiso->pt, &mat_mkl_cpardiso->maxfct, &mat_mkl_cpardiso->mnum, &mat_mkl_cpardiso->mtype, &mat_mkl_cpardiso->phase, &mat_mkl_cpardiso->n, mat_mkl_cpardiso->a, mat_mkl_cpardiso->ia, mat_mkl_cpardiso->ja,
388: mat_mkl_cpardiso->perm, &mat_mkl_cpardiso->nrhs, mat_mkl_cpardiso->iparm, &mat_mkl_cpardiso->msglvl, (void *)barray, (void *)xarray, &mat_mkl_cpardiso->comm_mkl_cpardiso, (PetscInt *)&mat_mkl_cpardiso->err);
392: VecRestoreArray(x, &xarray);
393: VecRestoreArrayRead(b, &barray);
394: mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
395: return 0;
396: }
398: PetscErrorCode MatSolveTranspose_MKL_CPARDISO(Mat A, Vec b, Vec x)
399: {
400: Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO *)A->data;
402: #if defined(PETSC_USE_COMPLEX)
403: mat_mkl_cpardiso->iparm[12 - 1] = 1;
404: #else
405: mat_mkl_cpardiso->iparm[12 - 1] = 2;
406: #endif
407: MatSolve_MKL_CPARDISO(A, b, x);
408: mat_mkl_cpardiso->iparm[12 - 1] = 0;
409: return 0;
410: }
412: PetscErrorCode MatMatSolve_MKL_CPARDISO(Mat A, Mat B, Mat X)
413: {
414: Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO *)(A)->data;
415: PetscScalar *xarray;
416: const PetscScalar *barray;
418: MatGetSize(B, NULL, (PetscInt *)&mat_mkl_cpardiso->nrhs);
420: if (mat_mkl_cpardiso->nrhs > 0) {
421: MatDenseGetArrayRead(B, &barray);
422: MatDenseGetArray(X, &xarray);
426: /* solve phase */
427: /*-------------*/
428: mat_mkl_cpardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT;
429: cluster_sparse_solver(mat_mkl_cpardiso->pt, &mat_mkl_cpardiso->maxfct, &mat_mkl_cpardiso->mnum, &mat_mkl_cpardiso->mtype, &mat_mkl_cpardiso->phase, &mat_mkl_cpardiso->n, mat_mkl_cpardiso->a, mat_mkl_cpardiso->ia, mat_mkl_cpardiso->ja,
430: mat_mkl_cpardiso->perm, &mat_mkl_cpardiso->nrhs, mat_mkl_cpardiso->iparm, &mat_mkl_cpardiso->msglvl, (void *)barray, (void *)xarray, &mat_mkl_cpardiso->comm_mkl_cpardiso, (PetscInt *)&mat_mkl_cpardiso->err);
432: MatDenseRestoreArrayRead(B, &barray);
433: MatDenseRestoreArray(X, &xarray);
434: }
435: mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
436: return 0;
437: }
439: /*
440: * LU Decomposition
441: */
442: PetscErrorCode MatFactorNumeric_MKL_CPARDISO(Mat F, Mat A, const MatFactorInfo *info)
443: {
444: Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO *)(F)->data;
446: mat_mkl_cpardiso->matstruc = SAME_NONZERO_PATTERN;
447: (*mat_mkl_cpardiso->ConvertToTriples)(A, MAT_REUSE_MATRIX, &mat_mkl_cpardiso->nz, &mat_mkl_cpardiso->ia, &mat_mkl_cpardiso->ja, &mat_mkl_cpardiso->a);
449: mat_mkl_cpardiso->phase = JOB_NUMERICAL_FACTORIZATION;
450: cluster_sparse_solver(mat_mkl_cpardiso->pt, &mat_mkl_cpardiso->maxfct, &mat_mkl_cpardiso->mnum, &mat_mkl_cpardiso->mtype, &mat_mkl_cpardiso->phase, &mat_mkl_cpardiso->n, mat_mkl_cpardiso->a, mat_mkl_cpardiso->ia, mat_mkl_cpardiso->ja,
451: mat_mkl_cpardiso->perm, &mat_mkl_cpardiso->nrhs, mat_mkl_cpardiso->iparm, &mat_mkl_cpardiso->msglvl, NULL, NULL, &mat_mkl_cpardiso->comm_mkl_cpardiso, &mat_mkl_cpardiso->err);
454: mat_mkl_cpardiso->matstruc = SAME_NONZERO_PATTERN;
455: mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
456: return 0;
457: }
459: /* Sets mkl_cpardiso options from the options database */
460: PetscErrorCode MatSetFromOptions_MKL_CPARDISO(Mat F, Mat A)
461: {
462: Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO *)F->data;
463: PetscInt icntl, threads;
464: PetscBool flg;
466: PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MKL_CPARDISO Options", "Mat");
467: PetscOptionsInt("-mat_mkl_cpardiso_65", "Suggested number of threads to use within MKL_CPARDISO", "None", threads, &threads, &flg);
468: if (flg) mkl_set_num_threads((int)threads);
470: PetscOptionsInt("-mat_mkl_cpardiso_66", "Maximum number of factors with identical sparsity structure that must be kept in memory at the same time", "None", mat_mkl_cpardiso->maxfct, &icntl, &flg);
471: if (flg) mat_mkl_cpardiso->maxfct = icntl;
473: PetscOptionsInt("-mat_mkl_cpardiso_67", "Indicates the actual matrix for the solution phase", "None", mat_mkl_cpardiso->mnum, &icntl, &flg);
474: if (flg) mat_mkl_cpardiso->mnum = icntl;
476: PetscOptionsInt("-mat_mkl_cpardiso_68", "Message level information", "None", mat_mkl_cpardiso->msglvl, &icntl, &flg);
477: if (flg) mat_mkl_cpardiso->msglvl = icntl;
479: PetscOptionsInt("-mat_mkl_cpardiso_69", "Defines the matrix type", "None", mat_mkl_cpardiso->mtype, &icntl, &flg);
480: if (flg) mat_mkl_cpardiso->mtype = icntl;
481: PetscOptionsInt("-mat_mkl_cpardiso_1", "Use default values", "None", mat_mkl_cpardiso->iparm[0], &icntl, &flg);
483: if (flg && icntl != 0) {
484: PetscOptionsInt("-mat_mkl_cpardiso_2", "Fill-in reducing ordering for the input matrix", "None", mat_mkl_cpardiso->iparm[1], &icntl, &flg);
485: if (flg) mat_mkl_cpardiso->iparm[1] = icntl;
487: PetscOptionsInt("-mat_mkl_cpardiso_4", "Preconditioned CGS/CG", "None", mat_mkl_cpardiso->iparm[3], &icntl, &flg);
488: if (flg) mat_mkl_cpardiso->iparm[3] = icntl;
490: PetscOptionsInt("-mat_mkl_cpardiso_5", "User permutation", "None", mat_mkl_cpardiso->iparm[4], &icntl, &flg);
491: if (flg) mat_mkl_cpardiso->iparm[4] = icntl;
493: PetscOptionsInt("-mat_mkl_cpardiso_6", "Write solution on x", "None", mat_mkl_cpardiso->iparm[5], &icntl, &flg);
494: if (flg) mat_mkl_cpardiso->iparm[5] = icntl;
496: PetscOptionsInt("-mat_mkl_cpardiso_8", "Iterative refinement step", "None", mat_mkl_cpardiso->iparm[7], &icntl, &flg);
497: if (flg) mat_mkl_cpardiso->iparm[7] = icntl;
499: PetscOptionsInt("-mat_mkl_cpardiso_10", "Pivoting perturbation", "None", mat_mkl_cpardiso->iparm[9], &icntl, &flg);
500: if (flg) mat_mkl_cpardiso->iparm[9] = icntl;
502: PetscOptionsInt("-mat_mkl_cpardiso_11", "Scaling vectors", "None", mat_mkl_cpardiso->iparm[10], &icntl, &flg);
503: if (flg) mat_mkl_cpardiso->iparm[10] = icntl;
505: PetscOptionsInt("-mat_mkl_cpardiso_12", "Solve with transposed or conjugate transposed matrix A", "None", mat_mkl_cpardiso->iparm[11], &icntl, &flg);
506: if (flg) mat_mkl_cpardiso->iparm[11] = icntl;
508: PetscOptionsInt("-mat_mkl_cpardiso_13", "Improved accuracy using (non-) symmetric weighted matching", "None", mat_mkl_cpardiso->iparm[12], &icntl, &flg);
509: if (flg) mat_mkl_cpardiso->iparm[12] = icntl;
511: PetscOptionsInt("-mat_mkl_cpardiso_18", "Numbers of non-zero elements", "None", mat_mkl_cpardiso->iparm[17], &icntl, &flg);
512: if (flg) mat_mkl_cpardiso->iparm[17] = icntl;
514: PetscOptionsInt("-mat_mkl_cpardiso_19", "Report number of floating point operations", "None", mat_mkl_cpardiso->iparm[18], &icntl, &flg);
515: if (flg) mat_mkl_cpardiso->iparm[18] = icntl;
517: PetscOptionsInt("-mat_mkl_cpardiso_21", "Pivoting for symmetric indefinite matrices", "None", mat_mkl_cpardiso->iparm[20], &icntl, &flg);
518: if (flg) mat_mkl_cpardiso->iparm[20] = icntl;
520: PetscOptionsInt("-mat_mkl_cpardiso_24", "Parallel factorization control", "None", mat_mkl_cpardiso->iparm[23], &icntl, &flg);
521: if (flg) mat_mkl_cpardiso->iparm[23] = icntl;
523: PetscOptionsInt("-mat_mkl_cpardiso_25", "Parallel forward/backward solve control", "None", mat_mkl_cpardiso->iparm[24], &icntl, &flg);
524: if (flg) mat_mkl_cpardiso->iparm[24] = icntl;
526: PetscOptionsInt("-mat_mkl_cpardiso_27", "Matrix checker", "None", mat_mkl_cpardiso->iparm[26], &icntl, &flg);
527: if (flg) mat_mkl_cpardiso->iparm[26] = icntl;
529: PetscOptionsInt("-mat_mkl_cpardiso_31", "Partial solve and computing selected components of the solution vectors", "None", mat_mkl_cpardiso->iparm[30], &icntl, &flg);
530: if (flg) mat_mkl_cpardiso->iparm[30] = icntl;
532: PetscOptionsInt("-mat_mkl_cpardiso_34", "Optimal number of threads for conditional numerical reproducibility (CNR) mode", "None", mat_mkl_cpardiso->iparm[33], &icntl, &flg);
533: if (flg) mat_mkl_cpardiso->iparm[33] = icntl;
535: PetscOptionsInt("-mat_mkl_cpardiso_60", "Intel MKL_CPARDISO mode", "None", mat_mkl_cpardiso->iparm[59], &icntl, &flg);
536: if (flg) mat_mkl_cpardiso->iparm[59] = icntl;
537: }
539: PetscOptionsEnd();
540: return 0;
541: }
543: PetscErrorCode PetscInitialize_MKL_CPARDISO(Mat A, Mat_MKL_CPARDISO *mat_mkl_cpardiso)
544: {
545: PetscInt bs;
546: PetscBool match;
547: PetscMPIInt size;
548: MPI_Comm comm;
551: MPI_Comm_dup(PetscObjectComm((PetscObject)A), &comm);
552: MPI_Comm_size(comm, &size);
553: mat_mkl_cpardiso->comm_mkl_cpardiso = MPI_Comm_c2f(comm);
555: mat_mkl_cpardiso->CleanUp = PETSC_FALSE;
556: mat_mkl_cpardiso->maxfct = 1;
557: mat_mkl_cpardiso->mnum = 1;
558: mat_mkl_cpardiso->n = A->rmap->N;
559: if (mat_mkl_cpardiso->iparm[36]) mat_mkl_cpardiso->n /= mat_mkl_cpardiso->iparm[36];
560: mat_mkl_cpardiso->msglvl = 0;
561: mat_mkl_cpardiso->nrhs = 1;
562: mat_mkl_cpardiso->err = 0;
563: mat_mkl_cpardiso->phase = -1;
564: #if defined(PETSC_USE_COMPLEX)
565: mat_mkl_cpardiso->mtype = 13;
566: #else
567: mat_mkl_cpardiso->mtype = 11;
568: #endif
570: #if defined(PETSC_USE_REAL_SINGLE)
571: mat_mkl_cpardiso->iparm[27] = 1;
572: #else
573: mat_mkl_cpardiso->iparm[27] = 0;
574: #endif
576: mat_mkl_cpardiso->iparm[0] = 1; /* Solver default parameters overridden with provided by iparm */
577: mat_mkl_cpardiso->iparm[1] = 2; /* Use METIS for fill-in reordering */
578: mat_mkl_cpardiso->iparm[5] = 0; /* Write solution into x */
579: mat_mkl_cpardiso->iparm[7] = 2; /* Max number of iterative refinement steps */
580: mat_mkl_cpardiso->iparm[9] = 13; /* Perturb the pivot elements with 1E-13 */
581: mat_mkl_cpardiso->iparm[10] = 1; /* Use nonsymmetric permutation and scaling MPS */
582: mat_mkl_cpardiso->iparm[12] = 1; /* Switch on Maximum Weighted Matching algorithm (default for non-symmetric) */
583: mat_mkl_cpardiso->iparm[17] = -1; /* Output: Number of nonzeros in the factor LU */
584: mat_mkl_cpardiso->iparm[18] = -1; /* Output: Mflops for LU factorization */
585: mat_mkl_cpardiso->iparm[26] = 1; /* Check input data for correctness */
587: mat_mkl_cpardiso->iparm[39] = 0;
588: if (size > 1) {
589: mat_mkl_cpardiso->iparm[39] = 2;
590: mat_mkl_cpardiso->iparm[40] = A->rmap->rstart;
591: mat_mkl_cpardiso->iparm[41] = A->rmap->rend - 1;
592: }
593: PetscObjectTypeCompareAny((PetscObject)A, &match, MATMPIBAIJ, MATMPISBAIJ, "");
594: if (match) {
595: MatGetBlockSize(A, &bs);
596: mat_mkl_cpardiso->iparm[36] = bs;
597: mat_mkl_cpardiso->iparm[40] /= bs;
598: mat_mkl_cpardiso->iparm[41] /= bs;
599: mat_mkl_cpardiso->iparm[40]++;
600: mat_mkl_cpardiso->iparm[41]++;
601: mat_mkl_cpardiso->iparm[34] = 0; /* Fortran style */
602: } else {
603: mat_mkl_cpardiso->iparm[34] = 1; /* C style */
604: }
606: mat_mkl_cpardiso->perm = 0;
607: return 0;
608: }
610: /*
611: * Symbolic decomposition. Mkl_Pardiso analysis phase.
612: */
613: PetscErrorCode MatLUFactorSymbolic_AIJMKL_CPARDISO(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
614: {
615: Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO *)F->data;
617: mat_mkl_cpardiso->matstruc = DIFFERENT_NONZERO_PATTERN;
619: /* Set MKL_CPARDISO options from the options database */
620: MatSetFromOptions_MKL_CPARDISO(F, A);
621: (*mat_mkl_cpardiso->ConvertToTriples)(A, MAT_INITIAL_MATRIX, &mat_mkl_cpardiso->nz, &mat_mkl_cpardiso->ia, &mat_mkl_cpardiso->ja, &mat_mkl_cpardiso->a);
623: mat_mkl_cpardiso->n = A->rmap->N;
624: if (mat_mkl_cpardiso->iparm[36]) mat_mkl_cpardiso->n /= mat_mkl_cpardiso->iparm[36];
626: /* analysis phase */
627: /*----------------*/
628: mat_mkl_cpardiso->phase = JOB_ANALYSIS;
630: cluster_sparse_solver(mat_mkl_cpardiso->pt, &mat_mkl_cpardiso->maxfct, &mat_mkl_cpardiso->mnum, &mat_mkl_cpardiso->mtype, &mat_mkl_cpardiso->phase, &mat_mkl_cpardiso->n, mat_mkl_cpardiso->a, mat_mkl_cpardiso->ia, mat_mkl_cpardiso->ja,
631: mat_mkl_cpardiso->perm, &mat_mkl_cpardiso->nrhs, mat_mkl_cpardiso->iparm, &mat_mkl_cpardiso->msglvl, NULL, NULL, &mat_mkl_cpardiso->comm_mkl_cpardiso, (PetscInt *)&mat_mkl_cpardiso->err);
635: mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
636: F->ops->lufactornumeric = MatFactorNumeric_MKL_CPARDISO;
637: F->ops->solve = MatSolve_MKL_CPARDISO;
638: F->ops->solvetranspose = MatSolveTranspose_MKL_CPARDISO;
639: F->ops->matsolve = MatMatSolve_MKL_CPARDISO;
640: return 0;
641: }
643: PetscErrorCode MatCholeskyFactorSymbolic_AIJMKL_CPARDISO(Mat F, Mat A, IS perm, const MatFactorInfo *info)
644: {
645: Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO *)F->data;
647: mat_mkl_cpardiso->matstruc = DIFFERENT_NONZERO_PATTERN;
649: /* Set MKL_CPARDISO options from the options database */
650: MatSetFromOptions_MKL_CPARDISO(F, A);
651: (*mat_mkl_cpardiso->ConvertToTriples)(A, MAT_INITIAL_MATRIX, &mat_mkl_cpardiso->nz, &mat_mkl_cpardiso->ia, &mat_mkl_cpardiso->ja, &mat_mkl_cpardiso->a);
653: mat_mkl_cpardiso->n = A->rmap->N;
654: if (mat_mkl_cpardiso->iparm[36]) mat_mkl_cpardiso->n /= mat_mkl_cpardiso->iparm[36];
656: if (A->spd == PETSC_BOOL3_TRUE) mat_mkl_cpardiso->mtype = 2;
657: else mat_mkl_cpardiso->mtype = -2;
659: /* analysis phase */
660: /*----------------*/
661: mat_mkl_cpardiso->phase = JOB_ANALYSIS;
663: cluster_sparse_solver(mat_mkl_cpardiso->pt, &mat_mkl_cpardiso->maxfct, &mat_mkl_cpardiso->mnum, &mat_mkl_cpardiso->mtype, &mat_mkl_cpardiso->phase, &mat_mkl_cpardiso->n, mat_mkl_cpardiso->a, mat_mkl_cpardiso->ia, mat_mkl_cpardiso->ja,
664: mat_mkl_cpardiso->perm, &mat_mkl_cpardiso->nrhs, mat_mkl_cpardiso->iparm, &mat_mkl_cpardiso->msglvl, NULL, NULL, &mat_mkl_cpardiso->comm_mkl_cpardiso, (PetscInt *)&mat_mkl_cpardiso->err);
668: mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
669: F->ops->choleskyfactornumeric = MatFactorNumeric_MKL_CPARDISO;
670: F->ops->solve = MatSolve_MKL_CPARDISO;
671: F->ops->solvetranspose = MatSolveTranspose_MKL_CPARDISO;
672: F->ops->matsolve = MatMatSolve_MKL_CPARDISO;
673: return 0;
674: }
676: PetscErrorCode MatView_MKL_CPARDISO(Mat A, PetscViewer viewer)
677: {
678: PetscBool iascii;
679: PetscViewerFormat format;
680: Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO *)A->data;
681: PetscInt i;
683: /* check if matrix is mkl_cpardiso type */
684: if (A->ops->solve != MatSolve_MKL_CPARDISO) return 0;
686: PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii);
687: if (iascii) {
688: PetscViewerGetFormat(viewer, &format);
689: if (format == PETSC_VIEWER_ASCII_INFO) {
690: PetscViewerASCIIPrintf(viewer, "MKL_CPARDISO run parameters:\n");
691: PetscViewerASCIIPrintf(viewer, "MKL_CPARDISO phase: %d \n", mat_mkl_cpardiso->phase);
692: for (i = 1; i <= 64; i++) PetscViewerASCIIPrintf(viewer, "MKL_CPARDISO iparm[%d]: %d \n", i, mat_mkl_cpardiso->iparm[i - 1]);
693: PetscViewerASCIIPrintf(viewer, "MKL_CPARDISO maxfct: %d \n", mat_mkl_cpardiso->maxfct);
694: PetscViewerASCIIPrintf(viewer, "MKL_CPARDISO mnum: %d \n", mat_mkl_cpardiso->mnum);
695: PetscViewerASCIIPrintf(viewer, "MKL_CPARDISO mtype: %d \n", mat_mkl_cpardiso->mtype);
696: PetscViewerASCIIPrintf(viewer, "MKL_CPARDISO n: %d \n", mat_mkl_cpardiso->n);
697: PetscViewerASCIIPrintf(viewer, "MKL_CPARDISO nrhs: %d \n", mat_mkl_cpardiso->nrhs);
698: PetscViewerASCIIPrintf(viewer, "MKL_CPARDISO msglvl: %d \n", mat_mkl_cpardiso->msglvl);
699: }
700: }
701: return 0;
702: }
704: PetscErrorCode MatGetInfo_MKL_CPARDISO(Mat A, MatInfoType flag, MatInfo *info)
705: {
706: Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO *)A->data;
708: info->block_size = 1.0;
709: info->nz_allocated = mat_mkl_cpardiso->nz + 0.0;
710: info->nz_unneeded = 0.0;
711: info->assemblies = 0.0;
712: info->mallocs = 0.0;
713: info->memory = 0.0;
714: info->fill_ratio_given = 0;
715: info->fill_ratio_needed = 0;
716: info->factor_mallocs = 0;
717: return 0;
718: }
720: PetscErrorCode MatMkl_CPardisoSetCntl_MKL_CPARDISO(Mat F, PetscInt icntl, PetscInt ival)
721: {
722: Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO *)F->data;
724: if (icntl <= 64) {
725: mat_mkl_cpardiso->iparm[icntl - 1] = ival;
726: } else {
727: if (icntl == 65) mkl_set_num_threads((int)ival);
728: else if (icntl == 66) mat_mkl_cpardiso->maxfct = ival;
729: else if (icntl == 67) mat_mkl_cpardiso->mnum = ival;
730: else if (icntl == 68) mat_mkl_cpardiso->msglvl = ival;
731: else if (icntl == 69) mat_mkl_cpardiso->mtype = ival;
732: }
733: return 0;
734: }
736: /*@
737: MatMkl_CPardisoSetCntl - Set Mkl_Pardiso parameters
739: Logically Collective
741: Input Parameters:
742: + F - the factored matrix obtained by calling `MatGetFactor()`
743: . icntl - index of Mkl_Pardiso parameter
744: - ival - value of Mkl_Pardiso parameter
746: Options Database Key:
747: . -mat_mkl_cpardiso_<icntl> <ival> - set the option numbered icntl to ival
749: Level: Intermediate
751: Note:
752: This routine cannot be used if you are solving the linear system with `TS`, `SNES`, or `KSP`, only if you directly call `MatGetFactor()` so use the options
753: database approach when working with `TS`, `SNES`, or `KSP`.
755: References:
756: . * - Mkl_Pardiso Users' Guide
758: .seealso: `MatGetFactor()`, `MATMPIAIJ`, `MATSOLVERMKL_CPARDISO`
759: @*/
760: PetscErrorCode MatMkl_CPardisoSetCntl(Mat F, PetscInt icntl, PetscInt ival)
761: {
762: PetscTryMethod(F, "MatMkl_CPardisoSetCntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival));
763: return 0;
764: }
766: /*MC
767: MATSOLVERMKL_CPARDISO - A matrix type providing direct solvers (LU) for parallel matrices via the external package MKL_CPARDISO.
769: Works with `MATMPIAIJ` matrices
771: Use -pc_type lu -pc_factor_mat_solver_type mkl_cpardiso to use this direct solver
773: Options Database Keys:
774: + -mat_mkl_cpardiso_65 - Suggested number of threads to use within MKL_CPARDISO
775: . -mat_mkl_cpardiso_66 - Maximum number of factors with identical sparsity structure that must be kept in memory at the same time
776: . -mat_mkl_cpardiso_67 - Indicates the actual matrix for the solution phase
777: . -mat_mkl_cpardiso_68 - Message level information, use 1 to get detailed information on the solver options
778: . -mat_mkl_cpardiso_69 - Defines the matrix type. IMPORTANT: When you set this flag, iparm parameters are going to be set to the default ones for the matrix type
779: . -mat_mkl_cpardiso_1 - Use default values
780: . -mat_mkl_cpardiso_2 - Fill-in reducing ordering for the input matrix
781: . -mat_mkl_cpardiso_4 - Preconditioned CGS/CG
782: . -mat_mkl_cpardiso_5 - User permutation
783: . -mat_mkl_cpardiso_6 - Write solution on x
784: . -mat_mkl_cpardiso_8 - Iterative refinement step
785: . -mat_mkl_cpardiso_10 - Pivoting perturbation
786: . -mat_mkl_cpardiso_11 - Scaling vectors
787: . -mat_mkl_cpardiso_12 - Solve with transposed or conjugate transposed matrix A
788: . -mat_mkl_cpardiso_13 - Improved accuracy using (non-) symmetric weighted matching
789: . -mat_mkl_cpardiso_18 - Numbers of non-zero elements
790: . -mat_mkl_cpardiso_19 - Report number of floating point operations
791: . -mat_mkl_cpardiso_21 - Pivoting for symmetric indefinite matrices
792: . -mat_mkl_cpardiso_24 - Parallel factorization control
793: . -mat_mkl_cpardiso_25 - Parallel forward/backward solve control
794: . -mat_mkl_cpardiso_27 - Matrix checker
795: . -mat_mkl_cpardiso_31 - Partial solve and computing selected components of the solution vectors
796: . -mat_mkl_cpardiso_34 - Optimal number of threads for conditional numerical reproducibility (CNR) mode
797: - -mat_mkl_cpardiso_60 - Intel MKL_CPARDISO mode
799: Level: beginner
801: Notes:
802: Use -mat_mkl_cpardiso_68 1 to display the number of threads the solver is using. MKL does not provide a way to directly access this
803: information.
805: For more information on the options check the MKL_CPARDISO manual
807: .seealso: `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMkl_CPardisoSetCntl()`, `MatGetFactor()`
808: M*/
810: static PetscErrorCode MatFactorGetSolverType_mkl_cpardiso(Mat A, MatSolverType *type)
811: {
812: *type = MATSOLVERMKL_CPARDISO;
813: return 0;
814: }
816: /* MatGetFactor for MPI AIJ matrices */
817: static PetscErrorCode MatGetFactor_mpiaij_mkl_cpardiso(Mat A, MatFactorType ftype, Mat *F)
818: {
819: Mat B;
820: Mat_MKL_CPARDISO *mat_mkl_cpardiso;
821: PetscBool isSeqAIJ, isMPIBAIJ, isMPISBAIJ;
823: /* Create the factorization matrix */
825: PetscObjectTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ);
826: PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &isMPIBAIJ);
827: PetscObjectTypeCompare((PetscObject)A, MATMPISBAIJ, &isMPISBAIJ);
828: MatCreate(PetscObjectComm((PetscObject)A), &B);
829: MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N);
830: PetscStrallocpy("mkl_cpardiso", &((PetscObject)B)->type_name);
831: MatSetUp(B);
833: PetscNew(&mat_mkl_cpardiso);
835: if (isSeqAIJ) mat_mkl_cpardiso->ConvertToTriples = MatCopy_seqaij_seqaij_MKL_CPARDISO;
836: else if (isMPIBAIJ) mat_mkl_cpardiso->ConvertToTriples = MatConvertToTriples_mpibaij_mpibaij_MKL_CPARDISO;
837: else if (isMPISBAIJ) mat_mkl_cpardiso->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij_MKL_CPARDISO;
838: else mat_mkl_cpardiso->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO;
840: if (ftype == MAT_FACTOR_LU) B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMKL_CPARDISO;
841: else B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_AIJMKL_CPARDISO;
842: B->ops->destroy = MatDestroy_MKL_CPARDISO;
844: B->ops->view = MatView_MKL_CPARDISO;
845: B->ops->getinfo = MatGetInfo_MKL_CPARDISO;
847: B->factortype = ftype;
848: B->assembled = PETSC_TRUE; /* required by -ksp_view */
850: B->data = mat_mkl_cpardiso;
852: /* set solvertype */
853: PetscFree(B->solvertype);
854: PetscStrallocpy(MATSOLVERMKL_CPARDISO, &B->solvertype);
856: PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mkl_cpardiso);
857: PetscObjectComposeFunction((PetscObject)B, "MatMkl_CPardisoSetCntl_C", MatMkl_CPardisoSetCntl_MKL_CPARDISO);
858: PetscInitialize_MKL_CPARDISO(A, mat_mkl_cpardiso);
860: *F = B;
861: return 0;
862: }
864: PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MKL_CPardiso(void)
865: {
866: MatSolverTypeRegister(MATSOLVERMKL_CPARDISO, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_mpiaij_mkl_cpardiso);
867: MatSolverTypeRegister(MATSOLVERMKL_CPARDISO, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_mpiaij_mkl_cpardiso);
868: MatSolverTypeRegister(MATSOLVERMKL_CPARDISO, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_mpiaij_mkl_cpardiso);
869: MatSolverTypeRegister(MATSOLVERMKL_CPARDISO, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_mpiaij_mkl_cpardiso);
870: return 0;
871: }