Actual source code: aijmkl.c
2: /*
3: Defines basic operations for the MATSEQAIJMKL matrix class.
4: This class is derived from the MATSEQAIJ class and retains the
5: compressed row storage (aka Yale sparse matrix format) but uses
6: sparse BLAS operations from the Intel Math Kernel Library (MKL)
7: wherever possible.
8: */
10: #include <../src/mat/impls/aij/seq/aij.h>
11: #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
12: #if defined(PETSC_HAVE_MKL_INTEL_ILP64)
13: #define MKL_ILP64
14: #endif
15: #include <mkl_spblas.h>
17: typedef struct {
18: PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
19: PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
20: PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
21: PetscObjectState state;
22: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
23: sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
24: struct matrix_descr descr;
25: #endif
26: } Mat_SeqAIJMKL;
28: extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat, MatAssemblyType);
30: PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A, MatType type, MatReuse reuse, Mat *newmat)
31: {
32: /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
33: /* so we will ignore 'MatType type'. */
34: Mat B = *newmat;
35: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
36: Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
37: #endif
39: if (reuse == MAT_INITIAL_MATRIX) MatDuplicate(A, MAT_COPY_VALUES, &B);
41: /* Reset the original function pointers. */
42: B->ops->duplicate = MatDuplicate_SeqAIJ;
43: B->ops->assemblyend = MatAssemblyEnd_SeqAIJ;
44: B->ops->destroy = MatDestroy_SeqAIJ;
45: B->ops->mult = MatMult_SeqAIJ;
46: B->ops->multtranspose = MatMultTranspose_SeqAIJ;
47: B->ops->multadd = MatMultAdd_SeqAIJ;
48: B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ;
49: B->ops->productsetfromoptions = MatProductSetFromOptions_SeqAIJ;
50: B->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJ_SeqAIJ;
51: B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ;
52: B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ;
53: B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ;
54: B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqAIJ;
56: PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqaijmkl_seqaij_C", NULL);
58: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
59: /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
60: * simply involves destroying the MKL sparse matrix handle and then freeing
61: * the spptr pointer. */
62: if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL *)B->spptr;
64: if (aijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
65: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
66: PetscFree(B->spptr);
68: /* Change the type of B to MATSEQAIJ. */
69: PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);
71: *newmat = B;
72: return 0;
73: }
75: PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
76: {
77: Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
80: /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an spptr pointer. */
81: if (aijmkl) {
82: /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
83: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
84: if (aijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
85: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
86: PetscFree(A->spptr);
87: }
89: /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
90: * to destroy everything that remains. */
91: PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);
92: /* Note that I don't call MatSetType(). I believe this is because that
93: * is only to be called when *building* a matrix. I could be wrong, but
94: * that is how things work for the SuperLU matrix class. */
95: PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaijmkl_seqaij_C", NULL);
96: MatDestroy_SeqAIJ(A);
97: return 0;
98: }
100: /* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
101: * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
102: * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
103: * handle, creates a new one, and then calls mkl_sparse_optimize().
104: * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
105: * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
106: * an unoptimized matrix handle here. */
107: PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
108: {
109: #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
110: /* If the MKL library does not have mkl_sparse_optimize(), then this routine
111: * does nothing. We make it callable anyway in this case because it cuts
112: * down on littering the code with #ifdefs. */
113: return 0;
114: #else
115: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
116: Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
117: PetscInt m, n;
118: MatScalar *aa;
119: PetscInt *aj, *ai;
121: #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
122: /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2
123: * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must
124: * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling
125: * mkl_sparse_optimize() later. */
126: if (aijmkl->no_SpMV2) return 0;
127: #endif
129: if (aijmkl->sparse_optimized) {
130: /* Matrix has been previously assembled and optimized. Must destroy old
131: * matrix handle before running the optimization step again. */
132: PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
133: }
134: aijmkl->sparse_optimized = PETSC_FALSE;
136: /* Now perform the SpMV2 setup and matrix optimization. */
137: aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
138: aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
139: aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
140: m = A->rmap->n;
141: n = A->cmap->n;
142: aj = a->j; /* aj[k] gives column index for element aa[k]. */
143: aa = a->a; /* Nonzero elements stored row-by-row. */
144: ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
145: if (a->nz && aa && !A->structure_only) {
146: /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
147: * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
148: PetscCallExternal(mkl_sparse_x_create_csr, &aijmkl->csrA, SPARSE_INDEX_BASE_ZERO, (MKL_INT)m, (MKL_INT)n, (MKL_INT *)ai, (MKL_INT *)(ai + 1), (MKL_INT *)aj, aa);
149: PetscCallExternal(mkl_sparse_set_mv_hint, aijmkl->csrA, SPARSE_OPERATION_NON_TRANSPOSE, aijmkl->descr, 1000);
150: PetscCallExternal(mkl_sparse_set_memory_hint, aijmkl->csrA, SPARSE_MEMORY_AGGRESSIVE);
151: if (!aijmkl->no_SpMV2) PetscCallExternal(mkl_sparse_optimize, aijmkl->csrA);
152: aijmkl->sparse_optimized = PETSC_TRUE;
153: PetscObjectStateGet((PetscObject)A, &(aijmkl->state));
154: } else {
155: aijmkl->csrA = PETSC_NULL;
156: }
158: return 0;
159: #endif
160: }
162: #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
163: /* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */
164: static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm, sparse_matrix_t csrA, PetscInt nrows, PetscInt ncols, Mat A)
165: {
166: sparse_index_base_t indexing;
167: PetscInt m, n;
168: PetscInt *aj, *ai, *dummy;
169: MatScalar *aa;
170: Mat_SeqAIJMKL *aijmkl;
172: if (csrA) {
173: /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
174: PetscCallExternal(mkl_sparse_x_export_csr, csrA, &indexing, (MKL_INT *)&m, (MKL_INT *)&n, (MKL_INT **)&ai, (MKL_INT **)&dummy, (MKL_INT **)&aj, &aa);
176: } else {
177: aj = ai = PETSC_NULL;
178: aa = PETSC_NULL;
179: }
181: MatSetType(A, MATSEQAIJ);
182: MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, nrows, ncols);
183: /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
184: * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
185: * they will be destroyed when the MKL handle is destroyed.
186: * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
187: if (csrA) {
188: MatSeqAIJSetPreallocationCSR(A, ai, aj, NULL);
189: } else {
190: /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */
191: MatSetUp(A);
192: MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
193: MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
194: }
196: /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
197: * Now turn it into a MATSEQAIJMKL. */
198: MatConvert_SeqAIJ_SeqAIJMKL(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A);
200: aijmkl = (Mat_SeqAIJMKL *)A->spptr;
201: aijmkl->csrA = csrA;
203: /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but
204: * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
205: * and just need to be able to run the MKL optimization step. */
206: aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
207: aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
208: aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
209: if (csrA) {
210: PetscCallExternal(mkl_sparse_set_mv_hint, aijmkl->csrA, SPARSE_OPERATION_NON_TRANSPOSE, aijmkl->descr, 1000);
211: PetscCallExternal(mkl_sparse_set_memory_hint, aijmkl->csrA, SPARSE_MEMORY_AGGRESSIVE);
212: }
213: PetscObjectStateGet((PetscObject)A, &(aijmkl->state));
214: return 0;
215: }
216: #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
218: /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
219: * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
220: * MatMatMultNumeric(). */
221: #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
222: static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
223: {
224: PetscInt i;
225: PetscInt nrows, ncols;
226: PetscInt nz;
227: PetscInt *ai, *aj, *dummy;
228: PetscScalar *aa;
229: Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
230: sparse_index_base_t indexing;
232: /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
233: if (!aijmkl->csrA) return 0;
235: /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
236: PetscCallExternal(mkl_sparse_x_export_csr, aijmkl->csrA, &indexing, (MKL_INT *)&nrows, (MKL_INT *)&ncols, (MKL_INT **)&ai, (MKL_INT **)&dummy, (MKL_INT **)&aj, &aa);
238: /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
239: * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
240: for (i = 0; i < nrows; i++) {
241: nz = ai[i + 1] - ai[i];
242: MatSetValues_SeqAIJ(A, 1, &i, nz, aj + ai[i], aa + ai[i], INSERT_VALUES);
243: }
245: MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
246: MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
248: PetscObjectStateGet((PetscObject)A, &(aijmkl->state));
249: /* At this point our matrix has a valid MKL handle, the contents of which match the PETSc AIJ representation.
250: * The MKL handle has *not* had mkl_sparse_optimize() called on it, though -- the MKL developers have confirmed
251: * that the matrix inspection/optimization step is not performed when matrix-matrix multiplication is finalized. */
252: aijmkl->sparse_optimized = PETSC_FALSE;
253: return 0;
254: }
255: #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
257: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
258: PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A, PetscViewer viewer)
259: {
260: PetscInt i, j, k;
261: PetscInt nrows, ncols;
262: PetscInt nz;
263: PetscInt *ai, *aj, *dummy;
264: PetscScalar *aa;
265: Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
266: sparse_index_base_t indexing;
268: PetscViewerASCIIPrintf(viewer, "Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n");
270: /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
271: if (!aijmkl->csrA) {
272: PetscViewerASCIIPrintf(viewer, "MKL matrix handle is NULL\n");
273: return 0;
274: }
276: /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
277: PetscCallExternal(mkl_sparse_x_export_csr, aijmkl->csrA, &indexing, (MKL_INT *)&nrows, (MKL_INT *)&ncols, (MKL_INT **)&ai, (MKL_INT **)&dummy, (MKL_INT **)&aj, &aa);
279: k = 0;
280: for (i = 0; i < nrows; i++) {
281: PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ": ", i);
282: nz = ai[i + 1] - ai[i];
283: for (j = 0; j < nz; j++) {
284: if (aa) {
285: PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ", %g) ", aj[k], PetscRealPart(aa[k]));
286: } else {
287: PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ", NULL)", aj[k]);
288: }
289: k++;
290: }
291: PetscViewerASCIIPrintf(viewer, "\n");
292: }
293: return 0;
294: }
295: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
297: PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
298: {
299: Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
300: Mat_SeqAIJMKL *aijmkl_dest;
302: MatDuplicate_SeqAIJ(A, op, M);
303: aijmkl_dest = (Mat_SeqAIJMKL *)(*M)->spptr;
304: PetscArraycpy(aijmkl_dest, aijmkl, 1);
305: aijmkl_dest->sparse_optimized = PETSC_FALSE;
306: if (aijmkl->eager_inspection) MatSeqAIJMKL_create_mkl_handle(A);
307: return 0;
308: }
310: PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
311: {
312: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
313: Mat_SeqAIJMKL *aijmkl;
315: if (mode == MAT_FLUSH_ASSEMBLY) return 0;
317: /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
318: * extra information and some different methods, call the AssemblyEnd
319: * routine for a MATSEQAIJ.
320: * I'm not sure if this is the best way to do this, but it avoids
321: * a lot of code duplication. */
322: a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */
323: MatAssemblyEnd_SeqAIJ(A, mode);
325: /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
326: * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
327: aijmkl = (Mat_SeqAIJMKL *)A->spptr;
328: if (aijmkl->eager_inspection) MatSeqAIJMKL_create_mkl_handle(A);
330: return 0;
331: }
333: #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
334: PetscErrorCode MatMult_SeqAIJMKL(Mat A, Vec xx, Vec yy)
335: {
336: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
337: const PetscScalar *x;
338: PetscScalar *y;
339: const MatScalar *aa;
340: PetscInt m = A->rmap->n;
341: PetscInt n = A->cmap->n;
342: PetscScalar alpha = 1.0;
343: PetscScalar beta = 0.0;
344: const PetscInt *aj, *ai;
345: char matdescra[6];
347: /* Variables not in MatMult_SeqAIJ. */
348: char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */
350: matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
351: matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
352: VecGetArrayRead(xx, &x);
353: VecGetArray(yy, &y);
354: aj = a->j; /* aj[k] gives column index for element aa[k]. */
355: aa = a->a; /* Nonzero elements stored row-by-row. */
356: ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
358: /* Call MKL sparse BLAS routine to do the MatMult. */
359: mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, y);
361: PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt);
362: VecRestoreArrayRead(xx, &x);
363: VecRestoreArray(yy, &y);
364: return 0;
365: }
366: #endif
368: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
369: PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy)
370: {
371: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
372: Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
373: const PetscScalar *x;
374: PetscScalar *y;
375: PetscObjectState state;
378: /* If there are no nonzero entries, zero yy and return immediately. */
379: if (!a->nz) {
380: VecGetArray(yy, &y);
381: PetscArrayzero(y, A->rmap->n);
382: VecRestoreArray(yy, &y);
383: return 0;
384: }
386: VecGetArrayRead(xx, &x);
387: VecGetArray(yy, &y);
389: /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
390: * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
391: * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
392: PetscObjectStateGet((PetscObject)A, &state);
393: if (!aijmkl->sparse_optimized || aijmkl->state != state) MatSeqAIJMKL_create_mkl_handle(A);
395: /* Call MKL SpMV2 executor routine to do the MatMult. */
396: PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, y);
398: PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt);
399: VecRestoreArrayRead(xx, &x);
400: VecRestoreArray(yy, &y);
401: return 0;
402: }
403: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
405: #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
406: PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A, Vec xx, Vec yy)
407: {
408: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
409: const PetscScalar *x;
410: PetscScalar *y;
411: const MatScalar *aa;
412: PetscInt m = A->rmap->n;
413: PetscInt n = A->cmap->n;
414: PetscScalar alpha = 1.0;
415: PetscScalar beta = 0.0;
416: const PetscInt *aj, *ai;
417: char matdescra[6];
419: /* Variables not in MatMultTranspose_SeqAIJ. */
420: char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */
422: matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
423: matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
424: VecGetArrayRead(xx, &x);
425: VecGetArray(yy, &y);
426: aj = a->j; /* aj[k] gives column index for element aa[k]. */
427: aa = a->a; /* Nonzero elements stored row-by-row. */
428: ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
430: /* Call MKL sparse BLAS routine to do the MatMult. */
431: mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, y);
433: PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt);
434: VecRestoreArrayRead(xx, &x);
435: VecRestoreArray(yy, &y);
436: return 0;
437: }
438: #endif
440: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
441: PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy)
442: {
443: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
444: Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
445: const PetscScalar *x;
446: PetscScalar *y;
447: PetscObjectState state;
450: /* If there are no nonzero entries, zero yy and return immediately. */
451: if (!a->nz) {
452: VecGetArray(yy, &y);
453: PetscArrayzero(y, A->cmap->n);
454: VecRestoreArray(yy, &y);
455: return 0;
456: }
458: VecGetArrayRead(xx, &x);
459: VecGetArray(yy, &y);
461: /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
462: * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
463: * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
464: PetscObjectStateGet((PetscObject)A, &state);
465: if (!aijmkl->sparse_optimized || aijmkl->state != state) MatSeqAIJMKL_create_mkl_handle(A);
467: /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
468: PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, y);
470: PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt);
471: VecRestoreArrayRead(xx, &x);
472: VecRestoreArray(yy, &y);
473: return 0;
474: }
475: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
477: #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
478: PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A, Vec xx, Vec yy, Vec zz)
479: {
480: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
481: const PetscScalar *x;
482: PetscScalar *y, *z;
483: const MatScalar *aa;
484: PetscInt m = A->rmap->n;
485: PetscInt n = A->cmap->n;
486: const PetscInt *aj, *ai;
487: PetscInt i;
489: /* Variables not in MatMultAdd_SeqAIJ. */
490: char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */
491: PetscScalar alpha = 1.0;
492: PetscScalar beta;
493: char matdescra[6];
495: matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
496: matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
498: VecGetArrayRead(xx, &x);
499: VecGetArrayPair(yy, zz, &y, &z);
500: aj = a->j; /* aj[k] gives column index for element aa[k]. */
501: aa = a->a; /* Nonzero elements stored row-by-row. */
502: ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
504: /* Call MKL sparse BLAS routine to do the MatMult. */
505: if (zz == yy) {
506: /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
507: beta = 1.0;
508: mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
509: } else {
510: /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
511: * MKL sparse BLAS does not have a MatMultAdd equivalent. */
512: beta = 0.0;
513: mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
514: for (i = 0; i < m; i++) z[i] += y[i];
515: }
517: PetscLogFlops(2.0 * a->nz);
518: VecRestoreArrayRead(xx, &x);
519: VecRestoreArrayPair(yy, zz, &y, &z);
520: return 0;
521: }
522: #endif
524: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
525: PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz)
526: {
527: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
528: Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
529: const PetscScalar *x;
530: PetscScalar *y, *z;
531: PetscInt m = A->rmap->n;
532: PetscInt i;
534: /* Variables not in MatMultAdd_SeqAIJ. */
535: PetscObjectState state;
538: /* If there are no nonzero entries, set zz = yy and return immediately. */
539: if (!a->nz) {
540: VecGetArrayPair(yy, zz, &y, &z);
541: PetscArraycpy(z, y, m);
542: VecRestoreArrayPair(yy, zz, &y, &z);
543: return 0;
544: }
546: VecGetArrayRead(xx, &x);
547: VecGetArrayPair(yy, zz, &y, &z);
549: /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
550: * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
551: * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
552: PetscObjectStateGet((PetscObject)A, &state);
553: if (!aijmkl->sparse_optimized || aijmkl->state != state) MatSeqAIJMKL_create_mkl_handle(A);
555: /* Call MKL sparse BLAS routine to do the MatMult. */
556: if (zz == yy) {
557: /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
558: * with alpha and beta both set to 1.0. */
559: PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 1.0, z);
560: } else {
561: /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
562: * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
563: PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, z);
564: for (i = 0; i < m; i++) z[i] += y[i];
565: }
567: PetscLogFlops(2.0 * a->nz);
568: VecRestoreArrayRead(xx, &x);
569: VecRestoreArrayPair(yy, zz, &y, &z);
570: return 0;
571: }
572: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
574: #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
575: PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A, Vec xx, Vec yy, Vec zz)
576: {
577: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
578: const PetscScalar *x;
579: PetscScalar *y, *z;
580: const MatScalar *aa;
581: PetscInt m = A->rmap->n;
582: PetscInt n = A->cmap->n;
583: const PetscInt *aj, *ai;
584: PetscInt i;
586: /* Variables not in MatMultTransposeAdd_SeqAIJ. */
587: char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */
588: PetscScalar alpha = 1.0;
589: PetscScalar beta;
590: char matdescra[6];
592: matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
593: matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
595: VecGetArrayRead(xx, &x);
596: VecGetArrayPair(yy, zz, &y, &z);
597: aj = a->j; /* aj[k] gives column index for element aa[k]. */
598: aa = a->a; /* Nonzero elements stored row-by-row. */
599: ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
601: /* Call MKL sparse BLAS routine to do the MatMult. */
602: if (zz == yy) {
603: /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
604: beta = 1.0;
605: mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
606: } else {
607: /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
608: * MKL sparse BLAS does not have a MatMultAdd equivalent. */
609: beta = 0.0;
610: mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
611: for (i = 0; i < n; i++) z[i] += y[i];
612: }
614: PetscLogFlops(2.0 * a->nz);
615: VecRestoreArrayRead(xx, &x);
616: VecRestoreArrayPair(yy, zz, &y, &z);
617: return 0;
618: }
619: #endif
621: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
622: PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz)
623: {
624: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
625: Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
626: const PetscScalar *x;
627: PetscScalar *y, *z;
628: PetscInt n = A->cmap->n;
629: PetscInt i;
630: PetscObjectState state;
632: /* Variables not in MatMultTransposeAdd_SeqAIJ. */
635: /* If there are no nonzero entries, set zz = yy and return immediately. */
636: if (!a->nz) {
637: VecGetArrayPair(yy, zz, &y, &z);
638: PetscArraycpy(z, y, n);
639: VecRestoreArrayPair(yy, zz, &y, &z);
640: return 0;
641: }
643: VecGetArrayRead(xx, &x);
644: VecGetArrayPair(yy, zz, &y, &z);
646: /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
647: * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
648: * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
649: PetscObjectStateGet((PetscObject)A, &state);
650: if (!aijmkl->sparse_optimized || aijmkl->state != state) MatSeqAIJMKL_create_mkl_handle(A);
652: /* Call MKL sparse BLAS routine to do the MatMult. */
653: if (zz == yy) {
654: /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
655: * with alpha and beta both set to 1.0. */
656: PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 1.0, z);
657: } else {
658: /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
659: * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
660: PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, z);
661: for (i = 0; i < n; i++) z[i] += y[i];
662: }
664: PetscLogFlops(2.0 * a->nz);
665: VecRestoreArrayRead(xx, &x);
666: VecRestoreArrayPair(yy, zz, &y, &z);
667: return 0;
668: }
669: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
671: /* -------------------------- MatProduct code -------------------------- */
672: #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
673: static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A, const sparse_operation_t transA, Mat B, const sparse_operation_t transB, Mat C)
674: {
675: Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL *)A->spptr, *b = (Mat_SeqAIJMKL *)B->spptr;
676: sparse_matrix_t csrA, csrB, csrC;
677: PetscInt nrows, ncols;
678: struct matrix_descr descr_type_gen;
679: PetscObjectState state;
681: /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does
682: * not handle sparse matrices with zero rows or columns. */
683: if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N;
684: else nrows = A->cmap->N;
685: if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N;
686: else ncols = B->rmap->N;
688: PetscObjectStateGet((PetscObject)A, &state);
689: if (!a->sparse_optimized || a->state != state) MatSeqAIJMKL_create_mkl_handle(A);
690: PetscObjectStateGet((PetscObject)B, &state);
691: if (!b->sparse_optimized || b->state != state) MatSeqAIJMKL_create_mkl_handle(B);
692: csrA = a->csrA;
693: csrB = b->csrA;
694: descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
696: if (csrA && csrB) {
697: PetscCallExternal(mkl_sparse_sp2m, transA, descr_type_gen, csrA, transB, descr_type_gen, csrB, SPARSE_STAGE_FULL_MULT_NO_VAL, &csrC);
698: } else {
699: csrC = PETSC_NULL;
700: }
702: MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF, csrC, nrows, ncols, C);
704: return 0;
705: }
707: PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A, const sparse_operation_t transA, Mat B, const sparse_operation_t transB, Mat C)
708: {
709: Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL *)A->spptr, *b = (Mat_SeqAIJMKL *)B->spptr, *c = (Mat_SeqAIJMKL *)C->spptr;
710: sparse_matrix_t csrA, csrB, csrC;
711: struct matrix_descr descr_type_gen;
712: PetscObjectState state;
714: PetscObjectStateGet((PetscObject)A, &state);
715: if (!a->sparse_optimized || a->state != state) MatSeqAIJMKL_create_mkl_handle(A);
716: PetscObjectStateGet((PetscObject)B, &state);
717: if (!b->sparse_optimized || b->state != state) MatSeqAIJMKL_create_mkl_handle(B);
718: csrA = a->csrA;
719: csrB = b->csrA;
720: csrC = c->csrA;
721: descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
723: if (csrA && csrB) {
724: PetscCallExternal(mkl_sparse_sp2m, transA, descr_type_gen, csrA, transB, descr_type_gen, csrB, SPARSE_STAGE_FINALIZE_MULT, &csrC);
725: } else {
726: csrC = PETSC_NULL;
727: }
729: /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
730: MatSeqAIJMKL_update_from_mkl_handle(C);
732: return 0;
733: }
735: PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
736: {
737: MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C);
738: return 0;
739: }
741: PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
742: {
743: MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C);
744: return 0;
745: }
747: PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
748: {
749: MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C);
750: return 0;
751: }
753: PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
754: {
755: MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C);
756: return 0;
757: }
759: PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
760: {
761: MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_TRANSPOSE, C);
762: return 0;
763: }
765: PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
766: {
767: MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_TRANSPOSE, C);
768: return 0;
769: }
771: static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
772: {
773: Mat_Product *product = C->product;
774: Mat A = product->A, B = product->B;
776: MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A, B, C);
777: return 0;
778: }
780: static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
781: {
782: Mat_Product *product = C->product;
783: Mat A = product->A, B = product->B;
784: PetscReal fill = product->fill;
786: MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A, B, fill, C);
787: C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL;
788: return 0;
789: }
791: PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A, Mat P, Mat C)
792: {
793: Mat Ct;
794: Vec zeros;
795: Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL *)A->spptr, *p = (Mat_SeqAIJMKL *)P->spptr, *c = (Mat_SeqAIJMKL *)C->spptr;
796: sparse_matrix_t csrA, csrP, csrC;
797: PetscBool set, flag;
798: struct matrix_descr descr_type_sym;
799: PetscObjectState state;
801: MatIsSymmetricKnown(A, &set, &flag);
804: PetscObjectStateGet((PetscObject)A, &state);
805: if (!a->sparse_optimized || a->state != state) MatSeqAIJMKL_create_mkl_handle(A);
806: PetscObjectStateGet((PetscObject)P, &state);
807: if (!p->sparse_optimized || p->state != state) MatSeqAIJMKL_create_mkl_handle(P);
808: csrA = a->csrA;
809: csrP = p->csrA;
810: csrC = c->csrA;
811: descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
812: descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
813: descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
815: /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
816: PetscCallExternal(mkl_sparse_sypr, SPARSE_OPERATION_TRANSPOSE, csrP, csrA, descr_type_sym, &csrC, SPARSE_STAGE_FINALIZE_MULT);
818: /* Update the PETSc AIJ representation for matrix C from contents of MKL handle.
819: * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix,
820: * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix.
821: * We have to fill in the missing portion, which we currently do below by forming the transpose and performing at MatAXPY
822: * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might
823: * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output
824: * the full matrix. */
825: MatSeqAIJMKL_update_from_mkl_handle(C);
826: MatTranspose(C, MAT_INITIAL_MATRIX, &Ct);
827: MatCreateVecs(C, &zeros, NULL);
828: VecSetFromOptions(zeros);
829: VecZeroEntries(zeros);
830: MatDiagonalSet(Ct, zeros, INSERT_VALUES);
831: MatAXPY(C, 1.0, Ct, DIFFERENT_NONZERO_PATTERN);
832: /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */
833: MatProductCreateWithMat(A, P, PETSC_NULL, C);
834: MatProductSetType(C, MATPRODUCT_PtAP);
835: MatSeqAIJMKL_create_mkl_handle(C);
836: VecDestroy(&zeros);
837: MatDestroy(&Ct);
838: return 0;
839: }
841: PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C)
842: {
843: Mat_Product *product = C->product;
844: Mat A = product->A, P = product->B;
845: Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL *)A->spptr, *p = (Mat_SeqAIJMKL *)P->spptr;
846: sparse_matrix_t csrA, csrP, csrC;
847: struct matrix_descr descr_type_sym;
848: PetscObjectState state;
850: PetscObjectStateGet((PetscObject)A, &state);
851: if (!a->sparse_optimized || a->state != state) MatSeqAIJMKL_create_mkl_handle(A);
852: PetscObjectStateGet((PetscObject)P, &state);
853: if (!p->sparse_optimized || p->state != state) MatSeqAIJMKL_create_mkl_handle(P);
854: csrA = a->csrA;
855: csrP = p->csrA;
856: descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
857: descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
858: descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
860: /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
861: if (csrP && csrA) {
862: PetscCallExternal(mkl_sparse_sypr, SPARSE_OPERATION_TRANSPOSE, csrP, csrA, descr_type_sym, &csrC, SPARSE_STAGE_FULL_MULT_NO_VAL);
863: } else {
864: csrC = PETSC_NULL;
865: }
867: /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle.
868: * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain
869: * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). I believe that
870: * leaving things in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this
871: * is guaranteed. */
872: MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF, csrC, P->cmap->N, P->cmap->N, C);
874: C->ops->productnumeric = MatProductNumeric_PtAP;
875: return 0;
876: }
878: static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C)
879: {
880: C->ops->productsymbolic = MatProductSymbolic_AB;
881: C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
882: return 0;
883: }
885: static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C)
886: {
887: C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL;
888: return 0;
889: }
891: static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C)
892: {
893: C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ;
894: C->ops->productsymbolic = MatProductSymbolic_ABt;
895: return 0;
896: }
898: static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C)
899: {
900: Mat_Product *product = C->product;
901: Mat A = product->A;
902: PetscBool set, flag;
904: if (PetscDefined(USE_COMPLEX)) {
905: /* By setting C->ops->productsymbolic to NULL, we ensure that MatProductSymbolic_Unsafe() will be used.
906: * We do this in several other locations in this file. This works for the time being, but these
907: * routines are considered unsafe and may be removed from the MatProduct code in the future.
908: * TODO: Add proper MATSEQAIJMKL implementations */
909: C->ops->productsymbolic = NULL;
910: } else {
911: /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */
912: MatIsSymmetricKnown(A, &set, &flag);
913: if (set && flag) C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
914: else C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
915: /* Note that we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_XXX(),
916: * depending on whether the algorithm for the general case vs. the real symmetric one is used. */
917: }
918: return 0;
919: }
921: static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C)
922: {
923: C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
924: return 0;
925: }
927: static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C)
928: {
929: C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
930: return 0;
931: }
933: PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C)
934: {
935: Mat_Product *product = C->product;
937: switch (product->type) {
938: case MATPRODUCT_AB:
939: MatProductSetFromOptions_SeqAIJMKL_AB(C);
940: break;
941: case MATPRODUCT_AtB:
942: MatProductSetFromOptions_SeqAIJMKL_AtB(C);
943: break;
944: case MATPRODUCT_ABt:
945: MatProductSetFromOptions_SeqAIJMKL_ABt(C);
946: break;
947: case MATPRODUCT_PtAP:
948: MatProductSetFromOptions_SeqAIJMKL_PtAP(C);
949: break;
950: case MATPRODUCT_RARt:
951: MatProductSetFromOptions_SeqAIJMKL_RARt(C);
952: break;
953: case MATPRODUCT_ABC:
954: MatProductSetFromOptions_SeqAIJMKL_ABC(C);
955: break;
956: default:
957: break;
958: }
959: return 0;
960: }
961: #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
962: /* ------------------------ End MatProduct code ------------------------ */
964: /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
965: * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqAIJMKL()
966: * routine, but can also be used to convert an assembled SeqAIJ matrix
967: * into a SeqAIJMKL one. */
968: PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A, MatType type, MatReuse reuse, Mat *newmat)
969: {
970: Mat B = *newmat;
971: Mat_SeqAIJMKL *aijmkl;
972: PetscBool set;
973: PetscBool sametype;
975: if (reuse == MAT_INITIAL_MATRIX) MatDuplicate(A, MAT_COPY_VALUES, &B);
977: PetscObjectTypeCompare((PetscObject)A, type, &sametype);
978: if (sametype) return 0;
980: PetscNew(&aijmkl);
981: B->spptr = (void *)aijmkl;
983: /* Set function pointers for methods that we inherit from AIJ but override.
984: * We also parse some command line options below, since those determine some of the methods we point to. */
985: B->ops->duplicate = MatDuplicate_SeqAIJMKL;
986: B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL;
987: B->ops->destroy = MatDestroy_SeqAIJMKL;
989: aijmkl->sparse_optimized = PETSC_FALSE;
990: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
991: aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */
992: #else
993: aijmkl->no_SpMV2 = PETSC_TRUE;
994: #endif
995: aijmkl->eager_inspection = PETSC_FALSE;
997: /* Parse command line options. */
998: PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "AIJMKL Options", "Mat");
999: PetscOptionsBool("-mat_aijmkl_no_spmv2", "Disable use of inspector-executor (SpMV 2) routines", "None", (PetscBool)aijmkl->no_SpMV2, (PetscBool *)&aijmkl->no_SpMV2, &set);
1000: PetscOptionsBool("-mat_aijmkl_eager_inspection", "Run inspection at matrix assembly time, instead of waiting until needed by an operation", "None", (PetscBool)aijmkl->eager_inspection, (PetscBool *)&aijmkl->eager_inspection, &set);
1001: PetscOptionsEnd();
1002: #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1003: if (!aijmkl->no_SpMV2) {
1004: PetscInfo(B, "User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n");
1005: aijmkl->no_SpMV2 = PETSC_TRUE;
1006: }
1007: #endif
1009: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1010: B->ops->mult = MatMult_SeqAIJMKL_SpMV2;
1011: B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2;
1012: B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2;
1013: B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
1014: #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
1015: B->ops->productsetfromoptions = MatProductSetFromOptions_SeqAIJMKL;
1016: B->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
1017: B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1018: B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL;
1019: B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1020: #if !defined(PETSC_USE_COMPLEX)
1021: B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
1022: #else
1023: B->ops->ptapnumeric = NULL;
1024: #endif
1025: #endif
1026: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
1028: #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
1029: /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the
1030: * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not
1031: * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For
1032: * versions in which the older interface has not been deprecated, we use the old interface. */
1033: if (aijmkl->no_SpMV2) {
1034: B->ops->mult = MatMult_SeqAIJMKL;
1035: B->ops->multtranspose = MatMultTranspose_SeqAIJMKL;
1036: B->ops->multadd = MatMultAdd_SeqAIJMKL;
1037: B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
1038: }
1039: #endif
1041: PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqaijmkl_seqaij_C", MatConvert_SeqAIJMKL_SeqAIJ);
1043: PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJMKL);
1044: *newmat = B;
1045: return 0;
1046: }
1048: /*@C
1049: MatCreateSeqAIJMKL - Creates a sparse matrix of type `MATSEQAIJMKL`.
1050: This type inherits from `MATSEQAIJ` and is largely identical, but uses sparse BLAS
1051: routines from Intel MKL whenever possible.
1052: If the installed version of MKL supports the "SpMV2" sparse
1053: inspector-executor routines, then those are used by default.
1054: `MatMult()`, `MatMultAdd()`, `MatMultTranspose()`, `MatMultTransposeAdd()`, `MatMatMult()`, `MatTransposeMatMult()`, and `MatPtAP()`
1055: (for symmetric A) operations are currently supported.
1056: Note that MKL version 18, update 2 or later is required for `MatPtAP()`, `MatPtAPNumeric()` and `MatMatMultNumeric()`.
1058: Collective
1060: Input Parameters:
1061: + comm - MPI communicator, set to `PETSC_COMM_SELF`
1062: . m - number of rows
1063: . n - number of columns
1064: . nz - number of nonzeros per row (same for all rows)
1065: - nnz - array containing the number of nonzeros in the various rows
1066: (possibly different for each row) or NULL
1068: Output Parameter:
1069: . A - the matrix
1071: Options Database Keys:
1072: + -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines
1073: - -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied
1075: Note:
1076: If nnz is given then nz is ignored
1078: Level: intermediate
1080: .seealso: `MatCreate()`, `MatCreateMPIAIJMKL()`, `MatSetValues()`
1081: @*/
1082: PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
1083: {
1084: MatCreate(comm, A);
1085: MatSetSizes(*A, m, n, m, n);
1086: MatSetType(*A, MATSEQAIJMKL);
1087: MatSeqAIJSetPreallocation_SeqAIJ(*A, nz, nnz);
1088: return 0;
1089: }
1091: PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
1092: {
1093: MatSetType(A, MATSEQAIJ);
1094: MatConvert_SeqAIJ_SeqAIJMKL(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A);
1095: return 0;
1096: }