Actual source code: aijmkl.c


/*
  Defines basic operations for the MATSEQAIJMKL matrix class.
  This class is derived from the MATSEQAIJ class and retains the
  compressed row storage (aka Yale sparse matrix format) but uses
  sparse BLAS operations from the Intel Math Kernel Library (MKL)
  wherever possible.
*/
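
/* For illustration only (a hypothetical snippet, not part of the original source): the
 * zero-based CSR/Yale arrays for the 3x3 matrix [[1,0,2],[0,3,0],[4,0,5]]. Row k occupies
 * positions ai[k] through ai[k+1]-1 of aj (column indices) and aa (values). */
#if 0
static const PetscInt  example_ai[] = {0, 2, 3, 5};    /* row start offsets; ai[nrows] is the total nonzero count */
static const PetscInt  example_aj[] = {0, 2, 1, 0, 2}; /* column index of each stored entry */
static const MatScalar example_aa[] = {1, 2, 3, 4, 5}; /* nonzero values, stored row-by-row */
#endif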

#include <../src/mat/impls/aij/seq/aij.h>
#include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
#if defined(PETSC_HAVE_MKL_INTEL_ILP64)
  #define MKL_ILP64
#endif
#include <mkl_spblas.h>

typedef struct {
  PetscBool        no_SpMV2;         /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
  PetscBool        eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
  PetscBool        sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
  PetscObjectState state;
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  sparse_matrix_t     csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
  struct matrix_descr descr;
#endif
} Mat_SeqAIJMKL;

extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat, MatAssemblyType);

PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A, MatType type, MatReuse reuse, Mat *newmat)
{
  /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
  /* so we will ignore 'MatType type'. */
  Mat B = *newmat;
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
#endif

  if (reuse == MAT_INITIAL_MATRIX) MatDuplicate(A, MAT_COPY_VALUES, &B);

  /* Reset the original function pointers. */
  B->ops->duplicate               = MatDuplicate_SeqAIJ;
  B->ops->assemblyend             = MatAssemblyEnd_SeqAIJ;
  B->ops->destroy                 = MatDestroy_SeqAIJ;
  B->ops->mult                    = MatMult_SeqAIJ;
  B->ops->multtranspose           = MatMultTranspose_SeqAIJ;
  B->ops->multadd                 = MatMultAdd_SeqAIJ;
  B->ops->multtransposeadd        = MatMultTransposeAdd_SeqAIJ;
  B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJ;
  B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJ_SeqAIJ;
  B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJ_SeqAIJ;
  B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ;
  B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ;
  B->ops->ptapnumeric             = MatPtAPNumeric_SeqAIJ_SeqAIJ;

  PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqaijmkl_seqaij_C", NULL);

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
   * simply involves destroying the MKL sparse matrix handle and then freeing
   * the spptr pointer. */
  if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL *)B->spptr;

  if (aijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
  PetscFree(B->spptr);

  /* Change the type of B to MATSEQAIJ. */
  PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);

  *newmat = B;
  return 0;
}

PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
{
  Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;

  /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an spptr pointer. */
  if (aijmkl) {
    /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
    if (aijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
    PetscFree(A->spptr);
  }

  /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
   * to destroy everything that remains. */
  PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);
  /* Note that we do not call MatSetType() here; it seems that routine is only
   * meant to be called when *building* a matrix. We could be wrong, but that
   * is how things work for the SuperLU matrix class. */
  PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaijmkl_seqaij_C", NULL);
  MatDestroy_SeqAIJ(A);
  return 0;
}

/* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
 * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
 * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
 * handle, creates a new one, and then calls mkl_sparse_optimize().
 * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
 * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
 * an unoptimized matrix handle here. */
PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
{
#if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  /* If the MKL library does not have mkl_sparse_optimize(), then this routine
   * does nothing. We make it callable anyway in this case because it cuts
   * down on littering the code with #ifdefs. */
  return 0;
#else
  Mat_SeqAIJ    *a      = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
  PetscInt       m, n;
  MatScalar     *aa;
  PetscInt      *aj, *ai;

  #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
  /* For MKL versions that still support the old, non-inspector-executor interfaces, we simply exit here if the no_SpMV2
   * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must
   * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling
   * mkl_sparse_optimize() later. */
  if (aijmkl->no_SpMV2) return 0;
  #endif

  if (aijmkl->sparse_optimized) {
    /* Matrix has been previously assembled and optimized. Must destroy old
     * matrix handle before running the optimization step again. */
    PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
  }
  aijmkl->sparse_optimized = PETSC_FALSE;

  /* Now perform the SpMV2 setup and matrix optimization. */
  aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
  aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
  aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
  m                  = A->rmap->n;
  n                  = A->cmap->n;
  aj                 = a->j; /* aj[k] gives column index for element aa[k]. */
  aa                 = a->a; /* Nonzero elements stored row-by-row. */
  ai                 = a->i; /* ai[k] is the position in aa and aj where row k starts. */
  if (a->nz && aa && !A->structure_only) {
    /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
     * The MKL sparse inspector-executor routines don't like being passed an empty matrix. */
    PetscCallExternal(mkl_sparse_x_create_csr, &aijmkl->csrA, SPARSE_INDEX_BASE_ZERO, (MKL_INT)m, (MKL_INT)n, (MKL_INT *)ai, (MKL_INT *)(ai + 1), (MKL_INT *)aj, aa);
    PetscCallExternal(mkl_sparse_set_mv_hint, aijmkl->csrA, SPARSE_OPERATION_NON_TRANSPOSE, aijmkl->descr, 1000);
    PetscCallExternal(mkl_sparse_set_memory_hint, aijmkl->csrA, SPARSE_MEMORY_AGGRESSIVE);
    if (!aijmkl->no_SpMV2) PetscCallExternal(mkl_sparse_optimize, aijmkl->csrA);
    aijmkl->sparse_optimized = PETSC_TRUE;
    PetscObjectStateGet((PetscObject)A, &(aijmkl->state));
  } else {
    aijmkl->csrA = PETSC_NULL;
  }

  return 0;
#endif
}

#if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
/* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */
static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm, sparse_matrix_t csrA, PetscInt nrows, PetscInt ncols, Mat A)
{
  sparse_index_base_t indexing;
  PetscInt            m, n;
  PetscInt           *aj, *ai, *dummy;
  MatScalar          *aa;
  Mat_SeqAIJMKL      *aijmkl;

  if (csrA) {
    /* Note: We must pass in &dummy below since MKL cannot accept NULL for this output array, even though we don't want it. */
    PetscCallExternal(mkl_sparse_x_export_csr, csrA, &indexing, (MKL_INT *)&m, (MKL_INT *)&n, (MKL_INT **)&ai, (MKL_INT **)&dummy, (MKL_INT **)&aj, &aa);
  } else {
    aj = ai = PETSC_NULL;
    aa      = PETSC_NULL;
  }

  MatSetType(A, MATSEQAIJ);
  MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, nrows, ncols);
  /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
   * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
   * they will be destroyed when the MKL handle is destroyed.
   * (In the interest of reducing memory consumption in the future, can we figure out good ways to deal with this?) */
  if (csrA) {
    MatSeqAIJSetPreallocationCSR(A, ai, aj, NULL);
  } else {
    /* Since MatSeqAIJSetPreallocationCSR() does the initial setup and assembly begin/end, we must do that ourselves here. */
    MatSetUp(A);
    MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
  }

  /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
   * Now turn it into a MATSEQAIJMKL. */
  MatConvert_SeqAIJ_SeqAIJMKL(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A);

  aijmkl       = (Mat_SeqAIJMKL *)A->spptr;
  aijmkl->csrA = csrA;

  /* The code below duplicates much of what is in MatSeqAIJMKL_create_mkl_handle(). I dislike this code duplication, but
   * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
   * and just need to be able to run the MKL optimization step. */
  aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
  aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
  aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
  if (csrA) {
    PetscCallExternal(mkl_sparse_set_mv_hint, aijmkl->csrA, SPARSE_OPERATION_NON_TRANSPOSE, aijmkl->descr, 1000);
    PetscCallExternal(mkl_sparse_set_memory_hint, aijmkl->csrA, SPARSE_MEMORY_AGGRESSIVE);
  }
  PetscObjectStateGet((PetscObject)A, &(aijmkl->state));
  return 0;
}
#endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */

/* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
 * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
 * MatMatMultNumeric(). */
#if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
{
  PetscInt            i;
  PetscInt            nrows, ncols;
  PetscInt            nz;
  PetscInt           *ai, *aj, *dummy;
  PetscScalar        *aa;
  Mat_SeqAIJMKL      *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
  sparse_index_base_t indexing;

  /* Exit immediately if the MKL matrix handle is NULL; this is the case for empty matrices (zero rows or columns). */
  if (!aijmkl->csrA) return 0;

  /* Note: We must pass in &dummy below since MKL cannot accept NULL for this output array, even though we don't want it. */
  PetscCallExternal(mkl_sparse_x_export_csr, aijmkl->csrA, &indexing, (MKL_INT *)&nrows, (MKL_INT *)&ncols, (MKL_INT **)&ai, (MKL_INT **)&dummy, (MKL_INT **)&aj, &aa);

  /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
   * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
  for (i = 0; i < nrows; i++) {
    nz = ai[i + 1] - ai[i];
    MatSetValues_SeqAIJ(A, 1, &i, nz, aj + ai[i], aa + ai[i], INSERT_VALUES);
  }

  MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);

  PetscObjectStateGet((PetscObject)A, &(aijmkl->state));
  /* At this point our matrix has a valid MKL handle, the contents of which match the PETSc AIJ representation.
   * The MKL handle has *not* had mkl_sparse_optimize() called on it, though -- the MKL developers have confirmed
   * that the matrix inspection/optimization step is not performed when matrix-matrix multiplication is finalized. */
  aijmkl->sparse_optimized = PETSC_FALSE;
  return 0;
}
#endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A, PetscViewer viewer)
{
  PetscInt            i, j, k;
  PetscInt            nrows, ncols;
  PetscInt            nz;
  PetscInt           *ai, *aj, *dummy;
  PetscScalar        *aa;
  Mat_SeqAIJMKL      *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
  sparse_index_base_t indexing;

  PetscViewerASCIIPrintf(viewer, "Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n");

  /* Exit immediately if the MKL matrix handle is NULL; this is the case for empty matrices (zero rows or columns). */
  if (!aijmkl->csrA) {
    PetscViewerASCIIPrintf(viewer, "MKL matrix handle is NULL\n");
    return 0;
  }

  /* Note: We must pass in &dummy below since MKL cannot accept NULL for this output array, even though we don't want it. */
  PetscCallExternal(mkl_sparse_x_export_csr, aijmkl->csrA, &indexing, (MKL_INT *)&nrows, (MKL_INT *)&ncols, (MKL_INT **)&ai, (MKL_INT **)&dummy, (MKL_INT **)&aj, &aa);

  k = 0;
  for (i = 0; i < nrows; i++) {
    PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ": ", i);
    nz = ai[i + 1] - ai[i];
    for (j = 0; j < nz; j++) {
      if (aa) {
        PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ", %g)  ", aj[k], PetscRealPart(aa[k]));
      } else {
        PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ", NULL)", aj[k]);
      }
      k++;
    }
    PetscViewerASCIIPrintf(viewer, "\n");
  }
  return 0;
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
{
  Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
  Mat_SeqAIJMKL *aijmkl_dest;

  MatDuplicate_SeqAIJ(A, op, M);
  aijmkl_dest = (Mat_SeqAIJMKL *)(*M)->spptr;
  PetscArraycpy(aijmkl_dest, aijmkl, 1);
  aijmkl_dest->sparse_optimized = PETSC_FALSE;
  if (aijmkl->eager_inspection) MatSeqAIJMKL_create_mkl_handle(A);
  return 0;
}

PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
{
  Mat_SeqAIJ    *a = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJMKL *aijmkl;

  if (mode == MAT_FLUSH_ASSEMBLY) return 0;

  /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
   * extra information and some different methods, call the AssemblyEnd
   * routine for a MATSEQAIJ.
   * I'm not sure if this is the best way to do this, but it avoids
   * a lot of code duplication. */
  a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */
  MatAssemblyEnd_SeqAIJ(A, mode);

  /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
   * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
  aijmkl = (Mat_SeqAIJMKL *)A->spptr;
  if (aijmkl->eager_inspection) MatSeqAIJMKL_create_mkl_handle(A);

  return 0;
}
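
/* A usage sketch (hypothetical application code, not part of the original source): eager
 * inspection is off by default and can be requested from the options database, e.g.
 *
 *     ./app -mat_type seqaijmkl -mat_aijmkl_eager_inspection
 *
 * assuming the application sets the matrix type from options, roughly as follows. */
#if 0
MatCreate(PETSC_COMM_SELF, &A);
MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, n, n);
MatSetType(A, MATSEQAIJMKL);
MatSetFromOptions(A); /* picks up -mat_aijmkl_eager_inspection and -mat_aijmkl_no_spmv2 */
#endif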

#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
PetscErrorCode MatMult_SeqAIJMKL(Mat A, Vec xx, Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
  const PetscScalar *x;
  PetscScalar       *y;
  const MatScalar   *aa;
  PetscInt           m     = A->rmap->n;
  PetscInt           n     = A->cmap->n;
  PetscScalar        alpha = 1.0;
  PetscScalar        beta  = 0.0;
  const PetscInt    *aj, *ai;
  char               matdescra[6];

  /* Variables not in MatMult_SeqAIJ. */
  char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */

  matdescra[0] = 'g'; /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
  VecGetArrayRead(xx, &x);
  VecGetArray(yy, &y);
  aj = a->j; /* aj[k] gives column index for element aa[k]. */
  aa = a->a; /* Nonzero elements stored row-by-row. */
  ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMult. */
  mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, y);

  PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt);
  VecRestoreArrayRead(xx, &x);
  VecRestoreArray(yy, &y);
  return 0;
}
#endif

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy)
{
  Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y;
  PetscObjectState   state;

  /* If there are no nonzero entries, zero yy and return immediately. */
  if (!a->nz) {
    VecGetArray(yy, &y);
    PetscArrayzero(y, A->rmap->n);
    VecRestoreArray(yy, &y);
    return 0;
  }

  VecGetArrayRead(xx, &x);
  VecGetArray(yy, &y);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscObjectStateGet((PetscObject)A, &state);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) MatSeqAIJMKL_create_mkl_handle(A);

  /* Call MKL SpMV2 executor routine to do the MatMult. */
  PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, y);

  PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt);
  VecRestoreArrayRead(xx, &x);
  VecRestoreArray(yy, &y);
  return 0;
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A, Vec xx, Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
  const PetscScalar *x;
  PetscScalar       *y;
  const MatScalar   *aa;
  PetscInt           m     = A->rmap->n;
  PetscInt           n     = A->cmap->n;
  PetscScalar        alpha = 1.0;
  PetscScalar        beta  = 0.0;
  const PetscInt    *aj, *ai;
  char               matdescra[6];

  /* Variables not in MatMultTranspose_SeqAIJ. */
  char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */

  matdescra[0] = 'g'; /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
  VecGetArrayRead(xx, &x);
  VecGetArray(yy, &y);
  aj = a->j; /* aj[k] gives column index for element aa[k]. */
  aa = a->a; /* Nonzero elements stored row-by-row. */
  ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMultTranspose. */
  mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, y);

  PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt);
  VecRestoreArrayRead(xx, &x);
  VecRestoreArray(yy, &y);
  return 0;
}
#endif

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy)
{
  Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y;
  PetscObjectState   state;

  /* If there are no nonzero entries, zero yy and return immediately. */
  if (!a->nz) {
    VecGetArray(yy, &y);
    PetscArrayzero(y, A->cmap->n);
    VecRestoreArray(yy, &y);
    return 0;
  }

  VecGetArrayRead(xx, &x);
  VecGetArray(yy, &y);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscObjectStateGet((PetscObject)A, &state);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) MatSeqAIJMKL_create_mkl_handle(A);

  /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
  PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, y);

  PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt);
  VecRestoreArrayRead(xx, &x);
  VecRestoreArray(yy, &y);
  return 0;
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
  const PetscScalar *x;
  PetscScalar       *y, *z;
  const MatScalar   *aa;
  PetscInt           m = A->rmap->n;
  PetscInt           n = A->cmap->n;
  const PetscInt    *aj, *ai;
  PetscInt           i;

  /* Variables not in MatMultAdd_SeqAIJ. */
  char        transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */
  PetscScalar alpha  = 1.0;
  PetscScalar beta;
  char        matdescra[6];

  matdescra[0] = 'g'; /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */

  VecGetArrayRead(xx, &x);
  VecGetArrayPair(yy, zz, &y, &z);
  aj = a->j; /* aj[k] gives column index for element aa[k]. */
  aa = a->a; /* Nonzero elements stored row-by-row. */
  ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMultAdd. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
    beta = 1.0;
    mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
  } else {
    /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the contents of yy to z.
     * MKL sparse BLAS does not have a MatMultAdd equivalent. */
    beta = 0.0;
    mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
    for (i = 0; i < m; i++) z[i] += y[i];
  }

  PetscLogFlops(2.0 * a->nz);
  VecRestoreArrayRead(xx, &x);
  VecRestoreArrayPair(yy, zz, &y, &z);
  return 0;
}
#endif

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y, *z;
  PetscInt           m = A->rmap->n;
  PetscInt           i;

  /* Variables not in MatMultAdd_SeqAIJ. */
  PetscObjectState state;

  /* If there are no nonzero entries, set zz = yy and return immediately. */
  if (!a->nz) {
    VecGetArrayPair(yy, zz, &y, &z);
    PetscArraycpy(z, y, m);
    VecRestoreArrayPair(yy, zz, &y, &z);
    return 0;
  }

  VecGetArrayRead(xx, &x);
  VecGetArrayPair(yy, zz, &y, &z);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscObjectStateGet((PetscObject)A, &state);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) MatSeqAIJMKL_create_mkl_handle(A);

  /* Call MKL sparse BLAS routine to do the MatMultAdd. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
     * with alpha and beta both set to 1.0. */
    PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 1.0, z);
  } else {
    /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
     * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
    PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, z);
    for (i = 0; i < m; i++) z[i] += y[i];
  }

  PetscLogFlops(2.0 * a->nz);
  VecRestoreArrayRead(xx, &x);
  VecRestoreArrayPair(yy, zz, &y, &z);
  return 0;
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
  const PetscScalar *x;
  PetscScalar       *y, *z;
  const MatScalar   *aa;
  PetscInt           m = A->rmap->n;
  PetscInt           n = A->cmap->n;
  const PetscInt    *aj, *ai;
  PetscInt           i;

  /* Variables not in MatMultTransposeAdd_SeqAIJ. */
  char        transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */
  PetscScalar alpha  = 1.0;
  PetscScalar beta;
  char        matdescra[6];

  matdescra[0] = 'g'; /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */

  VecGetArrayRead(xx, &x);
  VecGetArrayPair(yy, zz, &y, &z);
  aj = a->j; /* aj[k] gives column index for element aa[k]. */
  aa = a->a; /* Nonzero elements stored row-by-row. */
  ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMultTransposeAdd. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
    beta = 1.0;
    mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
  } else {
    /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the contents of yy to z.
     * MKL sparse BLAS does not have a MatMultAdd equivalent. */
    beta = 0.0;
    mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
    for (i = 0; i < n; i++) z[i] += y[i];
  }

  PetscLogFlops(2.0 * a->nz);
  VecRestoreArrayRead(xx, &x);
  VecRestoreArrayPair(yy, zz, &y, &z);
  return 0;
}
#endif

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y, *z;
  PetscInt           n = A->cmap->n;
  PetscInt           i;

  /* Variables not in MatMultTransposeAdd_SeqAIJ. */
  PetscObjectState state;

  /* If there are no nonzero entries, set zz = yy and return immediately. */
  if (!a->nz) {
    VecGetArrayPair(yy, zz, &y, &z);
    PetscArraycpy(z, y, n);
    VecRestoreArrayPair(yy, zz, &y, &z);
    return 0;
  }

  VecGetArrayRead(xx, &x);
  VecGetArrayPair(yy, zz, &y, &z);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscObjectStateGet((PetscObject)A, &state);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) MatSeqAIJMKL_create_mkl_handle(A);

  /* Call MKL sparse BLAS routine to do the MatMultTransposeAdd. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
     * with alpha and beta both set to 1.0. */
    PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 1.0, z);
  } else {
    /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
     * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
    PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, z);
    for (i = 0; i < n; i++) z[i] += y[i];
  }

  PetscLogFlops(2.0 * a->nz);
  VecRestoreArrayRead(xx, &x);
  VecRestoreArrayPair(yy, zz, &y, &z);
  return 0;
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

/* -------------------------- MatProduct code -------------------------- */
#if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A, const sparse_operation_t transA, Mat B, const sparse_operation_t transB, Mat C)
{
  Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *b = (Mat_SeqAIJMKL *)B->spptr;
  sparse_matrix_t     csrA, csrB, csrC;
  PetscInt            nrows, ncols;
  struct matrix_descr descr_type_gen;
  PetscObjectState    state;

  /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does
   * not handle sparse matrices with zero rows or columns. */
  if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N;
  else nrows = A->cmap->N;
  if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N;
  else ncols = B->rmap->N;

  PetscObjectStateGet((PetscObject)A, &state);
  if (!a->sparse_optimized || a->state != state) MatSeqAIJMKL_create_mkl_handle(A);
  PetscObjectStateGet((PetscObject)B, &state);
  if (!b->sparse_optimized || b->state != state) MatSeqAIJMKL_create_mkl_handle(B);
  csrA                = a->csrA;
  csrB                = b->csrA;
  descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;

  if (csrA && csrB) {
    PetscCallExternal(mkl_sparse_sp2m, transA, descr_type_gen, csrA, transB, descr_type_gen, csrB, SPARSE_STAGE_FULL_MULT_NO_VAL, &csrC);
  } else {
    csrC = PETSC_NULL;
  }

  MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF, csrC, nrows, ncols, C);

  return 0;
}

PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A, const sparse_operation_t transA, Mat B, const sparse_operation_t transB, Mat C)
{
  Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *b = (Mat_SeqAIJMKL *)B->spptr, *c = (Mat_SeqAIJMKL *)C->spptr;
  sparse_matrix_t     csrA, csrB, csrC;
  struct matrix_descr descr_type_gen;
  PetscObjectState    state;

  PetscObjectStateGet((PetscObject)A, &state);
  if (!a->sparse_optimized || a->state != state) MatSeqAIJMKL_create_mkl_handle(A);
  PetscObjectStateGet((PetscObject)B, &state);
  if (!b->sparse_optimized || b->state != state) MatSeqAIJMKL_create_mkl_handle(B);
  csrA                = a->csrA;
  csrB                = b->csrA;
  csrC                = c->csrA;
  descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;

  if (csrA && csrB) {
    PetscCallExternal(mkl_sparse_sp2m, transA, descr_type_gen, csrA, transB, descr_type_gen, csrB, SPARSE_STAGE_FINALIZE_MULT, &csrC);
  } else {
    csrC = PETSC_NULL;
  }

  /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
  MatSeqAIJMKL_update_from_mkl_handle(C);

  return 0;
}

PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
{
  MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C);
  return 0;
}

PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
{
  MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C);
  return 0;
}

PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
{
  MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C);
  return 0;
}

PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
{
  MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C);
  return 0;
}

PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
{
  MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_TRANSPOSE, C);
  return 0;
}

PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
{
  MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_TRANSPOSE, C);
  return 0;
}

static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
{
  Mat_Product *product = C->product;
  Mat          A = product->A, B = product->B;

  MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A, B, C);
  return 0;
}

static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
{
  Mat_Product *product = C->product;
  Mat          A = product->A, B = product->B;
  PetscReal    fill = product->fill;

  MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A, B, fill, C);
  C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL;
  return 0;
}

PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A, Mat P, Mat C)
{
  Mat                 Ct;
  Vec                 zeros;
  Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *p = (Mat_SeqAIJMKL *)P->spptr, *c = (Mat_SeqAIJMKL *)C->spptr;
  sparse_matrix_t     csrA, csrP, csrC;
  PetscBool           set, flag;
  struct matrix_descr descr_type_sym;
  PetscObjectState    state;

  MatIsSymmetricKnown(A, &set, &flag);

  PetscObjectStateGet((PetscObject)A, &state);
  if (!a->sparse_optimized || a->state != state) MatSeqAIJMKL_create_mkl_handle(A);
  PetscObjectStateGet((PetscObject)P, &state);
  if (!p->sparse_optimized || p->state != state) MatSeqAIJMKL_create_mkl_handle(P);
  csrA                = a->csrA;
  csrP                = p->csrA;
  csrC                = c->csrA;
  descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
  descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
  descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;

  /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
  PetscCallExternal(mkl_sparse_sypr, SPARSE_OPERATION_TRANSPOSE, csrP, csrA, descr_type_sym, &csrC, SPARSE_STAGE_FINALIZE_MULT);

  /* Update the PETSc AIJ representation for matrix C from contents of MKL handle.
   * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix,
   * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix.
   * We have to fill in the missing portion, which we currently do below by forming the transpose and performing a MatAXPY
   * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might
   * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output
   * the full matrix. */
  MatSeqAIJMKL_update_from_mkl_handle(C);
  MatTranspose(C, MAT_INITIAL_MATRIX, &Ct);
  MatCreateVecs(C, &zeros, NULL);
  VecSetFromOptions(zeros);
  VecZeroEntries(zeros);
  MatDiagonalSet(Ct, zeros, INSERT_VALUES);
  MatAXPY(C, 1.0, Ct, DIFFERENT_NONZERO_PATTERN);
  /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */
  MatProductCreateWithMat(A, P, PETSC_NULL, C);
  MatProductSetType(C, MATPRODUCT_PtAP);
  MatSeqAIJMKL_create_mkl_handle(C);
  VecDestroy(&zeros);
  MatDestroy(&Ct);
  return 0;
}

PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C)
{
  Mat_Product        *product = C->product;
  Mat                 A = product->A, P = product->B;
  Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *p = (Mat_SeqAIJMKL *)P->spptr;
  sparse_matrix_t     csrA, csrP, csrC;
  struct matrix_descr descr_type_sym;
  PetscObjectState    state;

  PetscObjectStateGet((PetscObject)A, &state);
  if (!a->sparse_optimized || a->state != state) MatSeqAIJMKL_create_mkl_handle(A);
  PetscObjectStateGet((PetscObject)P, &state);
  if (!p->sparse_optimized || p->state != state) MatSeqAIJMKL_create_mkl_handle(P);
  csrA                = a->csrA;
  csrP                = p->csrA;
  descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
  descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
  descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;

  /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
  if (csrP && csrA) {
    PetscCallExternal(mkl_sparse_sypr, SPARSE_OPERATION_TRANSPOSE, csrP, csrA, descr_type_sym, &csrC, SPARSE_STAGE_FULL_MULT_NO_VAL);
  } else {
    csrC = PETSC_NULL;
  }

  /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle.
   * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain
   * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). I believe that
   * leaving things in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this
   * is guaranteed. */
  MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF, csrC, P->cmap->N, P->cmap->N, C);

  C->ops->productnumeric = MatProductNumeric_PtAP;
  return 0;
}

static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C)
{
  C->ops->productsymbolic = MatProductSymbolic_AB;
  C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
  return 0;
}

static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C)
{
  C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL;
  return 0;
}

static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C)
{
  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  return 0;
}

static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C)
{
  Mat_Product *product = C->product;
  Mat          A       = product->A;
  PetscBool    set, flag;

  if (PetscDefined(USE_COMPLEX)) {
    /* By setting C->ops->productsymbolic to NULL, we ensure that MatProductSymbolic_Unsafe() will be used.
     * We do this in several other locations in this file. This works for the time being, but these
     * routines are considered unsafe and may be removed from the MatProduct code in the future.
     * TODO: Add proper MATSEQAIJMKL implementations */
    C->ops->productsymbolic = NULL;
  } else {
    /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */
    MatIsSymmetricKnown(A, &set, &flag);
    if (set && flag) C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
    else C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
    /* Note that we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_XXX(),
     * depending on whether the algorithm for the general case vs. the real symmetric one is used. */
  }
  return 0;
}

static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C)
{
  C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
  return 0;
}

static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C)
{
  C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
  return 0;
}

PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C)
{
  Mat_Product *product = C->product;

  switch (product->type) {
  case MATPRODUCT_AB:
    MatProductSetFromOptions_SeqAIJMKL_AB(C);
    break;
  case MATPRODUCT_AtB:
    MatProductSetFromOptions_SeqAIJMKL_AtB(C);
    break;
  case MATPRODUCT_ABt:
    MatProductSetFromOptions_SeqAIJMKL_ABt(C);
    break;
  case MATPRODUCT_PtAP:
    MatProductSetFromOptions_SeqAIJMKL_PtAP(C);
    break;
  case MATPRODUCT_RARt:
    MatProductSetFromOptions_SeqAIJMKL_RARt(C);
    break;
  case MATPRODUCT_ABC:
    MatProductSetFromOptions_SeqAIJMKL_ABC(C);
    break;
  default:
    break;
  }
  return 0;
}
#endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
/* ------------------------ End MatProduct code ------------------------ */
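
/* A usage sketch (hypothetical application code, not part of the original source): with both
 * operands of type MATSEQAIJMKL and MKL sp2m support available, products such as C = A*B and
 * D = A^T*B dispatch through the symbolic/numeric routines above, e.g. */
#if 0
MatMatMult(A, B, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &C);          /* C = A*B   */
MatTransposeMatMult(A, B, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &D); /* D = A^T*B */
#endif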

/* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
 * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqAIJMKL()
 * routine, but can also be used to convert an assembled SeqAIJ matrix
 * into a SeqAIJMKL one. */
PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A, MatType type, MatReuse reuse, Mat *newmat)
{
  Mat            B = *newmat;
  Mat_SeqAIJMKL *aijmkl;
  PetscBool      set;
  PetscBool      sametype;

  if (reuse == MAT_INITIAL_MATRIX) MatDuplicate(A, MAT_COPY_VALUES, &B);

  PetscObjectTypeCompare((PetscObject)A, type, &sametype);
  if (sametype) return 0;

  PetscNew(&aijmkl);
  B->spptr = (void *)aijmkl;

  /* Set function pointers for methods that we inherit from AIJ but override.
   * We also parse some command line options below, since those determine some of the methods we point to. */
  B->ops->duplicate   = MatDuplicate_SeqAIJMKL;
  B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL;
  B->ops->destroy     = MatDestroy_SeqAIJMKL;

  aijmkl->sparse_optimized = PETSC_FALSE;
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */
#else
  aijmkl->no_SpMV2 = PETSC_TRUE;
#endif
  aijmkl->eager_inspection = PETSC_FALSE;

  /* Parse command line options. */
  PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "AIJMKL Options", "Mat");
  PetscOptionsBool("-mat_aijmkl_no_spmv2", "Disable use of inspector-executor (SpMV 2) routines", "None", (PetscBool)aijmkl->no_SpMV2, (PetscBool *)&aijmkl->no_SpMV2, &set);
  PetscOptionsBool("-mat_aijmkl_eager_inspection", "Run inspection at matrix assembly time, instead of waiting until needed by an operation", "None", (PetscBool)aijmkl->eager_inspection, (PetscBool *)&aijmkl->eager_inspection, &set);
  PetscOptionsEnd();
#if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  if (!aijmkl->no_SpMV2) {
    PetscInfo(B, "User requested use of MKL SpMV2 routines, but the MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n");
    aijmkl->no_SpMV2 = PETSC_TRUE;
  }
#endif

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  B->ops->mult             = MatMult_SeqAIJMKL_SpMV2;
  B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL_SpMV2;
  B->ops->multadd          = MatMultAdd_SeqAIJMKL_SpMV2;
  B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
  #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
  B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJMKL;
  B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
  B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
  B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL;
  B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
    #if !defined(PETSC_USE_COMPLEX)
  B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
    #else
  B->ops->ptapnumeric = NULL;
    #endif
  #endif
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
  /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the
   * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not
   * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For
   * versions in which the older interface has not been deprecated, we use the old interface. */
  if (aijmkl->no_SpMV2) {
    B->ops->mult             = MatMult_SeqAIJMKL;
    B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
    B->ops->multadd          = MatMultAdd_SeqAIJMKL;
    B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
  }
#endif

  PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqaijmkl_seqaij_C", MatConvert_SeqAIJMKL_SeqAIJ);

  PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJMKL);
  *newmat = B;
  return 0;
}
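
/* A usage sketch (hypothetical application code, not part of the original source): converting
 * an assembled MATSEQAIJ matrix to MATSEQAIJMKL in place, and back to the base type. */
#if 0
MatConvert(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A); /* A becomes a MATSEQAIJMKL      */
MatConvert(A, MATSEQAIJ, MAT_INPLACE_MATRIX, &A);    /* ...and back to plain MATSEQAIJ */
#endif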

/*@C
   MatCreateSeqAIJMKL - Creates a sparse matrix of type `MATSEQAIJMKL`.
   This type inherits from `MATSEQAIJ` and is largely identical, but uses sparse BLAS
   routines from Intel MKL whenever possible.
   If the installed version of MKL supports the "SpMV2" sparse
   inspector-executor routines, then those are used by default.
   `MatMult()`, `MatMultAdd()`, `MatMultTranspose()`, `MatMultTransposeAdd()`, `MatMatMult()`, `MatTransposeMatMult()`, and `MatPtAP()`
   (for symmetric A) operations are currently supported.
   Note that MKL version 18, update 2 or later is required for `MatPtAP()`, `MatPtAPNumeric()` and `MatMatMultNumeric()`.

   Collective

   Input Parameters:
+  comm - MPI communicator, set to `PETSC_COMM_SELF`
.  m - number of rows
.  n - number of columns
.  nz - number of nonzeros per row (same for all rows)
-  nnz - array containing the number of nonzeros in the various rows
         (possibly different for each row) or NULL

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+  -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines
-  -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied

   Note:
   If nnz is given then nz is ignored

   Level: intermediate

.seealso: `MatCreate()`, `MatCreateMPIAIJMKL()`, `MatSetValues()`
@*/
PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
{
  MatCreate(comm, A);
  MatSetSizes(*A, m, n, m, n);
  MatSetType(*A, MATSEQAIJMKL);
  MatSeqAIJSetPreallocation_SeqAIJ(*A, nz, nnz);
  return 0;
}
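
/* A usage sketch (hypothetical application code, not part of the original source): creating a
 * 10x10 MATSEQAIJMKL matrix preallocated for 3 nonzeros per row. */
#if 0
Mat A;
MatCreateSeqAIJMKL(PETSC_COMM_SELF, 10, 10, 3, NULL, &A);
/* ... MatSetValues() loop, then MatAssemblyBegin()/MatAssemblyEnd() ... */
MatDestroy(&A);
#endif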

PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
{
  MatSetType(A, MATSEQAIJ);
  MatConvert_SeqAIJ_SeqAIJMKL(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A);
  return 0;
}