Actual source code: mpibaijmkl.c

#include <../src/mat/impls/baij/mpi/mpibaij.h>

PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat, MatType, MatReuse, Mat *);

/* Preallocate as a standard MPIBAIJ matrix, then convert the local diagonal (b->A) and
   off-diagonal (b->B) blocks to MATSEQBAIJMKL so the MKL sparse kernels are used */
static PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJMKL(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
{
  Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;

  PetscFunctionBegin;
  PetscCall(MatMPIBAIJSetPreallocation_MPIBAIJ(B, bs, d_nz, d_nnz, o_nz, o_nnz));
  PetscCall(MatConvert_SeqBAIJ_SeqBAIJMKL(b->A, MATSEQBAIJMKL, MAT_INPLACE_MATRIX, &b->A));
  PetscCall(MatConvert_SeqBAIJ_SeqBAIJMKL(b->B, MATSEQBAIJMKL, MAT_INPLACE_MATRIX, &b->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Change the type name and compose the MKL-specific preallocation routine, which converts
   the local blocks when preallocation is performed */
static PetscErrorCode MatConvert_MPIBAIJ_MPIBAIJMKL(Mat A, MatType type, MatReuse reuse, Mat *newmat)
{
  Mat B = *newmat;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));

  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJMKL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJMKL));
  *newmat = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
   MatCreateBAIJMKL - Creates a sparse parallel matrix in `MATBAIJMKL` format (block compressed row).

   Collective

   Input Parameters:
+  comm  - MPI communicator
.  bs    - size of block; the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize, but the row
           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
.  m     - number of local rows (or `PETSC_DECIDE` to have it calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n     - number of local columns (or `PETSC_DECIDE` to have it calculated if N is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax.
.  M     - number of global rows (or `PETSC_DETERMINE` to have it calculated if m is given)
.  N     - number of global columns (or `PETSC_DETERMINE` to have it calculated if n is given)
.  d_nz  - number of nonzero blocks per block row in the diagonal portion of the local
           submatrix (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           of the diagonal portion of the local submatrix (possibly different for each block
           row), or NULL. If you plan to factor the matrix you must leave room for the diagonal entry
           and set it even if it is zero.
.  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of the local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzero blocks in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row), or NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact> - set hash table factor

   It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`]
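
   For illustration, a minimal sketch of that paradigm for a multi-process communicator
   (the sizes, block size, and preallocation counts below are placeholders, not required values):
.vb
   Mat A;
   MatCreate(comm, &A);
   MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, M, N);
   MatSetType(A, MATMPIBAIJMKL);
   MatMPIBAIJSetPreallocation(A, bs, d_nz, NULL, o_nz, NULL);
.ve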

   Notes:
   This type inherits from `MATBAIJ` and is largely identical to it, but uses sparse BLAS
   routines from Intel MKL whenever possible.
   `MatMult()`, `MatMultAdd()`, `MatMultTranspose()`, and `MatMultTransposeAdd()`
   operations are currently supported.
   If the installed version of MKL supports the "SpMV2" sparse
   inspector-executor routines, then those are used by default.
   Default PETSc kernels are used otherwise.
   For good matrix assembly performance the user should preallocate the matrix
   storage by setting the parameters `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
   By setting these parameters accurately, performance can be increased by more
   than a factor of 50.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   A nonzero block is any block that has one or more nonzeros in it.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either `d_nz` or `d_nnz` (not both). Set
   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |o o o d d d o o o o  o  o
   row 4  |o o o d d d o o o o  o  o
   row 5  |o o o d d d o o o o  o  o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d and the o submatrices are
   stored simply in the `MATSEQBAIJMKL` format for compressed row storage.

   Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects `d_nz` >> `o_nz`. For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.
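
   For example, if bs = 3 and every depicted location above held a nonzero (an illustrative
   assumption, not a requirement of the routine), this process would own a single block row
   whose d part contains 1 block (columns 3-5) and whose o part contains 3 blocks
   (columns 0-2, 6-8, and 9-11), so one could preallocate with
.vb
   PetscInt d_nnz[] = {1}, o_nnz[] = {3};
   MatMPIBAIJSetPreallocation(A, 3, 0, d_nnz, 0, o_nnz);  /* the 0 values are ignored because the arrays are given */
.ve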

   Level: intermediate

.seealso: `MATBAIJMKL`, `MATBAIJ`, `MatCreate()`, `MatCreateSeqBAIJMKL()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
@*/

PetscErrorCode MatCreateBAIJMKL(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
{
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size > 1) {
    PetscCall(MatSetType(*A, MATMPIBAIJMKL));
    PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
  } else {
    PetscCall(MatSetType(*A, MATSEQBAIJMKL));
    PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Constructor for MATMPIBAIJMKL: build an MPIBAIJ matrix, then convert it in place */
PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJMKL(Mat A)
{
  PetscFunctionBegin;
  PetscCall(MatSetType(A, MATMPIBAIJ));
  PetscCall(MatConvert_MPIBAIJ_MPIBAIJMKL(A, MATMPIBAIJMKL, MAT_INPLACE_MATRIX, &A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATBAIJMKL - MATBAIJMKL = "baijmkl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQBAIJMKL` when constructed with a single process communicator,
   and `MATMPIBAIJMKL` otherwise. As a result, for single process communicators,
   `MatSeqBAIJSetPreallocation()` is supported, and similarly `MatMPIBAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.
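
   A minimal sketch of that usage (the sizes, block size, and preallocation counts are
   placeholders; the preallocation call that does not match the matrix's actual type has no effect):
.vb
   MatCreate(comm, &A);
   MatSetSizes(A, m, n, M, N);
   MatSetType(A, MATBAIJMKL);   /* or use MatSetFromOptions() with -mat_type baijmkl */
   MatSeqBAIJSetPreallocation(A, bs, d_nz, NULL);
   MatMPIBAIJSetPreallocation(A, bs, d_nz, NULL, o_nz, NULL);
.ve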

   Options Database Keys:
.  -mat_type baijmkl - sets the matrix type to `MATBAIJMKL` during a call to `MatSetFromOptions()`

   Level: beginner

.seealso: `MatCreateBAIJMKL()`, `MATSEQBAIJMKL`, `MATMPIBAIJMKL`
M*/