Actual source code: mpibaijmkl.c
1: #include <../src/mat/impls/baij/mpi/mpibaij.h>
3: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat, MatType, MatReuse, Mat *);
5: static PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJMKL(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
6: {
7: Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
9: MatMPIBAIJSetPreallocation_MPIBAIJ(B, bs, d_nz, d_nnz, o_nz, o_nnz);
10: MatConvert_SeqBAIJ_SeqBAIJMKL(b->A, MATSEQBAIJMKL, MAT_INPLACE_MATRIX, &b->A);
11: MatConvert_SeqBAIJ_SeqBAIJMKL(b->B, MATSEQBAIJMKL, MAT_INPLACE_MATRIX, &b->B);
12: return 0;
13: }
15: static PetscErrorCode MatConvert_MPIBAIJ_MPIBAIJMKL(Mat A, MatType type, MatReuse reuse, Mat *newmat)
16: {
17: Mat B = *newmat;
19: if (reuse == MAT_INITIAL_MATRIX) MatDuplicate(A, MAT_COPY_VALUES, &B);
21: PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJMKL);
22: PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJMKL);
23: *newmat = B;
24: return 0;
25: }
27: /*@C
28: MatCreateBAIJMKL - Creates a sparse parallel matrix in `MATBAIJMKL` format (block compressed row).
30: Collective
32: Input Parameters:
33: + comm - MPI communicator
34: . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
35: blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
36: . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
37: This value should be the same as the local size used in creating the
38: y vector for the matrix-vector product y = Ax.
39: . n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
40: This value should be the same as the local size used in creating the
41: x vector for the matrix-vector product y = Ax.
42: . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
43: . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
44: . d_nz - number of nonzero blocks per block row in diagonal portion of local
45: submatrix (same for all local rows)
46: . d_nnz - array containing the number of nonzero blocks in the various block rows
47: of the in diagonal portion of the local (possibly different for each block
48: row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry
49: and set it even if it is zero.
50: . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local
51: submatrix (same for all local rows).
52: - o_nnz - array containing the number of nonzero blocks in the various block rows of the
53: off-diagonal portion of the local submatrix (possibly different for
54: each block row) or NULL.
56: Output Parameter:
57: . A - the matrix
59: Options Database Keys:
60: + -mat_block_size - size of the blocks to use
61: - -mat_use_hash_table <fact> - set hash table factor
63: It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
64: MatXXXXSetPreallocation() paradigm instead of this routine directly.
65: [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
67: Notes:
68: This type inherits from `MATBAIJ` and is largely identical, but uses sparse BLAS
69: routines from Intel MKL whenever possible.
70: `MatMult()`, `MatMultAdd()`, `MatMultTranspose()`, and `MatMultTransposeAdd()`
71: operations are currently supported.
72: If the installed version of MKL supports the "SpMV2" sparse
73: inspector-executor routines, then those are used by default.
74: Default PETSc kernels are used otherwise.
75: For good matrix assembly performance the user should preallocate the matrix
76: storage by setting the parameters `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
77: By setting these parameters accurately, performance can be increased by more
78: than a factor of 50.
80: If the *_nnz parameter is given then the *_nz parameter is ignored
82: A nonzero block is any block that as 1 or more nonzeros in it
84: The user MUST specify either the local or global matrix dimensions
85: (possibly both).
87: If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
88: than it must be used on all processors that share the object for that argument.
90: Storage Information:
91: For a square global matrix we define each processor's diagonal portion
92: to be its local rows and the corresponding columns (a square submatrix);
93: each processor's off-diagonal portion encompasses the remainder of the
94: local matrix (a rectangular submatrix).
96: The user can specify preallocated storage for the diagonal part of
97: the local submatrix with either `d_nz` or `d_nnz` (not both). Set
98: `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
99: memory allocation. Likewise, specify preallocated storage for the
100: off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
102: Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
103: the figure below we depict these three local rows and all columns (0-11).
105: .vb
106: 0 1 2 3 4 5 6 7 8 9 10 11
107: --------------------------
108: row 3 |o o o d d d o o o o o o
109: row 4 |o o o d d d o o o o o o
110: row 5 |o o o d d d o o o o o o
111: --------------------------
112: .ve
114: Thus, any entries in the d locations are stored in the d (diagonal)
115: submatrix, and any entries in the o locations are stored in the
116: o (off-diagonal) submatrix. Note that the d and the o submatrices are
117: stored simply in the `MATSEQBAIJMKL` format for compressed row storage.
119: Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
120: and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
121: In general, for PDE problems in which most nonzeros are near the diagonal,
122: one expects `d_nz` >> `o_nz`. For large problems you MUST preallocate memory
123: or you will get TERRIBLE performance; see the users' manual chapter on
124: matrices.
126: Level: intermediate
128: .seealso: `MATBAIJMKL`, `MATBAIJ`, `MatCreate()`, `MatCreateSeqBAIJMKL()`, `MatSetValues()`, `MatCreateBAIJMKL()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
129: @*/
131: PetscErrorCode MatCreateBAIJMKL(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
132: {
133: PetscMPIInt size;
135: MatCreate(comm, A);
136: MatSetSizes(*A, m, n, M, N);
137: MPI_Comm_size(comm, &size);
138: if (size > 1) {
139: MatSetType(*A, MATMPIBAIJMKL);
140: MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz);
141: } else {
142: MatSetType(*A, MATSEQBAIJMKL);
143: MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz);
144: }
145: return 0;
146: }
148: PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJMKL(Mat A)
149: {
150: MatSetType(A, MATMPIBAIJ);
151: MatConvert_MPIBAIJ_MPIBAIJMKL(A, MATMPIBAIJMKL, MAT_INPLACE_MATRIX, &A);
152: return 0;
153: }
155: /*MC
156: MATBAIJMKL - MATBAIJMKL = "BAIJMKL" - A matrix type to be used for sparse matrices.
158: This matrix type is identical to `MATSEQBAIJMKL` when constructed with a single process communicator,
159: and `MATMPIBAIJMKL` otherwise. As a result, for single process communicators,
160: `MatSeqBAIJSetPreallocation()` is supported, and similarly `MatMPIBAIJSetPreallocation()` is supported
161: for communicators controlling multiple processes. It is recommended that you call both of
162: the above preallocation routines for simplicity.
164: Options Database Keys:
165: . -mat_type baijmkl - sets the matrix type to `MATBAIJMKL` during a call to `MatSetFromOptions()`
167: Level: beginner
169: .seealso: `MatCreateBAIJMKL()`, `MATSEQBAIJMKL`, `MATMPIBAIJMKL`
170: M*/