Actual source code: matmatmult.c
2: /*
3: Defines matrix-matrix product routines for pairs of SeqAIJ matrices
4: C = A * B
5: */
7: #include <../src/mat/impls/aij/seq/aij.h>
8: #include <../src/mat/utils/freespace.h>
9: #include <petscbt.h>
10: #include <petsc/private/isimpl.h>
11: #include <../src/mat/impls/dense/seq/dense.h>
13: PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ(Mat A, Mat B, Mat C)
14: {
15: if (C->ops->matmultnumeric) (*C->ops->matmultnumeric)(A, B, C);
16: else MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted(A, B, C);
17: return 0;
18: }
20: /* Modified from MatCreateSeqAIJWithArrays() */
21: PETSC_INTERN PetscErrorCode MatSetSeqAIJWithArrays_private(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt i[], PetscInt j[], PetscScalar a[], MatType mtype, Mat mat)
22: {
23: PetscInt ii;
24: Mat_SeqAIJ *aij;
25: PetscBool isseqaij, osingle, ofree_a, ofree_ij;
28: MatSetSizes(mat, m, n, m, n);
30: if (!mtype) {
31: PetscObjectBaseTypeCompare((PetscObject)mat, MATSEQAIJ, &isseqaij);
32: if (!isseqaij) MatSetType(mat, MATSEQAIJ);
33: } else {
34: MatSetType(mat, mtype);
35: }
37: aij = (Mat_SeqAIJ *)(mat)->data;
38: osingle = aij->singlemalloc;
39: ofree_a = aij->free_a;
40: ofree_ij = aij->free_ij;
41: /* changes the free flags */
42: MatSeqAIJSetPreallocation_SeqAIJ(mat, MAT_SKIP_ALLOCATION, NULL);
44: PetscFree(aij->ilen);
45: PetscFree(aij->imax);
46: PetscMalloc1(m, &aij->imax);
47: PetscMalloc1(m, &aij->ilen);
48: for (ii = 0, aij->nonzerorowcnt = 0, aij->rmax = 0; ii < m; ii++) {
49: const PetscInt rnz = i[ii + 1] - i[ii];
50: aij->nonzerorowcnt += !!rnz;
51: aij->rmax = PetscMax(aij->rmax, rnz);
52: aij->ilen[ii] = aij->imax[ii] = i[ii + 1] - i[ii];
53: }
54: aij->maxnz = i[m];
55: aij->nz = i[m];
57: if (osingle) {
58: PetscFree3(aij->a, aij->j, aij->i);
59: } else {
60: if (ofree_a) PetscFree(aij->a);
61: if (ofree_ij) PetscFree(aij->j);
62: if (ofree_ij) PetscFree(aij->i);
63: }
64: aij->i = i;
65: aij->j = j;
66: aij->a = a;
67: aij->nonew = -1; /* this indicates that inserting a new value in the matrix that generates a new nonzero is an error */
68: /* default to not retain ownership */
69: aij->singlemalloc = PETSC_FALSE;
70: aij->free_a = PETSC_FALSE;
71: aij->free_ij = PETSC_FALSE;
72: MatCheckCompressedRow(mat, aij->nonzerorowcnt, &aij->compressedrow, aij->i, m, 0.6);
73: return 0;
74: }
76: PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A, Mat B, PetscReal fill, Mat C)
77: {
78: Mat_Product *product = C->product;
79: MatProductAlgorithm alg;
80: PetscBool flg;
82: if (product) {
83: alg = product->alg;
84: } else {
85: alg = "sorted";
86: }
87: /* sorted */
88: PetscStrcmp(alg, "sorted", &flg);
89: if (flg) {
90: MatMatMultSymbolic_SeqAIJ_SeqAIJ_Sorted(A, B, fill, C);
91: return 0;
92: }
94: /* scalable */
95: PetscStrcmp(alg, "scalable", &flg);
96: if (flg) {
97: MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable(A, B, fill, C);
98: return 0;
99: }
101: /* scalable_fast */
102: PetscStrcmp(alg, "scalable_fast", &flg);
103: if (flg) {
104: MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable_fast(A, B, fill, C);
105: return 0;
106: }
108: /* heap */
109: PetscStrcmp(alg, "heap", &flg);
110: if (flg) {
111: MatMatMultSymbolic_SeqAIJ_SeqAIJ_Heap(A, B, fill, C);
112: return 0;
113: }
115: /* btheap */
116: PetscStrcmp(alg, "btheap", &flg);
117: if (flg) {
118: MatMatMultSymbolic_SeqAIJ_SeqAIJ_BTHeap(A, B, fill, C);
119: return 0;
120: }
122: /* llcondensed */
123: PetscStrcmp(alg, "llcondensed", &flg);
124: if (flg) {
125: MatMatMultSymbolic_SeqAIJ_SeqAIJ_LLCondensed(A, B, fill, C);
126: return 0;
127: }
129: /* rowmerge */
130: PetscStrcmp(alg, "rowmerge", &flg);
131: if (flg) {
132: MatMatMultSymbolic_SeqAIJ_SeqAIJ_RowMerge(A, B, fill, C);
133: return 0;
134: }
136: #if defined(PETSC_HAVE_HYPRE)
137: PetscStrcmp(alg, "hypre", &flg);
138: if (flg) {
139: MatMatMultSymbolic_AIJ_AIJ_wHYPRE(A, B, fill, C);
140: return 0;
141: }
142: #endif
144: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product Algorithm is not supported");
145: }
147: PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_LLCondensed(Mat A, Mat B, PetscReal fill, Mat C)
148: {
149: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)B->data, *c;
150: PetscInt *ai = a->i, *bi = b->i, *ci, *cj;
151: PetscInt am = A->rmap->N, bn = B->cmap->N, bm = B->rmap->N;
152: PetscReal afill;
153: PetscInt i, j, anzi, brow, bnzj, cnzi, *bj, *aj, *lnk, ndouble = 0, Crmax;
154: PetscTable ta;
155: PetscBT lnkbt;
156: PetscFreeSpaceList free_space = NULL, current_space = NULL;
158: /* Get ci and cj */
159: /*---------------*/
160: /* Allocate ci array, arrays for fill computation and */
161: /* free space for accumulating nonzero column info */
162: PetscMalloc1(am + 2, &ci);
163: ci[0] = 0;
165: /* create and initialize a linked list */
166: PetscTableCreate(bn, bn, &ta);
167: MatRowMergeMax_SeqAIJ(b, bm, ta);
168: PetscTableGetCount(ta, &Crmax);
169: PetscTableDestroy(&ta);
171: PetscLLCondensedCreate(Crmax, bn, &lnk, &lnkbt);
173: /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
174: PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, PetscIntSumTruncate(ai[am], bi[bm])), &free_space);
176: current_space = free_space;
178: /* Determine ci and cj */
179: for (i = 0; i < am; i++) {
180: anzi = ai[i + 1] - ai[i];
181: aj = a->j + ai[i];
182: for (j = 0; j < anzi; j++) {
183: brow = aj[j];
184: bnzj = bi[brow + 1] - bi[brow];
185: bj = b->j + bi[brow];
186: /* add non-zero cols of B into the sorted linked list lnk */
187: PetscLLCondensedAddSorted(bnzj, bj, lnk, lnkbt);
188: }
189: /* add possible missing diagonal entry */
190: if (C->force_diagonals) PetscLLCondensedAddSorted(1, &i, lnk, lnkbt);
191: cnzi = lnk[0];
193: /* If free space is not available, make more free space */
194: /* Double the amount of total space in the list */
195: if (current_space->local_remaining < cnzi) {
196: PetscFreeSpaceGet(PetscIntSumTruncate(cnzi, current_space->total_array_size), ¤t_space);
197: ndouble++;
198: }
200: /* Copy data into free space, then initialize lnk */
201: PetscLLCondensedClean(bn, cnzi, current_space->array, lnk, lnkbt);
203: current_space->array += cnzi;
204: current_space->local_used += cnzi;
205: current_space->local_remaining -= cnzi;
207: ci[i + 1] = ci[i] + cnzi;
208: }
210: /* Column indices are in the list of free space */
211: /* Allocate space for cj, initialize cj, and */
212: /* destroy list of free space and other temporary array(s) */
213: PetscMalloc1(ci[am] + 1, &cj);
214: PetscFreeSpaceContiguous(&free_space, cj);
215: PetscLLCondensedDestroy(lnk, lnkbt);
217: /* put together the new symbolic matrix */
218: MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A), am, bn, ci, cj, NULL, ((PetscObject)A)->type_name, C);
219: MatSetBlockSizesFromMats(C, A, B);
221: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
222: /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
223: c = (Mat_SeqAIJ *)(C->data);
224: c->free_a = PETSC_FALSE;
225: c->free_ij = PETSC_TRUE;
226: c->nonew = 0;
228: /* fast, needs non-scalable O(bn) array 'abdense' */
229: C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted;
231: /* set MatInfo */
232: afill = (PetscReal)ci[am] / (ai[am] + bi[bm]) + 1.e-5;
233: if (afill < 1.0) afill = 1.0;
234: C->info.mallocs = ndouble;
235: C->info.fill_ratio_given = fill;
236: C->info.fill_ratio_needed = afill;
238: #if defined(PETSC_USE_INFO)
239: if (ci[am]) {
240: PetscInfo(C, "Reallocs %" PetscInt_FMT "; Fill ratio: given %g needed %g.\n", ndouble, (double)fill, (double)afill);
241: PetscInfo(C, "Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n", (double)afill);
242: } else {
243: PetscInfo(C, "Empty matrix product\n");
244: }
245: #endif
246: return 0;
247: }
249: PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted(Mat A, Mat B, Mat C)
250: {
251: PetscLogDouble flops = 0.0;
252: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
253: Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
254: Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data;
255: PetscInt *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, *bjj, *ci = c->i, *cj = c->j;
256: PetscInt am = A->rmap->n, cm = C->rmap->n;
257: PetscInt i, j, k, anzi, bnzi, cnzi, brow;
258: PetscScalar *ca, valtmp;
259: PetscScalar *ab_dense;
260: PetscContainer cab_dense;
261: const PetscScalar *aa, *ba, *baj;
263: MatSeqAIJGetArrayRead(A, &aa);
264: MatSeqAIJGetArrayRead(B, &ba);
265: if (!c->a) { /* first call of MatMatMultNumeric_SeqAIJ_SeqAIJ, allocate ca and matmult_abdense */
266: PetscMalloc1(ci[cm] + 1, &ca);
267: c->a = ca;
268: c->free_a = PETSC_TRUE;
269: } else ca = c->a;
271: /* TODO this should be done in the symbolic phase */
272: /* However, this function is so heavily used (sometimes in an hidden way through multnumeric function pointers
273: that is hard to eradicate) */
274: PetscObjectQuery((PetscObject)C, "__PETSc__ab_dense", (PetscObject *)&cab_dense);
275: if (!cab_dense) {
276: PetscMalloc1(B->cmap->N, &ab_dense);
277: PetscContainerCreate(PETSC_COMM_SELF, &cab_dense);
278: PetscContainerSetPointer(cab_dense, ab_dense);
279: PetscContainerSetUserDestroy(cab_dense, PetscContainerUserDestroyDefault);
280: PetscObjectCompose((PetscObject)C, "__PETSc__ab_dense", (PetscObject)cab_dense);
281: PetscObjectDereference((PetscObject)cab_dense);
282: }
283: PetscContainerGetPointer(cab_dense, (void **)&ab_dense);
284: PetscArrayzero(ab_dense, B->cmap->N);
286: /* clean old values in C */
287: PetscArrayzero(ca, ci[cm]);
288: /* Traverse A row-wise. */
289: /* Build the ith row in C by summing over nonzero columns in A, */
290: /* the rows of B corresponding to nonzeros of A. */
291: for (i = 0; i < am; i++) {
292: anzi = ai[i + 1] - ai[i];
293: for (j = 0; j < anzi; j++) {
294: brow = aj[j];
295: bnzi = bi[brow + 1] - bi[brow];
296: bjj = bj + bi[brow];
297: baj = ba + bi[brow];
298: /* perform dense axpy */
299: valtmp = aa[j];
300: for (k = 0; k < bnzi; k++) ab_dense[bjj[k]] += valtmp * baj[k];
301: flops += 2 * bnzi;
302: }
303: aj += anzi;
304: aa += anzi;
306: cnzi = ci[i + 1] - ci[i];
307: for (k = 0; k < cnzi; k++) {
308: ca[k] += ab_dense[cj[k]];
309: ab_dense[cj[k]] = 0.0; /* zero ab_dense */
310: }
311: flops += cnzi;
312: cj += cnzi;
313: ca += cnzi;
314: }
315: #if defined(PETSC_HAVE_DEVICE)
316: if (C->offloadmask != PETSC_OFFLOAD_UNALLOCATED) C->offloadmask = PETSC_OFFLOAD_CPU;
317: #endif
318: MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY);
319: MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY);
320: PetscLogFlops(flops);
321: MatSeqAIJRestoreArrayRead(A, &aa);
322: MatSeqAIJRestoreArrayRead(B, &ba);
323: return 0;
324: }
326: PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable(Mat A, Mat B, Mat C)
327: {
328: PetscLogDouble flops = 0.0;
329: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
330: Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
331: Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data;
332: PetscInt *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, *bjj, *ci = c->i, *cj = c->j;
333: PetscInt am = A->rmap->N, cm = C->rmap->N;
334: PetscInt i, j, k, anzi, bnzi, cnzi, brow;
335: PetscScalar *ca = c->a, valtmp;
336: const PetscScalar *aa, *ba, *baj;
337: PetscInt nextb;
339: MatSeqAIJGetArrayRead(A, &aa);
340: MatSeqAIJGetArrayRead(B, &ba);
341: if (!ca) { /* first call of MatMatMultNumeric_SeqAIJ_SeqAIJ, allocate ca and matmult_abdense */
342: PetscMalloc1(ci[cm] + 1, &ca);
343: c->a = ca;
344: c->free_a = PETSC_TRUE;
345: }
347: /* clean old values in C */
348: PetscArrayzero(ca, ci[cm]);
349: /* Traverse A row-wise. */
350: /* Build the ith row in C by summing over nonzero columns in A, */
351: /* the rows of B corresponding to nonzeros of A. */
352: for (i = 0; i < am; i++) {
353: anzi = ai[i + 1] - ai[i];
354: cnzi = ci[i + 1] - ci[i];
355: for (j = 0; j < anzi; j++) {
356: brow = aj[j];
357: bnzi = bi[brow + 1] - bi[brow];
358: bjj = bj + bi[brow];
359: baj = ba + bi[brow];
360: /* perform sparse axpy */
361: valtmp = aa[j];
362: nextb = 0;
363: for (k = 0; nextb < bnzi; k++) {
364: if (cj[k] == bjj[nextb]) { /* ccol == bcol */
365: ca[k] += valtmp * baj[nextb++];
366: }
367: }
368: flops += 2 * bnzi;
369: }
370: aj += anzi;
371: aa += anzi;
372: cj += cnzi;
373: ca += cnzi;
374: }
375: #if defined(PETSC_HAVE_DEVICE)
376: if (C->offloadmask != PETSC_OFFLOAD_UNALLOCATED) C->offloadmask = PETSC_OFFLOAD_CPU;
377: #endif
378: MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY);
379: MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY);
380: PetscLogFlops(flops);
381: MatSeqAIJRestoreArrayRead(A, &aa);
382: MatSeqAIJRestoreArrayRead(B, &ba);
383: return 0;
384: }
386: PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable_fast(Mat A, Mat B, PetscReal fill, Mat C)
387: {
388: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)B->data, *c;
389: PetscInt *ai = a->i, *bi = b->i, *ci, *cj;
390: PetscInt am = A->rmap->N, bn = B->cmap->N, bm = B->rmap->N;
391: MatScalar *ca;
392: PetscReal afill;
393: PetscInt i, j, anzi, brow, bnzj, cnzi, *bj, *aj, *lnk, ndouble = 0, Crmax;
394: PetscTable ta;
395: PetscFreeSpaceList free_space = NULL, current_space = NULL;
397: /* Get ci and cj - same as MatMatMultSymbolic_SeqAIJ_SeqAIJ except using PetscLLxxx_fast() */
398: /*-----------------------------------------------------------------------------------------*/
399: /* Allocate arrays for fill computation and free space for accumulating nonzero column */
400: PetscMalloc1(am + 2, &ci);
401: ci[0] = 0;
403: /* create and initialize a linked list */
404: PetscTableCreate(bn, bn, &ta);
405: MatRowMergeMax_SeqAIJ(b, bm, ta);
406: PetscTableGetCount(ta, &Crmax);
407: PetscTableDestroy(&ta);
409: PetscLLCondensedCreate_fast(Crmax, &lnk);
411: /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
412: PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, PetscIntSumTruncate(ai[am], bi[bm])), &free_space);
413: current_space = free_space;
415: /* Determine ci and cj */
416: for (i = 0; i < am; i++) {
417: anzi = ai[i + 1] - ai[i];
418: aj = a->j + ai[i];
419: for (j = 0; j < anzi; j++) {
420: brow = aj[j];
421: bnzj = bi[brow + 1] - bi[brow];
422: bj = b->j + bi[brow];
423: /* add non-zero cols of B into the sorted linked list lnk */
424: PetscLLCondensedAddSorted_fast(bnzj, bj, lnk);
425: }
426: /* add possible missing diagonal entry */
427: if (C->force_diagonals) PetscLLCondensedAddSorted_fast(1, &i, lnk);
428: cnzi = lnk[1];
430: /* If free space is not available, make more free space */
431: /* Double the amount of total space in the list */
432: if (current_space->local_remaining < cnzi) {
433: PetscFreeSpaceGet(PetscIntSumTruncate(cnzi, current_space->total_array_size), ¤t_space);
434: ndouble++;
435: }
437: /* Copy data into free space, then initialize lnk */
438: PetscLLCondensedClean_fast(cnzi, current_space->array, lnk);
440: current_space->array += cnzi;
441: current_space->local_used += cnzi;
442: current_space->local_remaining -= cnzi;
444: ci[i + 1] = ci[i] + cnzi;
445: }
447: /* Column indices are in the list of free space */
448: /* Allocate space for cj, initialize cj, and */
449: /* destroy list of free space and other temporary array(s) */
450: PetscMalloc1(ci[am] + 1, &cj);
451: PetscFreeSpaceContiguous(&free_space, cj);
452: PetscLLCondensedDestroy_fast(lnk);
454: /* Allocate space for ca */
455: PetscCalloc1(ci[am] + 1, &ca);
457: /* put together the new symbolic matrix */
458: MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A), am, bn, ci, cj, ca, ((PetscObject)A)->type_name, C);
459: MatSetBlockSizesFromMats(C, A, B);
461: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
462: /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
463: c = (Mat_SeqAIJ *)(C->data);
464: c->free_a = PETSC_TRUE;
465: c->free_ij = PETSC_TRUE;
466: c->nonew = 0;
468: /* slower, less memory */
469: C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable;
471: /* set MatInfo */
472: afill = (PetscReal)ci[am] / (ai[am] + bi[bm]) + 1.e-5;
473: if (afill < 1.0) afill = 1.0;
474: C->info.mallocs = ndouble;
475: C->info.fill_ratio_given = fill;
476: C->info.fill_ratio_needed = afill;
478: #if defined(PETSC_USE_INFO)
479: if (ci[am]) {
480: PetscInfo(C, "Reallocs %" PetscInt_FMT "; Fill ratio: given %g needed %g.\n", ndouble, (double)fill, (double)afill);
481: PetscInfo(C, "Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n", (double)afill);
482: } else {
483: PetscInfo(C, "Empty matrix product\n");
484: }
485: #endif
486: return 0;
487: }
489: PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable(Mat A, Mat B, PetscReal fill, Mat C)
490: {
491: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)B->data, *c;
492: PetscInt *ai = a->i, *bi = b->i, *ci, *cj;
493: PetscInt am = A->rmap->N, bn = B->cmap->N, bm = B->rmap->N;
494: MatScalar *ca;
495: PetscReal afill;
496: PetscInt i, j, anzi, brow, bnzj, cnzi, *bj, *aj, *lnk, ndouble = 0, Crmax;
497: PetscTable ta;
498: PetscFreeSpaceList free_space = NULL, current_space = NULL;
500: /* Get ci and cj - same as MatMatMultSymbolic_SeqAIJ_SeqAIJ except using PetscLLxxx_Scalalbe() */
501: /*---------------------------------------------------------------------------------------------*/
502: /* Allocate arrays for fill computation and free space for accumulating nonzero column */
503: PetscMalloc1(am + 2, &ci);
504: ci[0] = 0;
506: /* create and initialize a linked list */
507: PetscTableCreate(bn, bn, &ta);
508: MatRowMergeMax_SeqAIJ(b, bm, ta);
509: PetscTableGetCount(ta, &Crmax);
510: PetscTableDestroy(&ta);
511: PetscLLCondensedCreate_Scalable(Crmax, &lnk);
513: /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
514: PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, PetscIntSumTruncate(ai[am], bi[bm])), &free_space);
515: current_space = free_space;
517: /* Determine ci and cj */
518: for (i = 0; i < am; i++) {
519: anzi = ai[i + 1] - ai[i];
520: aj = a->j + ai[i];
521: for (j = 0; j < anzi; j++) {
522: brow = aj[j];
523: bnzj = bi[brow + 1] - bi[brow];
524: bj = b->j + bi[brow];
525: /* add non-zero cols of B into the sorted linked list lnk */
526: PetscLLCondensedAddSorted_Scalable(bnzj, bj, lnk);
527: }
528: /* add possible missing diagonal entry */
529: if (C->force_diagonals) PetscLLCondensedAddSorted_Scalable(1, &i, lnk);
531: cnzi = lnk[0];
533: /* If free space is not available, make more free space */
534: /* Double the amount of total space in the list */
535: if (current_space->local_remaining < cnzi) {
536: PetscFreeSpaceGet(PetscIntSumTruncate(cnzi, current_space->total_array_size), ¤t_space);
537: ndouble++;
538: }
540: /* Copy data into free space, then initialize lnk */
541: PetscLLCondensedClean_Scalable(cnzi, current_space->array, lnk);
543: current_space->array += cnzi;
544: current_space->local_used += cnzi;
545: current_space->local_remaining -= cnzi;
547: ci[i + 1] = ci[i] + cnzi;
548: }
550: /* Column indices are in the list of free space */
551: /* Allocate space for cj, initialize cj, and */
552: /* destroy list of free space and other temporary array(s) */
553: PetscMalloc1(ci[am] + 1, &cj);
554: PetscFreeSpaceContiguous(&free_space, cj);
555: PetscLLCondensedDestroy_Scalable(lnk);
557: /* Allocate space for ca */
558: /*-----------------------*/
559: PetscCalloc1(ci[am] + 1, &ca);
561: /* put together the new symbolic matrix */
562: MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A), am, bn, ci, cj, ca, ((PetscObject)A)->type_name, C);
563: MatSetBlockSizesFromMats(C, A, B);
565: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
566: /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
567: c = (Mat_SeqAIJ *)(C->data);
568: c->free_a = PETSC_TRUE;
569: c->free_ij = PETSC_TRUE;
570: c->nonew = 0;
572: /* slower, less memory */
573: C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable;
575: /* set MatInfo */
576: afill = (PetscReal)ci[am] / (ai[am] + bi[bm]) + 1.e-5;
577: if (afill < 1.0) afill = 1.0;
578: C->info.mallocs = ndouble;
579: C->info.fill_ratio_given = fill;
580: C->info.fill_ratio_needed = afill;
582: #if defined(PETSC_USE_INFO)
583: if (ci[am]) {
584: PetscInfo(C, "Reallocs %" PetscInt_FMT "; Fill ratio: given %g needed %g.\n", ndouble, (double)fill, (double)afill);
585: PetscInfo(C, "Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n", (double)afill);
586: } else {
587: PetscInfo(C, "Empty matrix product\n");
588: }
589: #endif
590: return 0;
591: }
593: PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Heap(Mat A, Mat B, PetscReal fill, Mat C)
594: {
595: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)B->data, *c;
596: const PetscInt *ai = a->i, *bi = b->i, *aj = a->j, *bj = b->j;
597: PetscInt *ci, *cj, *bb;
598: PetscInt am = A->rmap->N, bn = B->cmap->N, bm = B->rmap->N;
599: PetscReal afill;
600: PetscInt i, j, col, ndouble = 0;
601: PetscFreeSpaceList free_space = NULL, current_space = NULL;
602: PetscHeap h;
604: /* Get ci and cj - by merging sorted rows using a heap */
605: /*---------------------------------------------------------------------------------------------*/
606: /* Allocate arrays for fill computation and free space for accumulating nonzero column */
607: PetscMalloc1(am + 2, &ci);
608: ci[0] = 0;
610: /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
611: PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, PetscIntSumTruncate(ai[am], bi[bm])), &free_space);
612: current_space = free_space;
614: PetscHeapCreate(a->rmax, &h);
615: PetscMalloc1(a->rmax, &bb);
617: /* Determine ci and cj */
618: for (i = 0; i < am; i++) {
619: const PetscInt anzi = ai[i + 1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */
620: const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */
621: ci[i + 1] = ci[i];
622: /* Populate the min heap */
623: for (j = 0; j < anzi; j++) {
624: bb[j] = bi[acol[j]]; /* bb points at the start of the row */
625: if (bb[j] < bi[acol[j] + 1]) { /* Add if row is nonempty */
626: PetscHeapAdd(h, j, bj[bb[j]++]);
627: }
628: }
629: /* Pick off the min element, adding it to free space */
630: PetscHeapPop(h, &j, &col);
631: while (j >= 0) {
632: if (current_space->local_remaining < 1) { /* double the size, but don't exceed 16 MiB */
633: PetscFreeSpaceGet(PetscMin(PetscIntMultTruncate(2, current_space->total_array_size), 16 << 20), ¤t_space);
634: ndouble++;
635: }
636: *(current_space->array++) = col;
637: current_space->local_used++;
638: current_space->local_remaining--;
639: ci[i + 1]++;
641: /* stash if anything else remains in this row of B */
642: if (bb[j] < bi[acol[j] + 1]) PetscHeapStash(h, j, bj[bb[j]++]);
643: while (1) { /* pop and stash any other rows of B that also had an entry in this column */
644: PetscInt j2, col2;
645: PetscHeapPeek(h, &j2, &col2);
646: if (col2 != col) break;
647: PetscHeapPop(h, &j2, &col2);
648: if (bb[j2] < bi[acol[j2] + 1]) PetscHeapStash(h, j2, bj[bb[j2]++]);
649: }
650: /* Put any stashed elements back into the min heap */
651: PetscHeapUnstash(h);
652: PetscHeapPop(h, &j, &col);
653: }
654: }
655: PetscFree(bb);
656: PetscHeapDestroy(&h);
658: /* Column indices are in the list of free space */
659: /* Allocate space for cj, initialize cj, and */
660: /* destroy list of free space and other temporary array(s) */
661: PetscMalloc1(ci[am], &cj);
662: PetscFreeSpaceContiguous(&free_space, cj);
664: /* put together the new symbolic matrix */
665: MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A), am, bn, ci, cj, NULL, ((PetscObject)A)->type_name, C);
666: MatSetBlockSizesFromMats(C, A, B);
668: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
669: /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
670: c = (Mat_SeqAIJ *)(C->data);
671: c->free_a = PETSC_TRUE;
672: c->free_ij = PETSC_TRUE;
673: c->nonew = 0;
675: C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted;
677: /* set MatInfo */
678: afill = (PetscReal)ci[am] / (ai[am] + bi[bm]) + 1.e-5;
679: if (afill < 1.0) afill = 1.0;
680: C->info.mallocs = ndouble;
681: C->info.fill_ratio_given = fill;
682: C->info.fill_ratio_needed = afill;
684: #if defined(PETSC_USE_INFO)
685: if (ci[am]) {
686: PetscInfo(C, "Reallocs %" PetscInt_FMT "; Fill ratio: given %g needed %g.\n", ndouble, (double)fill, (double)afill);
687: PetscInfo(C, "Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n", (double)afill);
688: } else {
689: PetscInfo(C, "Empty matrix product\n");
690: }
691: #endif
692: return 0;
693: }
695: PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_BTHeap(Mat A, Mat B, PetscReal fill, Mat C)
696: {
697: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)B->data, *c;
698: const PetscInt *ai = a->i, *bi = b->i, *aj = a->j, *bj = b->j;
699: PetscInt *ci, *cj, *bb;
700: PetscInt am = A->rmap->N, bn = B->cmap->N, bm = B->rmap->N;
701: PetscReal afill;
702: PetscInt i, j, col, ndouble = 0;
703: PetscFreeSpaceList free_space = NULL, current_space = NULL;
704: PetscHeap h;
705: PetscBT bt;
707: /* Get ci and cj - using a heap for the sorted rows, but use BT so that each index is only added once */
708: /*---------------------------------------------------------------------------------------------*/
709: /* Allocate arrays for fill computation and free space for accumulating nonzero column */
710: PetscMalloc1(am + 2, &ci);
711: ci[0] = 0;
713: /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
714: PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, PetscIntSumTruncate(ai[am], bi[bm])), &free_space);
716: current_space = free_space;
718: PetscHeapCreate(a->rmax, &h);
719: PetscMalloc1(a->rmax, &bb);
720: PetscBTCreate(bn, &bt);
722: /* Determine ci and cj */
723: for (i = 0; i < am; i++) {
724: const PetscInt anzi = ai[i + 1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */
725: const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */
726: const PetscInt *fptr = current_space->array; /* Save beginning of the row so we can clear the BT later */
727: ci[i + 1] = ci[i];
728: /* Populate the min heap */
729: for (j = 0; j < anzi; j++) {
730: PetscInt brow = acol[j];
731: for (bb[j] = bi[brow]; bb[j] < bi[brow + 1]; bb[j]++) {
732: PetscInt bcol = bj[bb[j]];
733: if (!PetscBTLookupSet(bt, bcol)) { /* new entry */
734: PetscHeapAdd(h, j, bcol);
735: bb[j]++;
736: break;
737: }
738: }
739: }
740: /* Pick off the min element, adding it to free space */
741: PetscHeapPop(h, &j, &col);
742: while (j >= 0) {
743: if (current_space->local_remaining < 1) { /* double the size, but don't exceed 16 MiB */
744: fptr = NULL; /* need PetscBTMemzero */
745: PetscFreeSpaceGet(PetscMin(PetscIntMultTruncate(2, current_space->total_array_size), 16 << 20), ¤t_space);
746: ndouble++;
747: }
748: *(current_space->array++) = col;
749: current_space->local_used++;
750: current_space->local_remaining--;
751: ci[i + 1]++;
753: /* stash if anything else remains in this row of B */
754: for (; bb[j] < bi[acol[j] + 1]; bb[j]++) {
755: PetscInt bcol = bj[bb[j]];
756: if (!PetscBTLookupSet(bt, bcol)) { /* new entry */
757: PetscHeapAdd(h, j, bcol);
758: bb[j]++;
759: break;
760: }
761: }
762: PetscHeapPop(h, &j, &col);
763: }
764: if (fptr) { /* Clear the bits for this row */
765: for (; fptr < current_space->array; fptr++) PetscBTClear(bt, *fptr);
766: } else { /* We reallocated so we don't remember (easily) how to clear only the bits we changed */
767: PetscBTMemzero(bn, bt);
768: }
769: }
770: PetscFree(bb);
771: PetscHeapDestroy(&h);
772: PetscBTDestroy(&bt);
774: /* Column indices are in the list of free space */
775: /* Allocate space for cj, initialize cj, and */
776: /* destroy list of free space and other temporary array(s) */
777: PetscMalloc1(ci[am], &cj);
778: PetscFreeSpaceContiguous(&free_space, cj);
780: /* put together the new symbolic matrix */
781: MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A), am, bn, ci, cj, NULL, ((PetscObject)A)->type_name, C);
782: MatSetBlockSizesFromMats(C, A, B);
784: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
785: /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
786: c = (Mat_SeqAIJ *)(C->data);
787: c->free_a = PETSC_TRUE;
788: c->free_ij = PETSC_TRUE;
789: c->nonew = 0;
791: C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted;
793: /* set MatInfo */
794: afill = (PetscReal)ci[am] / (ai[am] + bi[bm]) + 1.e-5;
795: if (afill < 1.0) afill = 1.0;
796: C->info.mallocs = ndouble;
797: C->info.fill_ratio_given = fill;
798: C->info.fill_ratio_needed = afill;
800: #if defined(PETSC_USE_INFO)
801: if (ci[am]) {
802: PetscInfo(C, "Reallocs %" PetscInt_FMT "; Fill ratio: given %g needed %g.\n", ndouble, (double)fill, (double)afill);
803: PetscInfo(C, "Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n", (double)afill);
804: } else {
805: PetscInfo(C, "Empty matrix product\n");
806: }
807: #endif
808: return 0;
809: }
811: PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_RowMerge(Mat A, Mat B, PetscReal fill, Mat C)
812: {
813: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)B->data, *c;
814: const PetscInt *ai = a->i, *bi = b->i, *aj = a->j, *bj = b->j, *inputi, *inputj, *inputcol, *inputcol_L1;
815: PetscInt *ci, *cj, *outputj, worki_L1[9], worki_L2[9];
816: PetscInt c_maxmem, a_maxrownnz = 0, a_rownnz;
817: const PetscInt workcol[8] = {0, 1, 2, 3, 4, 5, 6, 7};
818: const PetscInt am = A->rmap->N, bn = B->cmap->N, bm = B->rmap->N;
819: const PetscInt *brow_ptr[8], *brow_end[8];
820: PetscInt window[8];
821: PetscInt window_min, old_window_min, ci_nnz, outputi_nnz = 0, L1_nrows, L2_nrows;
822: PetscInt i, k, ndouble = 0, L1_rowsleft, rowsleft;
823: PetscReal afill;
824: PetscInt *workj_L1, *workj_L2, *workj_L3;
825: PetscInt L1_nnz, L2_nnz;
827: /* Step 1: Get upper bound on memory required for allocation.
828: Because of the way virtual memory works,
829: only the memory pages that are actually needed will be physically allocated. */
830: PetscMalloc1(am + 1, &ci);
831: for (i = 0; i < am; i++) {
832: const PetscInt anzi = ai[i + 1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */
833: const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */
834: a_rownnz = 0;
835: for (k = 0; k < anzi; ++k) {
836: a_rownnz += bi[acol[k] + 1] - bi[acol[k]];
837: if (a_rownnz > bn) {
838: a_rownnz = bn;
839: break;
840: }
841: }
842: a_maxrownnz = PetscMax(a_maxrownnz, a_rownnz);
843: }
844: /* temporary work areas for merging rows */
845: PetscMalloc1(a_maxrownnz * 8, &workj_L1);
846: PetscMalloc1(a_maxrownnz * 8, &workj_L2);
847: PetscMalloc1(a_maxrownnz, &workj_L3);
849: /* This should be enough for almost all matrices. If not, memory is reallocated later. */
850: c_maxmem = 8 * (ai[am] + bi[bm]);
851: /* Step 2: Populate pattern for C */
852: PetscMalloc1(c_maxmem, &cj);
854: ci_nnz = 0;
855: ci[0] = 0;
856: worki_L1[0] = 0;
857: worki_L2[0] = 0;
858: for (i = 0; i < am; i++) {
859: const PetscInt anzi = ai[i + 1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */
860: const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */
861: rowsleft = anzi;
862: inputcol_L1 = acol;
863: L2_nnz = 0;
864: L2_nrows = 1; /* Number of rows to be merged on Level 3. output of L3 already exists -> initial value 1 */
865: worki_L2[1] = 0;
866: outputi_nnz = 0;
868: /* If the number of indices in C so far + the max number of columns in the next row > c_maxmem -> allocate more memory */
869: while (ci_nnz + a_maxrownnz > c_maxmem) {
870: c_maxmem *= 2;
871: ndouble++;
872: PetscRealloc(sizeof(PetscInt) * c_maxmem, &cj);
873: }
875: while (rowsleft) {
876: L1_rowsleft = PetscMin(64, rowsleft); /* In the inner loop max 64 rows of B can be merged */
877: L1_nrows = 0;
878: L1_nnz = 0;
879: inputcol = inputcol_L1;
880: inputi = bi;
881: inputj = bj;
883: /* The following macro is used to specialize for small rows in A.
884: This helps with compiler unrolling, improving performance substantially.
885: Input: inputj inputi inputcol bn
886: Output: outputj outputi_nnz */
887: #define MatMatMultSymbolic_RowMergeMacro(ANNZ) \
888: window_min = bn; \
889: outputi_nnz = 0; \
890: for (k = 0; k < ANNZ; ++k) { \
891: brow_ptr[k] = inputj + inputi[inputcol[k]]; \
892: brow_end[k] = inputj + inputi[inputcol[k] + 1]; \
893: window[k] = (brow_ptr[k] != brow_end[k]) ? *brow_ptr[k] : bn; \
894: window_min = PetscMin(window[k], window_min); \
895: } \
896: while (window_min < bn) { \
897: outputj[outputi_nnz++] = window_min; \
898: /* advance front and compute new minimum */ \
899: old_window_min = window_min; \
900: window_min = bn; \
901: for (k = 0; k < ANNZ; ++k) { \
902: if (window[k] == old_window_min) { \
903: brow_ptr[k]++; \
904: window[k] = (brow_ptr[k] != brow_end[k]) ? *brow_ptr[k] : bn; \
905: } \
906: window_min = PetscMin(window[k], window_min); \
907: } \
908: }
910: /************** L E V E L 1 ***************/
911: /* Merge up to 8 rows of B to L1 work array*/
912: while (L1_rowsleft) {
913: outputi_nnz = 0;
914: if (anzi > 8) outputj = workj_L1 + L1_nnz; /* Level 1 rowmerge*/
915: else outputj = cj + ci_nnz; /* Merge directly to C */
917: switch (L1_rowsleft) {
918: case 1:
919: brow_ptr[0] = inputj + inputi[inputcol[0]];
920: brow_end[0] = inputj + inputi[inputcol[0] + 1];
921: for (; brow_ptr[0] != brow_end[0]; ++brow_ptr[0]) outputj[outputi_nnz++] = *brow_ptr[0]; /* copy row in b over */
922: inputcol += L1_rowsleft;
923: rowsleft -= L1_rowsleft;
924: L1_rowsleft = 0;
925: break;
926: case 2:
927: MatMatMultSymbolic_RowMergeMacro(2);
928: inputcol += L1_rowsleft;
929: rowsleft -= L1_rowsleft;
930: L1_rowsleft = 0;
931: break;
932: case 3:
933: MatMatMultSymbolic_RowMergeMacro(3);
934: inputcol += L1_rowsleft;
935: rowsleft -= L1_rowsleft;
936: L1_rowsleft = 0;
937: break;
938: case 4:
939: MatMatMultSymbolic_RowMergeMacro(4);
940: inputcol += L1_rowsleft;
941: rowsleft -= L1_rowsleft;
942: L1_rowsleft = 0;
943: break;
944: case 5:
945: MatMatMultSymbolic_RowMergeMacro(5);
946: inputcol += L1_rowsleft;
947: rowsleft -= L1_rowsleft;
948: L1_rowsleft = 0;
949: break;
950: case 6:
951: MatMatMultSymbolic_RowMergeMacro(6);
952: inputcol += L1_rowsleft;
953: rowsleft -= L1_rowsleft;
954: L1_rowsleft = 0;
955: break;
956: case 7:
957: MatMatMultSymbolic_RowMergeMacro(7);
958: inputcol += L1_rowsleft;
959: rowsleft -= L1_rowsleft;
960: L1_rowsleft = 0;
961: break;
962: default:
963: MatMatMultSymbolic_RowMergeMacro(8);
964: inputcol += 8;
965: rowsleft -= 8;
966: L1_rowsleft -= 8;
967: break;
968: }
969: inputcol_L1 = inputcol;
970: L1_nnz += outputi_nnz;
971: worki_L1[++L1_nrows] = L1_nnz;
972: }
974: /********************** L E V E L 2 ************************/
975: /* Merge from L1 work array to either C or to L2 work array */
976: if (anzi > 8) {
977: inputi = worki_L1;
978: inputj = workj_L1;
979: inputcol = workcol;
980: outputi_nnz = 0;
982: if (anzi <= 64) outputj = cj + ci_nnz; /* Merge from L1 work array to C */
983: else outputj = workj_L2 + L2_nnz; /* Merge from L1 work array to L2 work array */
985: switch (L1_nrows) {
986: case 1:
987: brow_ptr[0] = inputj + inputi[inputcol[0]];
988: brow_end[0] = inputj + inputi[inputcol[0] + 1];
989: for (; brow_ptr[0] != brow_end[0]; ++brow_ptr[0]) outputj[outputi_nnz++] = *brow_ptr[0]; /* copy row in b over */
990: break;
991: case 2:
992: MatMatMultSymbolic_RowMergeMacro(2);
993: break;
994: case 3:
995: MatMatMultSymbolic_RowMergeMacro(3);
996: break;
997: case 4:
998: MatMatMultSymbolic_RowMergeMacro(4);
999: break;
1000: case 5:
1001: MatMatMultSymbolic_RowMergeMacro(5);
1002: break;
1003: case 6:
1004: MatMatMultSymbolic_RowMergeMacro(6);
1005: break;
1006: case 7:
1007: MatMatMultSymbolic_RowMergeMacro(7);
1008: break;
1009: case 8:
1010: MatMatMultSymbolic_RowMergeMacro(8);
1011: break;
1012: default:
1013: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatMatMult logic error: Not merging 1-8 rows from L1 work array!");
1014: }
1015: L2_nnz += outputi_nnz;
1016: worki_L2[++L2_nrows] = L2_nnz;
1018: /************************ L E V E L 3 **********************/
1019: /* Merge from L2 work array to either C or to L2 work array */
1020: if (anzi > 64 && (L2_nrows == 8 || rowsleft == 0)) {
1021: inputi = worki_L2;
1022: inputj = workj_L2;
1023: inputcol = workcol;
1024: outputi_nnz = 0;
1025: if (rowsleft) outputj = workj_L3;
1026: else outputj = cj + ci_nnz;
1027: switch (L2_nrows) {
1028: case 1:
1029: brow_ptr[0] = inputj + inputi[inputcol[0]];
1030: brow_end[0] = inputj + inputi[inputcol[0] + 1];
1031: for (; brow_ptr[0] != brow_end[0]; ++brow_ptr[0]) outputj[outputi_nnz++] = *brow_ptr[0]; /* copy row in b over */
1032: break;
1033: case 2:
1034: MatMatMultSymbolic_RowMergeMacro(2);
1035: break;
1036: case 3:
1037: MatMatMultSymbolic_RowMergeMacro(3);
1038: break;
1039: case 4:
1040: MatMatMultSymbolic_RowMergeMacro(4);
1041: break;
1042: case 5:
1043: MatMatMultSymbolic_RowMergeMacro(5);
1044: break;
1045: case 6:
1046: MatMatMultSymbolic_RowMergeMacro(6);
1047: break;
1048: case 7:
1049: MatMatMultSymbolic_RowMergeMacro(7);
1050: break;
1051: case 8:
1052: MatMatMultSymbolic_RowMergeMacro(8);
1053: break;
1054: default:
1055: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatMatMult logic error: Not merging 1-8 rows from L2 work array!");
1056: }
1057: L2_nrows = 1;
1058: L2_nnz = outputi_nnz;
1059: worki_L2[1] = outputi_nnz;
1060: /* Copy to workj_L2 */
1061: if (rowsleft) {
1062: for (k = 0; k < outputi_nnz; ++k) workj_L2[k] = outputj[k];
1063: }
1064: }
1065: }
1066: } /* while (rowsleft) */
1067: #undef MatMatMultSymbolic_RowMergeMacro
1069: /* terminate current row */
1070: ci_nnz += outputi_nnz;
1071: ci[i + 1] = ci_nnz;
1072: }
1074: /* Step 3: Create the new symbolic matrix */
1075: MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A), am, bn, ci, cj, NULL, ((PetscObject)A)->type_name, C);
1076: MatSetBlockSizesFromMats(C, A, B);
1078: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
1079: /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
1080: c = (Mat_SeqAIJ *)(C->data);
1081: c->free_a = PETSC_TRUE;
1082: c->free_ij = PETSC_TRUE;
1083: c->nonew = 0;
1085: C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted;
1087: /* set MatInfo */
1088: afill = (PetscReal)ci[am] / (ai[am] + bi[bm]) + 1.e-5;
1089: if (afill < 1.0) afill = 1.0;
1090: C->info.mallocs = ndouble;
1091: C->info.fill_ratio_given = fill;
1092: C->info.fill_ratio_needed = afill;
1094: #if defined(PETSC_USE_INFO)
1095: if (ci[am]) {
1096: PetscInfo(C, "Reallocs %" PetscInt_FMT "; Fill ratio: given %g needed %g.\n", ndouble, (double)fill, (double)afill);
1097: PetscInfo(C, "Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n", (double)afill);
1098: } else {
1099: PetscInfo(C, "Empty matrix product\n");
1100: }
1101: #endif
1103: /* Step 4: Free temporary work areas */
1104: PetscFree(workj_L1);
1105: PetscFree(workj_L2);
1106: PetscFree(workj_L3);
1107: return 0;
1108: }
1110: /* concatenate unique entries and then sort */
1111: PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Sorted(Mat A, Mat B, PetscReal fill, Mat C)
1112: {
1113: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)B->data, *c;
1114: const PetscInt *ai = a->i, *bi = b->i, *aj = a->j, *bj = b->j;
1115: PetscInt *ci, *cj, bcol;
1116: PetscInt am = A->rmap->N, bn = B->cmap->N, bm = B->rmap->N;
1117: PetscReal afill;
1118: PetscInt i, j, ndouble = 0;
1119: PetscSegBuffer seg, segrow;
1120: char *seen;
1122: PetscMalloc1(am + 1, &ci);
1123: ci[0] = 0;
1125: /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
1126: PetscSegBufferCreate(sizeof(PetscInt), (PetscInt)(fill * (ai[am] + bi[bm])), &seg);
1127: PetscSegBufferCreate(sizeof(PetscInt), 100, &segrow);
1128: PetscCalloc1(bn, &seen);
1130: /* Determine ci and cj */
1131: for (i = 0; i < am; i++) {
1132: const PetscInt anzi = ai[i + 1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */
1133: const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */
1134: PetscInt packlen = 0, *PETSC_RESTRICT crow;
1136: /* Pack segrow */
1137: for (j = 0; j < anzi; j++) {
1138: PetscInt brow = acol[j], bjstart = bi[brow], bjend = bi[brow + 1], k;
1139: for (k = bjstart; k < bjend; k++) {
1140: bcol = bj[k];
1141: if (!seen[bcol]) { /* new entry */
1142: PetscInt *PETSC_RESTRICT slot;
1143: PetscSegBufferGetInts(segrow, 1, &slot);
1144: *slot = bcol;
1145: seen[bcol] = 1;
1146: packlen++;
1147: }
1148: }
1149: }
1151: /* Check i-th diagonal entry */
1152: if (C->force_diagonals && !seen[i]) {
1153: PetscInt *PETSC_RESTRICT slot;
1154: PetscSegBufferGetInts(segrow, 1, &slot);
1155: *slot = i;
1156: seen[i] = 1;
1157: packlen++;
1158: }
1160: PetscSegBufferGetInts(seg, packlen, &crow);
1161: PetscSegBufferExtractTo(segrow, crow);
1162: PetscSortInt(packlen, crow);
1163: ci[i + 1] = ci[i] + packlen;
1164: for (j = 0; j < packlen; j++) seen[crow[j]] = 0;
1165: }
1166: PetscSegBufferDestroy(&segrow);
1167: PetscFree(seen);
1169: /* Column indices are in the segmented buffer */
1170: PetscSegBufferExtractAlloc(seg, &cj);
1171: PetscSegBufferDestroy(&seg);
1173: /* put together the new symbolic matrix */
1174: MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A), am, bn, ci, cj, NULL, ((PetscObject)A)->type_name, C);
1175: MatSetBlockSizesFromMats(C, A, B);
1177: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
1178: /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
1179: c = (Mat_SeqAIJ *)(C->data);
1180: c->free_a = PETSC_TRUE;
1181: c->free_ij = PETSC_TRUE;
1182: c->nonew = 0;
1184: C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted;
1186: /* set MatInfo */
1187: afill = (PetscReal)ci[am] / PetscMax(ai[am] + bi[bm], 1) + 1.e-5;
1188: if (afill < 1.0) afill = 1.0;
1189: C->info.mallocs = ndouble;
1190: C->info.fill_ratio_given = fill;
1191: C->info.fill_ratio_needed = afill;
1193: #if defined(PETSC_USE_INFO)
1194: if (ci[am]) {
1195: PetscInfo(C, "Reallocs %" PetscInt_FMT "; Fill ratio: given %g needed %g.\n", ndouble, (double)fill, (double)afill);
1196: PetscInfo(C, "Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n", (double)afill);
1197: } else {
1198: PetscInfo(C, "Empty matrix product\n");
1199: }
1200: #endif
1201: return 0;
1202: }
1204: PetscErrorCode MatDestroy_SeqAIJ_MatMatMultTrans(void *data)
1205: {
1206: Mat_MatMatTransMult *abt = (Mat_MatMatTransMult *)data;
1208: MatTransposeColoringDestroy(&abt->matcoloring);
1209: MatDestroy(&abt->Bt_den);
1210: MatDestroy(&abt->ABt_den);
1211: PetscFree(abt);
1212: return 0;
1213: }
1215: PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ(Mat A, Mat B, PetscReal fill, Mat C)
1216: {
1217: Mat Bt;
1218: Mat_MatMatTransMult *abt;
1219: Mat_Product *product = C->product;
1220: char *alg;
1225: /* create symbolic Bt */
1226: MatTransposeSymbolic(B, &Bt);
1227: MatSetBlockSizes(Bt, PetscAbs(A->cmap->bs), PetscAbs(B->cmap->bs));
1228: MatSetType(Bt, ((PetscObject)A)->type_name);
1230: /* get symbolic C=A*Bt */
1231: PetscStrallocpy(product->alg, &alg);
1232: MatProductSetAlgorithm(C, "sorted"); /* set algorithm for C = A*Bt */
1233: MatMatMultSymbolic_SeqAIJ_SeqAIJ(A, Bt, fill, C);
1234: MatProductSetAlgorithm(C, alg); /* resume original algorithm for ABt product */
1235: PetscFree(alg);
1237: /* create a supporting struct for reuse intermediate dense matrices with matcoloring */
1238: PetscNew(&abt);
1240: product->data = abt;
1241: product->destroy = MatDestroy_SeqAIJ_MatMatMultTrans;
1243: C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ;
1245: abt->usecoloring = PETSC_FALSE;
1246: PetscStrcmp(product->alg, "color", &abt->usecoloring);
1247: if (abt->usecoloring) {
1248: /* Create MatTransposeColoring from symbolic C=A*B^T */
1249: MatTransposeColoring matcoloring;
1250: MatColoring coloring;
1251: ISColoring iscoloring;
1252: Mat Bt_dense, C_dense;
1254: /* inode causes memory problem */
1255: MatSetOption(C, MAT_USE_INODES, PETSC_FALSE);
1257: MatColoringCreate(C, &coloring);
1258: MatColoringSetDistance(coloring, 2);
1259: MatColoringSetType(coloring, MATCOLORINGSL);
1260: MatColoringSetFromOptions(coloring);
1261: MatColoringApply(coloring, &iscoloring);
1262: MatColoringDestroy(&coloring);
1263: MatTransposeColoringCreate(C, iscoloring, &matcoloring);
1265: abt->matcoloring = matcoloring;
1267: ISColoringDestroy(&iscoloring);
1269: /* Create Bt_dense and C_dense = A*Bt_dense */
1270: MatCreate(PETSC_COMM_SELF, &Bt_dense);
1271: MatSetSizes(Bt_dense, A->cmap->n, matcoloring->ncolors, A->cmap->n, matcoloring->ncolors);
1272: MatSetType(Bt_dense, MATSEQDENSE);
1273: MatSeqDenseSetPreallocation(Bt_dense, NULL);
1275: Bt_dense->assembled = PETSC_TRUE;
1276: abt->Bt_den = Bt_dense;
1278: MatCreate(PETSC_COMM_SELF, &C_dense);
1279: MatSetSizes(C_dense, A->rmap->n, matcoloring->ncolors, A->rmap->n, matcoloring->ncolors);
1280: MatSetType(C_dense, MATSEQDENSE);
1281: MatSeqDenseSetPreallocation(C_dense, NULL);
1283: Bt_dense->assembled = PETSC_TRUE;
1284: abt->ABt_den = C_dense;
1286: #if defined(PETSC_USE_INFO)
1287: {
1288: Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data;
1289: PetscCall(PetscInfo(C, "Use coloring of C=A*B^T; B^T: %" PetscInt_FMT " %" PetscInt_FMT ", Bt_dense: %" PetscInt_FMT ",%" PetscInt_FMT "; Cnz %" PetscInt_FMT " / (cm*ncolors %" PetscInt_FMT ") = %g\n", B->cmap->n, B->rmap->n, Bt_dense->rmap->n,
1290: Bt_dense->cmap->n, c->nz, A->rmap->n * matcoloring->ncolors, (double)(((PetscReal)(c->nz)) / ((PetscReal)(A->rmap->n * matcoloring->ncolors)))));
1291: }
1292: #endif
1293: }
1294: /* clean up */
1295: MatDestroy(&Bt);
1296: return 0;
1297: }
1299: PetscErrorCode MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(Mat A, Mat B, Mat C)
1300: {
1301: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)B->data, *c = (Mat_SeqAIJ *)C->data;
1302: PetscInt *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, anzi, bnzj, nexta, nextb, *acol, *bcol, brow;
1303: PetscInt cm = C->rmap->n, *ci = c->i, *cj = c->j, i, j, cnzi, *ccol;
1304: PetscLogDouble flops = 0.0;
1305: MatScalar *aa = a->a, *aval, *ba = b->a, *bval, *ca, *cval;
1306: Mat_MatMatTransMult *abt;
1307: Mat_Product *product = C->product;
1310: abt = (Mat_MatMatTransMult *)product->data;
1312: /* clear old values in C */
1313: if (!c->a) {
1314: PetscCalloc1(ci[cm] + 1, &ca);
1315: c->a = ca;
1316: c->free_a = PETSC_TRUE;
1317: } else {
1318: ca = c->a;
1319: PetscArrayzero(ca, ci[cm] + 1);
1320: }
1322: if (abt->usecoloring) {
1323: MatTransposeColoring matcoloring = abt->matcoloring;
1324: Mat Bt_dense, C_dense = abt->ABt_den;
1326: /* Get Bt_dense by Apply MatTransposeColoring to B */
1327: Bt_dense = abt->Bt_den;
1328: MatTransColoringApplySpToDen(matcoloring, B, Bt_dense);
1330: /* C_dense = A*Bt_dense */
1331: MatMatMultNumeric_SeqAIJ_SeqDense(A, Bt_dense, C_dense);
1333: /* Recover C from C_dense */
1334: MatTransColoringApplyDenToSp(matcoloring, C_dense, C);
1335: return 0;
1336: }
1338: for (i = 0; i < cm; i++) {
1339: anzi = ai[i + 1] - ai[i];
1340: acol = aj + ai[i];
1341: aval = aa + ai[i];
1342: cnzi = ci[i + 1] - ci[i];
1343: ccol = cj + ci[i];
1344: cval = ca + ci[i];
1345: for (j = 0; j < cnzi; j++) {
1346: brow = ccol[j];
1347: bnzj = bi[brow + 1] - bi[brow];
1348: bcol = bj + bi[brow];
1349: bval = ba + bi[brow];
1351: /* perform sparse inner-product c(i,j)=A[i,:]*B[j,:]^T */
1352: nexta = 0;
1353: nextb = 0;
1354: while (nexta < anzi && nextb < bnzj) {
1355: while (nexta < anzi && acol[nexta] < bcol[nextb]) nexta++;
1356: if (nexta == anzi) break;
1357: while (nextb < bnzj && acol[nexta] > bcol[nextb]) nextb++;
1358: if (nextb == bnzj) break;
1359: if (acol[nexta] == bcol[nextb]) {
1360: cval[j] += aval[nexta] * bval[nextb];
1361: nexta++;
1362: nextb++;
1363: flops += 2;
1364: }
1365: }
1366: }
1367: }
1368: MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY);
1369: MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY);
1370: PetscLogFlops(flops);
1371: return 0;
1372: }
1374: PetscErrorCode MatDestroy_SeqAIJ_MatTransMatMult(void *data)
1375: {
1376: Mat_MatTransMatMult *atb = (Mat_MatTransMatMult *)data;
1378: MatDestroy(&atb->At);
1379: if (atb->destroy) (*atb->destroy)(atb->data);
1380: PetscFree(atb);
1381: return 0;
1382: }
1384: PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A, Mat B, PetscReal fill, Mat C)
1385: {
1386: Mat At = NULL;
1387: Mat_Product *product = C->product;
1388: PetscBool flg, def, square;
1390: MatCheckProduct(C, 4);
1391: square = (PetscBool)(A == B && A->symmetric == PETSC_BOOL3_TRUE);
1392: /* outerproduct */
1393: PetscStrcmp(product->alg, "outerproduct", &flg);
1394: if (flg) {
1395: /* create symbolic At */
1396: if (!square) {
1397: MatTransposeSymbolic(A, &At);
1398: MatSetBlockSizes(At, PetscAbs(A->cmap->bs), PetscAbs(B->cmap->bs));
1399: MatSetType(At, ((PetscObject)A)->type_name);
1400: }
1401: /* get symbolic C=At*B */
1402: MatProductSetAlgorithm(C, "sorted");
1403: MatMatMultSymbolic_SeqAIJ_SeqAIJ(square ? A : At, B, fill, C);
1405: /* clean up */
1406: if (!square) MatDestroy(&At);
1408: C->ops->mattransposemultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ; /* outerproduct */
1409: MatProductSetAlgorithm(C, "outerproduct");
1410: return 0;
1411: }
1413: /* matmatmult */
1414: PetscStrcmp(product->alg, "default", &def);
1415: PetscStrcmp(product->alg, "at*b", &flg);
1416: if (flg || def) {
1417: Mat_MatTransMatMult *atb;
1420: PetscNew(&atb);
1421: if (!square) MatTranspose(A, MAT_INITIAL_MATRIX, &At);
1422: MatProductSetAlgorithm(C, "sorted");
1423: MatMatMultSymbolic_SeqAIJ_SeqAIJ(square ? A : At, B, fill, C);
1424: MatProductSetAlgorithm(C, "at*b");
1425: product->data = atb;
1426: product->destroy = MatDestroy_SeqAIJ_MatTransMatMult;
1427: atb->At = At;
1429: C->ops->mattransposemultnumeric = NULL; /* see MatProductNumeric_AtB_SeqAIJ_SeqAIJ */
1430: return 0;
1431: }
1433: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product Algorithm is not supported");
1434: }
1436: PetscErrorCode MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ(Mat A, Mat B, Mat C)
1437: {
1438: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)B->data, *c = (Mat_SeqAIJ *)C->data;
1439: PetscInt am = A->rmap->n, anzi, *ai = a->i, *aj = a->j, *bi = b->i, *bj, bnzi, nextb;
1440: PetscInt cm = C->rmap->n, *ci = c->i, *cj = c->j, crow, *cjj, i, j, k;
1441: PetscLogDouble flops = 0.0;
1442: MatScalar *aa = a->a, *ba, *ca, *caj;
1444: if (!c->a) {
1445: PetscCalloc1(ci[cm] + 1, &ca);
1447: c->a = ca;
1448: c->free_a = PETSC_TRUE;
1449: } else {
1450: ca = c->a;
1451: PetscArrayzero(ca, ci[cm]);
1452: }
1454: /* compute A^T*B using outer product (A^T)[:,i]*B[i,:] */
1455: for (i = 0; i < am; i++) {
1456: bj = b->j + bi[i];
1457: ba = b->a + bi[i];
1458: bnzi = bi[i + 1] - bi[i];
1459: anzi = ai[i + 1] - ai[i];
1460: for (j = 0; j < anzi; j++) {
1461: nextb = 0;
1462: crow = *aj++;
1463: cjj = cj + ci[crow];
1464: caj = ca + ci[crow];
1465: /* perform sparse axpy operation. Note cjj includes bj. */
1466: for (k = 0; nextb < bnzi; k++) {
1467: if (cjj[k] == *(bj + nextb)) { /* ccol == bcol */
1468: caj[k] += (*aa) * (*(ba + nextb));
1469: nextb++;
1470: }
1471: }
1472: flops += 2 * bnzi;
1473: aa++;
1474: }
1475: }
1477: /* Assemble the final matrix and clean up */
1478: MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY);
1479: MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY);
1480: PetscLogFlops(flops);
1481: return 0;
1482: }
1484: PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqDense(Mat A, Mat B, PetscReal fill, Mat C)
1485: {
1486: MatMatMultSymbolic_SeqDense_SeqDense(A, B, 0.0, C);
1487: C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqDense;
1488: return 0;
1489: }
1491: PETSC_INTERN PetscErrorCode MatMatMultNumericAdd_SeqAIJ_SeqDense(Mat A, Mat B, Mat C, const PetscBool add)
1492: {
1493: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1494: PetscScalar *c, r1, r2, r3, r4, *c1, *c2, *c3, *c4;
1495: const PetscScalar *aa, *b, *b1, *b2, *b3, *b4, *av;
1496: const PetscInt *aj;
1497: PetscInt cm = C->rmap->n, cn = B->cmap->n, bm, am = A->rmap->n;
1498: PetscInt clda;
1499: PetscInt am4, bm4, col, i, j, n;
1501: if (!cm || !cn) return 0;
1502: MatSeqAIJGetArrayRead(A, &av);
1503: if (add) {
1504: MatDenseGetArray(C, &c);
1505: } else {
1506: MatDenseGetArrayWrite(C, &c);
1507: }
1508: MatDenseGetArrayRead(B, &b);
1509: MatDenseGetLDA(B, &bm);
1510: MatDenseGetLDA(C, &clda);
1511: am4 = 4 * clda;
1512: bm4 = 4 * bm;
1513: b1 = b;
1514: b2 = b1 + bm;
1515: b3 = b2 + bm;
1516: b4 = b3 + bm;
1517: c1 = c;
1518: c2 = c1 + clda;
1519: c3 = c2 + clda;
1520: c4 = c3 + clda;
1521: for (col = 0; col < (cn / 4) * 4; col += 4) { /* over columns of C */
1522: for (i = 0; i < am; i++) { /* over rows of A in those columns */
1523: r1 = r2 = r3 = r4 = 0.0;
1524: n = a->i[i + 1] - a->i[i];
1525: aj = a->j + a->i[i];
1526: aa = av + a->i[i];
1527: for (j = 0; j < n; j++) {
1528: const PetscScalar aatmp = aa[j];
1529: const PetscInt ajtmp = aj[j];
1530: r1 += aatmp * b1[ajtmp];
1531: r2 += aatmp * b2[ajtmp];
1532: r3 += aatmp * b3[ajtmp];
1533: r4 += aatmp * b4[ajtmp];
1534: }
1535: if (add) {
1536: c1[i] += r1;
1537: c2[i] += r2;
1538: c3[i] += r3;
1539: c4[i] += r4;
1540: } else {
1541: c1[i] = r1;
1542: c2[i] = r2;
1543: c3[i] = r3;
1544: c4[i] = r4;
1545: }
1546: }
1547: b1 += bm4;
1548: b2 += bm4;
1549: b3 += bm4;
1550: b4 += bm4;
1551: c1 += am4;
1552: c2 += am4;
1553: c3 += am4;
1554: c4 += am4;
1555: }
1556: /* process remaining columns */
1557: if (col != cn) {
1558: PetscInt rc = cn - col;
1560: if (rc == 1) {
1561: for (i = 0; i < am; i++) {
1562: r1 = 0.0;
1563: n = a->i[i + 1] - a->i[i];
1564: aj = a->j + a->i[i];
1565: aa = av + a->i[i];
1566: for (j = 0; j < n; j++) r1 += aa[j] * b1[aj[j]];
1567: if (add) c1[i] += r1;
1568: else c1[i] = r1;
1569: }
1570: } else if (rc == 2) {
1571: for (i = 0; i < am; i++) {
1572: r1 = r2 = 0.0;
1573: n = a->i[i + 1] - a->i[i];
1574: aj = a->j + a->i[i];
1575: aa = av + a->i[i];
1576: for (j = 0; j < n; j++) {
1577: const PetscScalar aatmp = aa[j];
1578: const PetscInt ajtmp = aj[j];
1579: r1 += aatmp * b1[ajtmp];
1580: r2 += aatmp * b2[ajtmp];
1581: }
1582: if (add) {
1583: c1[i] += r1;
1584: c2[i] += r2;
1585: } else {
1586: c1[i] = r1;
1587: c2[i] = r2;
1588: }
1589: }
1590: } else {
1591: for (i = 0; i < am; i++) {
1592: r1 = r2 = r3 = 0.0;
1593: n = a->i[i + 1] - a->i[i];
1594: aj = a->j + a->i[i];
1595: aa = av + a->i[i];
1596: for (j = 0; j < n; j++) {
1597: const PetscScalar aatmp = aa[j];
1598: const PetscInt ajtmp = aj[j];
1599: r1 += aatmp * b1[ajtmp];
1600: r2 += aatmp * b2[ajtmp];
1601: r3 += aatmp * b3[ajtmp];
1602: }
1603: if (add) {
1604: c1[i] += r1;
1605: c2[i] += r2;
1606: c3[i] += r3;
1607: } else {
1608: c1[i] = r1;
1609: c2[i] = r2;
1610: c3[i] = r3;
1611: }
1612: }
1613: }
1614: }
1615: PetscLogFlops(cn * (2.0 * a->nz));
1616: if (add) {
1617: MatDenseRestoreArray(C, &c);
1618: } else {
1619: MatDenseRestoreArrayWrite(C, &c);
1620: }
1621: MatDenseRestoreArrayRead(B, &b);
1622: MatSeqAIJRestoreArrayRead(A, &av);
1623: return 0;
1624: }
1626: PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqDense(Mat A, Mat B, Mat C)
1627: {
1632: MatMatMultNumericAdd_SeqAIJ_SeqDense(A, B, C, PETSC_FALSE);
1633: return 0;
1634: }
1636: /* ------------------------------------------------------- */
1637: static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense_AB(Mat C)
1638: {
1639: C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJ_SeqDense;
1640: C->ops->productsymbolic = MatProductSymbolic_AB;
1641: return 0;
1642: }
1644: PETSC_INTERN PetscErrorCode MatTMatTMultSymbolic_SeqAIJ_SeqDense(Mat, Mat, PetscReal, Mat);
1646: static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense_AtB(Mat C)
1647: {
1648: C->ops->transposematmultsymbolic = MatTMatTMultSymbolic_SeqAIJ_SeqDense;
1649: C->ops->productsymbolic = MatProductSymbolic_AtB;
1650: return 0;
1651: }
1653: static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense_ABt(Mat C)
1654: {
1655: C->ops->mattransposemultsymbolic = MatTMatTMultSymbolic_SeqAIJ_SeqDense;
1656: C->ops->productsymbolic = MatProductSymbolic_ABt;
1657: return 0;
1658: }
1660: PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense(Mat C)
1661: {
1662: Mat_Product *product = C->product;
1664: switch (product->type) {
1665: case MATPRODUCT_AB:
1666: MatProductSetFromOptions_SeqAIJ_SeqDense_AB(C);
1667: break;
1668: case MATPRODUCT_AtB:
1669: MatProductSetFromOptions_SeqAIJ_SeqDense_AtB(C);
1670: break;
1671: case MATPRODUCT_ABt:
1672: MatProductSetFromOptions_SeqAIJ_SeqDense_ABt(C);
1673: break;
1674: default:
1675: break;
1676: }
1677: return 0;
1678: }
1679: /* ------------------------------------------------------- */
1680: static PetscErrorCode MatProductSetFromOptions_SeqXBAIJ_SeqDense_AB(Mat C)
1681: {
1682: Mat_Product *product = C->product;
1683: Mat A = product->A;
1684: PetscBool baij;
1686: PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &baij);
1687: if (!baij) { /* A is seqsbaij */
1688: PetscBool sbaij;
1689: PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &sbaij);
1692: C->ops->matmultsymbolic = MatMatMultSymbolic_SeqSBAIJ_SeqDense;
1693: } else { /* A is seqbaij */
1694: C->ops->matmultsymbolic = MatMatMultSymbolic_SeqBAIJ_SeqDense;
1695: }
1697: C->ops->productsymbolic = MatProductSymbolic_AB;
1698: return 0;
1699: }
1701: PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqXBAIJ_SeqDense(Mat C)
1702: {
1703: Mat_Product *product = C->product;
1705: MatCheckProduct(C, 1);
1707: if (product->type == MATPRODUCT_AB || (product->type == MATPRODUCT_AtB && product->A->symmetric == PETSC_BOOL3_TRUE)) MatProductSetFromOptions_SeqXBAIJ_SeqDense_AB(C);
1708: return 0;
1709: }
1711: /* ------------------------------------------------------- */
1712: static PetscErrorCode MatProductSetFromOptions_SeqDense_SeqAIJ_AB(Mat C)
1713: {
1714: C->ops->matmultsymbolic = MatMatMultSymbolic_SeqDense_SeqAIJ;
1715: C->ops->productsymbolic = MatProductSymbolic_AB;
1716: return 0;
1717: }
1719: PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqDense_SeqAIJ(Mat C)
1720: {
1721: Mat_Product *product = C->product;
1723: if (product->type == MATPRODUCT_AB) MatProductSetFromOptions_SeqDense_SeqAIJ_AB(C);
1724: return 0;
1725: }
1726: /* ------------------------------------------------------- */
1728: PetscErrorCode MatTransColoringApplySpToDen_SeqAIJ(MatTransposeColoring coloring, Mat B, Mat Btdense)
1729: {
1730: Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
1731: Mat_SeqDense *btdense = (Mat_SeqDense *)Btdense->data;
1732: PetscInt *bi = b->i, *bj = b->j;
1733: PetscInt m = Btdense->rmap->n, n = Btdense->cmap->n, j, k, l, col, anz, *btcol, brow, ncolumns;
1734: MatScalar *btval, *btval_den, *ba = b->a;
1735: PetscInt *columns = coloring->columns, *colorforcol = coloring->colorforcol, ncolors = coloring->ncolors;
1737: btval_den = btdense->v;
1738: PetscArrayzero(btval_den, m * n);
1739: for (k = 0; k < ncolors; k++) {
1740: ncolumns = coloring->ncolumns[k];
1741: for (l = 0; l < ncolumns; l++) { /* insert a row of B to a column of Btdense */
1742: col = *(columns + colorforcol[k] + l);
1743: btcol = bj + bi[col];
1744: btval = ba + bi[col];
1745: anz = bi[col + 1] - bi[col];
1746: for (j = 0; j < anz; j++) {
1747: brow = btcol[j];
1748: btval_den[brow] = btval[j];
1749: }
1750: }
1751: btval_den += m;
1752: }
1753: return 0;
1754: }
1756: PetscErrorCode MatTransColoringApplyDenToSp_SeqAIJ(MatTransposeColoring matcoloring, Mat Cden, Mat Csp)
1757: {
1758: Mat_SeqAIJ *csp = (Mat_SeqAIJ *)Csp->data;
1759: const PetscScalar *ca_den, *ca_den_ptr;
1760: PetscScalar *ca = csp->a;
1761: PetscInt k, l, m = Cden->rmap->n, ncolors = matcoloring->ncolors;
1762: PetscInt brows = matcoloring->brows, *den2sp = matcoloring->den2sp;
1763: PetscInt nrows, *row, *idx;
1764: PetscInt *rows = matcoloring->rows, *colorforrow = matcoloring->colorforrow;
1766: MatDenseGetArrayRead(Cden, &ca_den);
1768: if (brows > 0) {
1769: PetscInt *lstart, row_end, row_start;
1770: lstart = matcoloring->lstart;
1771: PetscArrayzero(lstart, ncolors);
1773: row_end = brows;
1774: if (row_end > m) row_end = m;
1775: for (row_start = 0; row_start < m; row_start += brows) { /* loop over row blocks of Csp */
1776: ca_den_ptr = ca_den;
1777: for (k = 0; k < ncolors; k++) { /* loop over colors (columns of Cden) */
1778: nrows = matcoloring->nrows[k];
1779: row = rows + colorforrow[k];
1780: idx = den2sp + colorforrow[k];
1781: for (l = lstart[k]; l < nrows; l++) {
1782: if (row[l] >= row_end) {
1783: lstart[k] = l;
1784: break;
1785: } else {
1786: ca[idx[l]] = ca_den_ptr[row[l]];
1787: }
1788: }
1789: ca_den_ptr += m;
1790: }
1791: row_end += brows;
1792: if (row_end > m) row_end = m;
1793: }
1794: } else { /* non-blocked impl: loop over columns of Csp - slow if Csp is large */
1795: ca_den_ptr = ca_den;
1796: for (k = 0; k < ncolors; k++) {
1797: nrows = matcoloring->nrows[k];
1798: row = rows + colorforrow[k];
1799: idx = den2sp + colorforrow[k];
1800: for (l = 0; l < nrows; l++) ca[idx[l]] = ca_den_ptr[row[l]];
1801: ca_den_ptr += m;
1802: }
1803: }
1805: MatDenseRestoreArrayRead(Cden, &ca_den);
1806: #if defined(PETSC_USE_INFO)
1807: if (matcoloring->brows > 0) {
1808: PetscInfo(Csp, "Loop over %" PetscInt_FMT " row blocks for den2sp\n", brows);
1809: } else {
1810: PetscInfo(Csp, "Loop over colors/columns of Cden, inefficient for large sparse matrix product \n");
1811: }
1812: #endif
1813: return 0;
1814: }
1816: PetscErrorCode MatTransposeColoringCreate_SeqAIJ(Mat mat, ISColoring iscoloring, MatTransposeColoring c)
1817: {
1818: PetscInt i, n, nrows, Nbs, j, k, m, ncols, col, cm;
1819: const PetscInt *is, *ci, *cj, *row_idx;
1820: PetscInt nis = iscoloring->n, *rowhit, bs = 1;
1821: IS *isa;
1822: Mat_SeqAIJ *csp = (Mat_SeqAIJ *)mat->data;
1823: PetscInt *colorforrow, *rows, *rows_i, *idxhit, *spidx, *den2sp, *den2sp_i;
1824: PetscInt *colorforcol, *columns, *columns_i, brows;
1825: PetscBool flg;
1827: ISColoringGetIS(iscoloring, PETSC_USE_POINTER, PETSC_IGNORE, &isa);
1829: /* bs >1 is not being tested yet! */
1830: Nbs = mat->cmap->N / bs;
1831: c->M = mat->rmap->N / bs; /* set total rows, columns and local rows */
1832: c->N = Nbs;
1833: c->m = c->M;
1834: c->rstart = 0;
1835: c->brows = 100;
1837: c->ncolors = nis;
1838: PetscMalloc3(nis, &c->ncolumns, nis, &c->nrows, nis + 1, &colorforrow);
1839: PetscMalloc1(csp->nz + 1, &rows);
1840: PetscMalloc1(csp->nz + 1, &den2sp);
1842: brows = c->brows;
1843: PetscOptionsGetInt(NULL, NULL, "-matden2sp_brows", &brows, &flg);
1844: if (flg) c->brows = brows;
1845: if (brows > 0) PetscMalloc1(nis + 1, &c->lstart);
1847: colorforrow[0] = 0;
1848: rows_i = rows;
1849: den2sp_i = den2sp;
1851: PetscMalloc1(nis + 1, &colorforcol);
1852: PetscMalloc1(Nbs + 1, &columns);
1854: colorforcol[0] = 0;
1855: columns_i = columns;
1857: /* get column-wise storage of mat */
1858: MatGetColumnIJ_SeqAIJ_Color(mat, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &ci, &cj, &spidx, NULL);
1860: cm = c->m;
1861: PetscMalloc1(cm + 1, &rowhit);
1862: PetscMalloc1(cm + 1, &idxhit);
1863: for (i = 0; i < nis; i++) { /* loop over color */
1864: ISGetLocalSize(isa[i], &n);
1865: ISGetIndices(isa[i], &is);
1867: c->ncolumns[i] = n;
1868: if (n) PetscArraycpy(columns_i, is, n);
1869: colorforcol[i + 1] = colorforcol[i] + n;
1870: columns_i += n;
1872: /* fast, crude version requires O(N*N) work */
1873: PetscArrayzero(rowhit, cm);
1875: for (j = 0; j < n; j++) { /* loop over columns*/
1876: col = is[j];
1877: row_idx = cj + ci[col];
1878: m = ci[col + 1] - ci[col];
1879: for (k = 0; k < m; k++) { /* loop over columns marking them in rowhit */
1880: idxhit[*row_idx] = spidx[ci[col] + k];
1881: rowhit[*row_idx++] = col + 1;
1882: }
1883: }
1884: /* count the number of hits */
1885: nrows = 0;
1886: for (j = 0; j < cm; j++) {
1887: if (rowhit[j]) nrows++;
1888: }
1889: c->nrows[i] = nrows;
1890: colorforrow[i + 1] = colorforrow[i] + nrows;
1892: nrows = 0;
1893: for (j = 0; j < cm; j++) { /* loop over rows */
1894: if (rowhit[j]) {
1895: rows_i[nrows] = j;
1896: den2sp_i[nrows] = idxhit[j];
1897: nrows++;
1898: }
1899: }
1900: den2sp_i += nrows;
1902: ISRestoreIndices(isa[i], &is);
1903: rows_i += nrows;
1904: }
1905: MatRestoreColumnIJ_SeqAIJ_Color(mat, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &ci, &cj, &spidx, NULL);
1906: PetscFree(rowhit);
1907: ISColoringRestoreIS(iscoloring, PETSC_USE_POINTER, &isa);
1910: c->colorforrow = colorforrow;
1911: c->rows = rows;
1912: c->den2sp = den2sp;
1913: c->colorforcol = colorforcol;
1914: c->columns = columns;
1916: PetscFree(idxhit);
1917: return 0;
1918: }
1920: /* --------------------------------------------------------------- */
1921: static PetscErrorCode MatProductNumeric_AtB_SeqAIJ_SeqAIJ(Mat C)
1922: {
1923: Mat_Product *product = C->product;
1924: Mat A = product->A, B = product->B;
1926: if (C->ops->mattransposemultnumeric) {
1927: /* Alg: "outerproduct" */
1928: (*C->ops->mattransposemultnumeric)(A, B, C);
1929: } else {
1930: /* Alg: "matmatmult" -- C = At*B */
1931: Mat_MatTransMatMult *atb = (Mat_MatTransMatMult *)product->data;
1934: if (atb->At) {
1935: /* At is computed in MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ();
1936: user may have called MatProductReplaceMats() to get this A=product->A */
1937: MatTransposeSetPrecursor(A, atb->At);
1938: MatTranspose(A, MAT_REUSE_MATRIX, &atb->At);
1939: }
1940: MatMatMultNumeric_SeqAIJ_SeqAIJ(atb->At ? atb->At : A, B, C);
1941: }
1942: return 0;
1943: }
1945: static PetscErrorCode MatProductSymbolic_AtB_SeqAIJ_SeqAIJ(Mat C)
1946: {
1947: Mat_Product *product = C->product;
1948: Mat A = product->A, B = product->B;
1949: PetscReal fill = product->fill;
1951: MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ(A, B, fill, C);
1953: C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJ_SeqAIJ;
1954: return 0;
1955: }
1957: /* --------------------------------------------------------------- */
1958: static PetscErrorCode MatProductSetFromOptions_SeqAIJ_AB(Mat C)
1959: {
1960: Mat_Product *product = C->product;
1961: PetscInt alg = 0; /* default algorithm */
1962: PetscBool flg = PETSC_FALSE;
1963: #if !defined(PETSC_HAVE_HYPRE)
1964: const char *algTypes[7] = {"sorted", "scalable", "scalable_fast", "heap", "btheap", "llcondensed", "rowmerge"};
1965: PetscInt nalg = 7;
1966: #else
1967: const char *algTypes[8] = {"sorted", "scalable", "scalable_fast", "heap", "btheap", "llcondensed", "rowmerge", "hypre"};
1968: PetscInt nalg = 8;
1969: #endif
1971: /* Set default algorithm */
1972: PetscStrcmp(C->product->alg, "default", &flg);
1973: if (flg) MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]);
1975: /* Get runtime option */
1976: if (product->api_user) {
1977: PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat");
1978: PetscOptionsEList("-matmatmult_via", "Algorithmic approach", "MatMatMult", algTypes, nalg, algTypes[0], &alg, &flg);
1979: PetscOptionsEnd();
1980: } else {
1981: PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
1982: PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_AB", algTypes, nalg, algTypes[0], &alg, &flg);
1983: PetscOptionsEnd();
1984: }
1985: if (flg) MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]);
1987: C->ops->productsymbolic = MatProductSymbolic_AB;
1988: C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJ_SeqAIJ;
1989: return 0;
1990: }
1992: static PetscErrorCode MatProductSetFromOptions_SeqAIJ_AtB(Mat C)
1993: {
1994: Mat_Product *product = C->product;
1995: PetscInt alg = 0; /* default algorithm */
1996: PetscBool flg = PETSC_FALSE;
1997: const char *algTypes[3] = {"default", "at*b", "outerproduct"};
1998: PetscInt nalg = 3;
2000: /* Get runtime option */
2001: if (product->api_user) {
2002: PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatTransposeMatMult", "Mat");
2003: PetscOptionsEList("-mattransposematmult_via", "Algorithmic approach", "MatTransposeMatMult", algTypes, nalg, algTypes[alg], &alg, &flg);
2004: PetscOptionsEnd();
2005: } else {
2006: PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AtB", "Mat");
2007: PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_AtB", algTypes, nalg, algTypes[alg], &alg, &flg);
2008: PetscOptionsEnd();
2009: }
2010: if (flg) MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]);
2012: C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJ_SeqAIJ;
2013: return 0;
2014: }
2016: static PetscErrorCode MatProductSetFromOptions_SeqAIJ_ABt(Mat C)
2017: {
2018: Mat_Product *product = C->product;
2019: PetscInt alg = 0; /* default algorithm */
2020: PetscBool flg = PETSC_FALSE;
2021: const char *algTypes[2] = {"default", "color"};
2022: PetscInt nalg = 2;
2024: /* Set default algorithm */
2025: PetscStrcmp(C->product->alg, "default", &flg);
2026: if (!flg) {
2027: alg = 1;
2028: MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]);
2029: }
2031: /* Get runtime option */
2032: if (product->api_user) {
2033: PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat");
2034: PetscOptionsEList("-matmattransmult_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg);
2035: PetscOptionsEnd();
2036: } else {
2037: PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat");
2038: PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg);
2039: PetscOptionsEnd();
2040: }
2041: if (flg) MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]);
2043: C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ;
2044: C->ops->productsymbolic = MatProductSymbolic_ABt;
2045: return 0;
2046: }
2048: static PetscErrorCode MatProductSetFromOptions_SeqAIJ_PtAP(Mat C)
2049: {
2050: Mat_Product *product = C->product;
2051: PetscBool flg = PETSC_FALSE;
2052: PetscInt alg = 0; /* default algorithm -- alg=1 should be default!!! */
2053: #if !defined(PETSC_HAVE_HYPRE)
2054: const char *algTypes[2] = {"scalable", "rap"};
2055: PetscInt nalg = 2;
2056: #else
2057: const char *algTypes[3] = {"scalable", "rap", "hypre"};
2058: PetscInt nalg = 3;
2059: #endif
2061: /* Set default algorithm */
2062: PetscStrcmp(product->alg, "default", &flg);
2063: if (flg) MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]);
2065: /* Get runtime option */
2066: if (product->api_user) {
2067: PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat");
2068: PetscOptionsEList("-matptap_via", "Algorithmic approach", "MatPtAP", algTypes, nalg, algTypes[0], &alg, &flg);
2069: PetscOptionsEnd();
2070: } else {
2071: PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
2072: PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_PtAP", algTypes, nalg, algTypes[0], &alg, &flg);
2073: PetscOptionsEnd();
2074: }
2075: if (flg) MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]);
2077: C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqAIJ;
2078: return 0;
2079: }
2081: static PetscErrorCode MatProductSetFromOptions_SeqAIJ_RARt(Mat C)
2082: {
2083: Mat_Product *product = C->product;
2084: PetscBool flg = PETSC_FALSE;
2085: PetscInt alg = 0; /* default algorithm */
2086: const char *algTypes[3] = {"r*a*rt", "r*art", "coloring_rart"};
2087: PetscInt nalg = 3;
2089: /* Set default algorithm */
2090: PetscStrcmp(product->alg, "default", &flg);
2091: if (flg) MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]);
2093: /* Get runtime option */
2094: if (product->api_user) {
2095: PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatRARt", "Mat");
2096: PetscOptionsEList("-matrart_via", "Algorithmic approach", "MatRARt", algTypes, nalg, algTypes[0], &alg, &flg);
2097: PetscOptionsEnd();
2098: } else {
2099: PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_RARt", "Mat");
2100: PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_RARt", algTypes, nalg, algTypes[0], &alg, &flg);
2101: PetscOptionsEnd();
2102: }
2103: if (flg) MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]);
2105: C->ops->productsymbolic = MatProductSymbolic_RARt_SeqAIJ_SeqAIJ;
2106: return 0;
2107: }
2109: /* ABC = A*B*C = A*(B*C); ABC's algorithm must be chosen from AB's algorithm */
2110: static PetscErrorCode MatProductSetFromOptions_SeqAIJ_ABC(Mat C)
2111: {
2112: Mat_Product *product = C->product;
2113: PetscInt alg = 0; /* default algorithm */
2114: PetscBool flg = PETSC_FALSE;
2115: const char *algTypes[7] = {"sorted", "scalable", "scalable_fast", "heap", "btheap", "llcondensed", "rowmerge"};
2116: PetscInt nalg = 7;
2118: /* Set default algorithm */
2119: PetscStrcmp(product->alg, "default", &flg);
2120: if (flg) MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]);
2122: /* Get runtime option */
2123: if (product->api_user) {
2124: PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMatMult", "Mat");
2125: PetscOptionsEList("-matmatmatmult_via", "Algorithmic approach", "MatMatMatMult", algTypes, nalg, algTypes[alg], &alg, &flg);
2126: PetscOptionsEnd();
2127: } else {
2128: PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABC", "Mat");
2129: PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABC", algTypes, nalg, algTypes[alg], &alg, &flg);
2130: PetscOptionsEnd();
2131: }
2132: if (flg) MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]);
2134: C->ops->matmatmultsymbolic = MatMatMatMultSymbolic_SeqAIJ_SeqAIJ_SeqAIJ;
2135: C->ops->productsymbolic = MatProductSymbolic_ABC;
2136: return 0;
2137: }
2139: PetscErrorCode MatProductSetFromOptions_SeqAIJ(Mat C)
2140: {
2141: Mat_Product *product = C->product;
2143: switch (product->type) {
2144: case MATPRODUCT_AB:
2145: MatProductSetFromOptions_SeqAIJ_AB(C);
2146: break;
2147: case MATPRODUCT_AtB:
2148: MatProductSetFromOptions_SeqAIJ_AtB(C);
2149: break;
2150: case MATPRODUCT_ABt:
2151: MatProductSetFromOptions_SeqAIJ_ABt(C);
2152: break;
2153: case MATPRODUCT_PtAP:
2154: MatProductSetFromOptions_SeqAIJ_PtAP(C);
2155: break;
2156: case MATPRODUCT_RARt:
2157: MatProductSetFromOptions_SeqAIJ_RARt(C);
2158: break;
2159: case MATPRODUCT_ABC:
2160: MatProductSetFromOptions_SeqAIJ_ABC(C);
2161: break;
2162: default:
2163: break;
2164: }
2165: return 0;
2166: }