Actual source code: matrart.c
2: /*
3: Defines projective product routines where A is a SeqAIJ matrix
4: C = R * A * R^T
5: */
7: #include <../src/mat/impls/aij/seq/aij.h>
8: #include <../src/mat/utils/freespace.h>
9: #include <../src/mat/impls/dense/seq/dense.h>
11: PetscErrorCode MatDestroy_SeqAIJ_RARt(void *data)
12: {
13: Mat_RARt *rart = (Mat_RARt *)data;
15: MatTransposeColoringDestroy(&rart->matcoloring);
16: MatDestroy(&rart->Rt);
17: MatDestroy(&rart->RARt);
18: MatDestroy(&rart->ARt);
19: PetscFree(rart->work);
20: if (rart->destroy) (*rart->destroy)(rart->data);
21: PetscFree(rart);
22: return 0;
23: }
25: PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(Mat A, Mat R, PetscReal fill, Mat C)
26: {
27: Mat P;
28: Mat_RARt *rart;
29: MatColoring coloring;
30: MatTransposeColoring matcoloring;
31: ISColoring iscoloring;
32: Mat Rt_dense, RARt_dense;
34: MatCheckProduct(C, 4);
36: /* create symbolic P=Rt */
37: MatTransposeSymbolic(R, &P);
39: /* get symbolic C=Pt*A*P */
40: MatPtAPSymbolic_SeqAIJ_SeqAIJ_SparseAxpy(A, P, fill, C);
41: MatSetBlockSizes(C, PetscAbs(R->rmap->bs), PetscAbs(R->rmap->bs));
42: C->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ_colorrart;
44: /* create a supporting struct */
45: PetscNew(&rart);
46: C->product->data = rart;
47: C->product->destroy = MatDestroy_SeqAIJ_RARt;
49: /* ------ Use coloring ---------- */
50: /* inode causes memory problem */
51: MatSetOption(C, MAT_USE_INODES, PETSC_FALSE);
53: /* Create MatTransposeColoring from symbolic C=R*A*R^T */
54: MatColoringCreate(C, &coloring);
55: MatColoringSetDistance(coloring, 2);
56: MatColoringSetType(coloring, MATCOLORINGSL);
57: MatColoringSetFromOptions(coloring);
58: MatColoringApply(coloring, &iscoloring);
59: MatColoringDestroy(&coloring);
60: MatTransposeColoringCreate(C, iscoloring, &matcoloring);
62: rart->matcoloring = matcoloring;
63: ISColoringDestroy(&iscoloring);
65: /* Create Rt_dense */
66: MatCreate(PETSC_COMM_SELF, &Rt_dense);
67: MatSetSizes(Rt_dense, A->cmap->n, matcoloring->ncolors, A->cmap->n, matcoloring->ncolors);
68: MatSetType(Rt_dense, MATSEQDENSE);
69: MatSeqDenseSetPreallocation(Rt_dense, NULL);
71: Rt_dense->assembled = PETSC_TRUE;
72: rart->Rt = Rt_dense;
74: /* Create RARt_dense = R*A*Rt_dense */
75: MatCreate(PETSC_COMM_SELF, &RARt_dense);
76: MatSetSizes(RARt_dense, C->rmap->n, matcoloring->ncolors, C->rmap->n, matcoloring->ncolors);
77: MatSetType(RARt_dense, MATSEQDENSE);
78: MatSeqDenseSetPreallocation(RARt_dense, NULL);
80: rart->RARt = RARt_dense;
82: /* Allocate work array to store columns of A*R^T used in MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense() */
83: PetscMalloc1(A->rmap->n * 4, &rart->work);
85: /* clean up */
86: MatDestroy(&P);
88: #if defined(PETSC_USE_INFO)
89: {
90: Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data;
91: PetscReal density = (PetscReal)(c->nz) / (RARt_dense->rmap->n * RARt_dense->cmap->n);
92: PetscInfo(C, "C=R*(A*Rt) via coloring C - use sparse-dense inner products\n");
93: PetscInfo(C, "RARt_den %" PetscInt_FMT " %" PetscInt_FMT "; Rt %" PetscInt_FMT " %" PetscInt_FMT " (RARt->nz %" PetscInt_FMT ")/(m*ncolors)=%g\n", RARt_dense->rmap->n, RARt_dense->cmap->n, R->cmap->n, R->rmap->n, c->nz, (double)density);
94: }
95: #endif
96: return 0;
97: }
99: /*
100: RAB = R * A * B, R and A in seqaij format, B in dense format;
101: */
102: PetscErrorCode MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense(Mat R, Mat A, Mat B, Mat RAB, PetscScalar *work)
103: {
104: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *r = (Mat_SeqAIJ *)R->data;
105: PetscScalar r1, r2, r3, r4;
106: const PetscScalar *b, *b1, *b2, *b3, *b4;
107: MatScalar *aa, *ra;
108: PetscInt cn = B->cmap->n, bm = B->rmap->n, col, i, j, n, *ai = a->i, *aj, am = A->rmap->n;
109: PetscInt am2 = 2 * am, am3 = 3 * am, bm4 = 4 * bm;
110: PetscScalar *d, *c, *c2, *c3, *c4;
111: PetscInt *rj, rm = R->rmap->n, dm = RAB->rmap->n, dn = RAB->cmap->n;
112: PetscInt rm2 = 2 * rm, rm3 = 3 * rm, colrm;
114: if (!dm || !dn) return 0;
120: { /*
121: This approach is not as good as original ones (will be removed later), but it reveals that
122: AB_den=A*B takes almost all execution time in R*A*B for src/ksp/ksp/tutorials/ex56.c
123: */
124: PetscBool via_matmatmult = PETSC_FALSE;
125: PetscOptionsGetBool(NULL, NULL, "-matrart_via_matmatmult", &via_matmatmult, NULL);
126: if (via_matmatmult) {
127: Mat AB_den = NULL;
128: MatCreate(PetscObjectComm((PetscObject)A), &AB_den);
129: MatMatMultSymbolic_SeqAIJ_SeqDense(A, B, 0.0, AB_den);
130: MatMatMultNumeric_SeqAIJ_SeqDense(A, B, AB_den);
131: MatMatMultNumeric_SeqAIJ_SeqDense(R, AB_den, RAB);
132: MatDestroy(&AB_den);
133: return 0;
134: }
135: }
137: MatDenseGetArrayRead(B, &b);
138: MatDenseGetArray(RAB, &d);
139: b1 = b;
140: b2 = b1 + bm;
141: b3 = b2 + bm;
142: b4 = b3 + bm;
143: c = work;
144: c2 = c + am;
145: c3 = c2 + am;
146: c4 = c3 + am;
147: for (col = 0; col < cn - 4; col += 4) { /* over columns of C */
148: for (i = 0; i < am; i++) { /* over rows of A in those columns */
149: r1 = r2 = r3 = r4 = 0.0;
150: n = ai[i + 1] - ai[i];
151: aj = a->j + ai[i];
152: aa = a->a + ai[i];
153: for (j = 0; j < n; j++) {
154: r1 += (*aa) * b1[*aj];
155: r2 += (*aa) * b2[*aj];
156: r3 += (*aa) * b3[*aj];
157: r4 += (*aa++) * b4[*aj++];
158: }
159: c[i] = r1;
160: c[am + i] = r2;
161: c[am2 + i] = r3;
162: c[am3 + i] = r4;
163: }
164: b1 += bm4;
165: b2 += bm4;
166: b3 += bm4;
167: b4 += bm4;
169: /* RAB[:,col] = R*C[:,col] */
170: colrm = col * rm;
171: for (i = 0; i < rm; i++) { /* over rows of R in those columns */
172: r1 = r2 = r3 = r4 = 0.0;
173: n = r->i[i + 1] - r->i[i];
174: rj = r->j + r->i[i];
175: ra = r->a + r->i[i];
176: for (j = 0; j < n; j++) {
177: r1 += (*ra) * c[*rj];
178: r2 += (*ra) * c2[*rj];
179: r3 += (*ra) * c3[*rj];
180: r4 += (*ra++) * c4[*rj++];
181: }
182: d[colrm + i] = r1;
183: d[colrm + rm + i] = r2;
184: d[colrm + rm2 + i] = r3;
185: d[colrm + rm3 + i] = r4;
186: }
187: }
188: for (; col < cn; col++) { /* over extra columns of C */
189: for (i = 0; i < am; i++) { /* over rows of A in those columns */
190: r1 = 0.0;
191: n = a->i[i + 1] - a->i[i];
192: aj = a->j + a->i[i];
193: aa = a->a + a->i[i];
194: for (j = 0; j < n; j++) r1 += (*aa++) * b1[*aj++];
195: c[i] = r1;
196: }
197: b1 += bm;
199: for (i = 0; i < rm; i++) { /* over rows of R in those columns */
200: r1 = 0.0;
201: n = r->i[i + 1] - r->i[i];
202: rj = r->j + r->i[i];
203: ra = r->a + r->i[i];
204: for (j = 0; j < n; j++) r1 += (*ra++) * c[*rj++];
205: d[col * rm + i] = r1;
206: }
207: }
208: PetscLogFlops(cn * 2.0 * (a->nz + r->nz));
210: MatDenseRestoreArrayRead(B, &b);
211: MatDenseRestoreArray(RAB, &d);
212: MatAssemblyBegin(RAB, MAT_FINAL_ASSEMBLY);
213: MatAssemblyEnd(RAB, MAT_FINAL_ASSEMBLY);
214: return 0;
215: }
217: PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ_colorrart(Mat A, Mat R, Mat C)
218: {
219: Mat_RARt *rart;
220: MatTransposeColoring matcoloring;
221: Mat Rt, RARt;
223: MatCheckProduct(C, 3);
225: rart = (Mat_RARt *)C->product->data;
227: /* Get dense Rt by Apply MatTransposeColoring to R */
228: matcoloring = rart->matcoloring;
229: Rt = rart->Rt;
230: MatTransColoringApplySpToDen(matcoloring, R, Rt);
232: /* Get dense RARt = R*A*Rt -- dominates! */
233: RARt = rart->RARt;
234: MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense(R, A, Rt, RARt, rart->work);
236: /* Recover C from C_dense */
237: MatTransColoringApplyDenToSp(matcoloring, RARt, C);
238: return 0;
239: }
241: PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(Mat A, Mat R, PetscReal fill, Mat C)
242: {
243: Mat ARt;
244: Mat_RARt *rart;
245: char *alg;
247: MatCheckProduct(C, 4);
249: /* create symbolic ARt = A*R^T */
250: MatProductCreate(A, R, NULL, &ARt);
251: MatProductSetType(ARt, MATPRODUCT_ABt);
252: MatProductSetAlgorithm(ARt, "sorted");
253: MatProductSetFill(ARt, fill);
254: MatProductSetFromOptions(ARt);
255: MatProductSymbolic(ARt);
257: /* compute symbolic C = R*ARt */
258: /* set algorithm for C = R*ARt */
259: PetscStrallocpy(C->product->alg, &alg);
260: MatProductSetAlgorithm(C, "sorted");
261: MatMatMultSymbolic_SeqAIJ_SeqAIJ(R, ARt, fill, C);
262: /* resume original algorithm for C */
263: MatProductSetAlgorithm(C, alg);
264: PetscFree(alg);
266: C->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ_matmattransposemult;
268: PetscNew(&rart);
269: rart->ARt = ARt;
270: C->product->data = rart;
271: C->product->destroy = MatDestroy_SeqAIJ_RARt;
272: PetscInfo(C, "Use ARt=A*R^T, C=R*ARt via MatMatTransposeMult(). Coloring can be applied to A*R^T.\n");
273: return 0;
274: }
276: PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ_matmattransposemult(Mat A, Mat R, Mat C)
277: {
278: Mat_RARt *rart;
280: MatCheckProduct(C, 3);
282: rart = (Mat_RARt *)C->product->data;
283: MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(A, R, rart->ARt); /* dominate! */
284: MatMatMultNumeric_SeqAIJ_SeqAIJ(R, rart->ARt, C);
285: return 0;
286: }
288: PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ(Mat A, Mat R, PetscReal fill, Mat C)
289: {
290: Mat Rt;
291: Mat_RARt *rart;
293: MatCheckProduct(C, 4);
295: MatTranspose(R, MAT_INITIAL_MATRIX, &Rt);
296: MatMatMatMultSymbolic_SeqAIJ_SeqAIJ_SeqAIJ(R, A, Rt, fill, C);
298: PetscNew(&rart);
299: rart->data = C->product->data;
300: rart->destroy = C->product->destroy;
301: rart->Rt = Rt;
302: C->product->data = rart;
303: C->product->destroy = MatDestroy_SeqAIJ_RARt;
304: C->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ;
305: PetscInfo(C, "Use Rt=R^T and C=R*A*Rt via MatMatMatMult() to avoid sparse inner products\n");
306: return 0;
307: }
309: PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ(Mat A, Mat R, Mat C)
310: {
311: Mat_RARt *rart;
313: MatCheckProduct(C, 3);
315: rart = (Mat_RARt *)C->product->data;
316: MatTranspose(R, MAT_REUSE_MATRIX, &rart->Rt);
317: /* MatMatMatMultSymbolic used a different data */
318: C->product->data = rart->data;
319: MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqAIJ(R, A, rart->Rt, C);
320: C->product->data = rart;
321: return 0;
322: }
324: PetscErrorCode MatRARt_SeqAIJ_SeqAIJ(Mat A, Mat R, MatReuse scall, PetscReal fill, Mat *C)
325: {
326: const char *algTypes[3] = {"matmatmatmult", "matmattransposemult", "coloring_rart"};
327: PetscInt alg = 0; /* set default algorithm */
329: if (scall == MAT_INITIAL_MATRIX) {
330: PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "MatRARt", "Mat");
331: PetscOptionsEList("-matrart_via", "Algorithmic approach", "MatRARt", algTypes, 3, algTypes[0], &alg, NULL);
332: PetscOptionsEnd();
334: PetscLogEventBegin(MAT_RARtSymbolic, A, R, 0, 0);
335: MatCreate(PETSC_COMM_SELF, C);
336: switch (alg) {
337: case 1:
338: /* via matmattransposemult: ARt=A*R^T, C=R*ARt - matrix coloring can be applied to A*R^T */
339: MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(A, R, fill, *C);
340: break;
341: case 2:
342: /* via coloring_rart: apply coloring C = R*A*R^T */
343: MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(A, R, fill, *C);
344: break;
345: default:
346: /* via matmatmatmult: Rt=R^T, C=R*A*Rt - avoid inefficient sparse inner products */
347: MatRARtSymbolic_SeqAIJ_SeqAIJ(A, R, fill, *C);
348: break;
349: }
350: PetscLogEventEnd(MAT_RARtSymbolic, A, R, 0, 0);
351: }
353: PetscLogEventBegin(MAT_RARtNumeric, A, R, 0, 0);
354: ((*C)->ops->rartnumeric)(A, R, *C);
355: PetscLogEventEnd(MAT_RARtNumeric, A, R, 0, 0);
356: return 0;
357: }
359: /* ------------------------------------------------------------- */
360: PetscErrorCode MatProductSymbolic_RARt_SeqAIJ_SeqAIJ(Mat C)
361: {
362: Mat_Product *product = C->product;
363: Mat A = product->A, R = product->B;
364: MatProductAlgorithm alg = product->alg;
365: PetscReal fill = product->fill;
366: PetscBool flg;
368: PetscStrcmp(alg, "r*a*rt", &flg);
369: if (flg) {
370: MatRARtSymbolic_SeqAIJ_SeqAIJ(A, R, fill, C);
371: goto next;
372: }
374: PetscStrcmp(alg, "r*art", &flg);
375: if (flg) {
376: MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(A, R, fill, C);
377: goto next;
378: }
380: PetscStrcmp(alg, "coloring_rart", &flg);
381: if (flg) {
382: MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(A, R, fill, C);
383: goto next;
384: }
386: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatProductAlgorithm is not supported");
388: next:
389: C->ops->productnumeric = MatProductNumeric_RARt;
390: return 0;
391: }