Actual source code: matrart.c


  2: /*
  3:   Defines projective product routines where A is a SeqAIJ matrix
  4:           C = R * A * R^T
  5: */

  7: #include <../src/mat/impls/aij/seq/aij.h>
  8: #include <../src/mat/utils/freespace.h>
  9: #include <../src/mat/impls/dense/seq/dense.h>

 11: PetscErrorCode MatDestroy_SeqAIJ_RARt(void *data)
 12: {
 13:   Mat_RARt *rart = (Mat_RARt *)data;

 15:   MatTransposeColoringDestroy(&rart->matcoloring);
 16:   MatDestroy(&rart->Rt);
 17:   MatDestroy(&rart->RARt);
 18:   MatDestroy(&rart->ARt);
 19:   PetscFree(rart->work);
 20:   if (rart->destroy) (*rart->destroy)(rart->data);
 21:   PetscFree(rart);
 22:   return 0;
 23: }

 25: PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(Mat A, Mat R, PetscReal fill, Mat C)
 26: {
 27:   Mat                  P;
 28:   Mat_RARt            *rart;
 29:   MatColoring          coloring;
 30:   MatTransposeColoring matcoloring;
 31:   ISColoring           iscoloring;
 32:   Mat                  Rt_dense, RARt_dense;

 34:   MatCheckProduct(C, 4);
 36:   /* create symbolic P=Rt */
 37:   MatTransposeSymbolic(R, &P);

 39:   /* get symbolic C=Pt*A*P */
 40:   MatPtAPSymbolic_SeqAIJ_SeqAIJ_SparseAxpy(A, P, fill, C);
 41:   MatSetBlockSizes(C, PetscAbs(R->rmap->bs), PetscAbs(R->rmap->bs));
 42:   C->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ_colorrart;

 44:   /* create a supporting struct */
 45:   PetscNew(&rart);
 46:   C->product->data    = rart;
 47:   C->product->destroy = MatDestroy_SeqAIJ_RARt;

 49:   /* ------ Use coloring ---------- */
 50:   /* inode causes memory problem */
 51:   MatSetOption(C, MAT_USE_INODES, PETSC_FALSE);

 53:   /* Create MatTransposeColoring from symbolic C=R*A*R^T */
 54:   MatColoringCreate(C, &coloring);
 55:   MatColoringSetDistance(coloring, 2);
 56:   MatColoringSetType(coloring, MATCOLORINGSL);
 57:   MatColoringSetFromOptions(coloring);
 58:   MatColoringApply(coloring, &iscoloring);
 59:   MatColoringDestroy(&coloring);
 60:   MatTransposeColoringCreate(C, iscoloring, &matcoloring);

 62:   rart->matcoloring = matcoloring;
 63:   ISColoringDestroy(&iscoloring);

 65:   /* Create Rt_dense */
 66:   MatCreate(PETSC_COMM_SELF, &Rt_dense);
 67:   MatSetSizes(Rt_dense, A->cmap->n, matcoloring->ncolors, A->cmap->n, matcoloring->ncolors);
 68:   MatSetType(Rt_dense, MATSEQDENSE);
 69:   MatSeqDenseSetPreallocation(Rt_dense, NULL);

 71:   Rt_dense->assembled = PETSC_TRUE;
 72:   rart->Rt            = Rt_dense;

 74:   /* Create RARt_dense = R*A*Rt_dense */
 75:   MatCreate(PETSC_COMM_SELF, &RARt_dense);
 76:   MatSetSizes(RARt_dense, C->rmap->n, matcoloring->ncolors, C->rmap->n, matcoloring->ncolors);
 77:   MatSetType(RARt_dense, MATSEQDENSE);
 78:   MatSeqDenseSetPreallocation(RARt_dense, NULL);

 80:   rart->RARt = RARt_dense;

 82:   /* Allocate work array to store columns of A*R^T used in MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense() */
 83:   PetscMalloc1(A->rmap->n * 4, &rart->work);

 85:   /* clean up */
 86:   MatDestroy(&P);

 88: #if defined(PETSC_USE_INFO)
 89:   {
 90:     Mat_SeqAIJ *c       = (Mat_SeqAIJ *)C->data;
 91:     PetscReal   density = (PetscReal)(c->nz) / (RARt_dense->rmap->n * RARt_dense->cmap->n);
 92:     PetscInfo(C, "C=R*(A*Rt) via coloring C - use sparse-dense inner products\n");
 93:     PetscInfo(C, "RARt_den %" PetscInt_FMT " %" PetscInt_FMT "; Rt %" PetscInt_FMT " %" PetscInt_FMT " (RARt->nz %" PetscInt_FMT ")/(m*ncolors)=%g\n", RARt_dense->rmap->n, RARt_dense->cmap->n, R->cmap->n, R->rmap->n, c->nz, (double)density);
 94:   }
 95: #endif
 96:   return 0;
 97: }

 99: /*
100:  RAB = R * A * B, R and A in seqaij format, B in dense format;
101: */
102: PetscErrorCode MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense(Mat R, Mat A, Mat B, Mat RAB, PetscScalar *work)
103: {
104:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *r = (Mat_SeqAIJ *)R->data;
105:   PetscScalar        r1, r2, r3, r4;
106:   const PetscScalar *b, *b1, *b2, *b3, *b4;
107:   MatScalar         *aa, *ra;
108:   PetscInt           cn = B->cmap->n, bm = B->rmap->n, col, i, j, n, *ai = a->i, *aj, am = A->rmap->n;
109:   PetscInt           am2 = 2 * am, am3 = 3 * am, bm4 = 4 * bm;
110:   PetscScalar       *d, *c, *c2, *c3, *c4;
111:   PetscInt          *rj, rm = R->rmap->n, dm = RAB->rmap->n, dn = RAB->cmap->n;
112:   PetscInt           rm2 = 2 * rm, rm3 = 3 * rm, colrm;

114:   if (!dm || !dn) return 0;

120:   { /*
121:      This approach is not as good as original ones (will be removed later), but it reveals that
122:      AB_den=A*B takes almost all execution time in R*A*B for src/ksp/ksp/tutorials/ex56.c
123:      */
124:     PetscBool via_matmatmult = PETSC_FALSE;
125:     PetscOptionsGetBool(NULL, NULL, "-matrart_via_matmatmult", &via_matmatmult, NULL);
126:     if (via_matmatmult) {
127:       Mat AB_den = NULL;
128:       MatCreate(PetscObjectComm((PetscObject)A), &AB_den);
129:       MatMatMultSymbolic_SeqAIJ_SeqDense(A, B, 0.0, AB_den);
130:       MatMatMultNumeric_SeqAIJ_SeqDense(A, B, AB_den);
131:       MatMatMultNumeric_SeqAIJ_SeqDense(R, AB_den, RAB);
132:       MatDestroy(&AB_den);
133:       return 0;
134:     }
135:   }

137:   MatDenseGetArrayRead(B, &b);
138:   MatDenseGetArray(RAB, &d);
139:   b1 = b;
140:   b2 = b1 + bm;
141:   b3 = b2 + bm;
142:   b4 = b3 + bm;
143:   c  = work;
144:   c2 = c + am;
145:   c3 = c2 + am;
146:   c4 = c3 + am;
147:   for (col = 0; col < cn - 4; col += 4) { /* over columns of C */
148:     for (i = 0; i < am; i++) {            /* over rows of A in those columns */
149:       r1 = r2 = r3 = r4 = 0.0;
150:       n                 = ai[i + 1] - ai[i];
151:       aj                = a->j + ai[i];
152:       aa                = a->a + ai[i];
153:       for (j = 0; j < n; j++) {
154:         r1 += (*aa) * b1[*aj];
155:         r2 += (*aa) * b2[*aj];
156:         r3 += (*aa) * b3[*aj];
157:         r4 += (*aa++) * b4[*aj++];
158:       }
159:       c[i]       = r1;
160:       c[am + i]  = r2;
161:       c[am2 + i] = r3;
162:       c[am3 + i] = r4;
163:     }
164:     b1 += bm4;
165:     b2 += bm4;
166:     b3 += bm4;
167:     b4 += bm4;

169:     /* RAB[:,col] = R*C[:,col] */
170:     colrm = col * rm;
171:     for (i = 0; i < rm; i++) { /* over rows of R in those columns */
172:       r1 = r2 = r3 = r4 = 0.0;
173:       n                 = r->i[i + 1] - r->i[i];
174:       rj                = r->j + r->i[i];
175:       ra                = r->a + r->i[i];
176:       for (j = 0; j < n; j++) {
177:         r1 += (*ra) * c[*rj];
178:         r2 += (*ra) * c2[*rj];
179:         r3 += (*ra) * c3[*rj];
180:         r4 += (*ra++) * c4[*rj++];
181:       }
182:       d[colrm + i]       = r1;
183:       d[colrm + rm + i]  = r2;
184:       d[colrm + rm2 + i] = r3;
185:       d[colrm + rm3 + i] = r4;
186:     }
187:   }
188:   for (; col < cn; col++) {    /* over extra columns of C */
189:     for (i = 0; i < am; i++) { /* over rows of A in those columns */
190:       r1 = 0.0;
191:       n  = a->i[i + 1] - a->i[i];
192:       aj = a->j + a->i[i];
193:       aa = a->a + a->i[i];
194:       for (j = 0; j < n; j++) r1 += (*aa++) * b1[*aj++];
195:       c[i] = r1;
196:     }
197:     b1 += bm;

199:     for (i = 0; i < rm; i++) { /* over rows of R in those columns */
200:       r1 = 0.0;
201:       n  = r->i[i + 1] - r->i[i];
202:       rj = r->j + r->i[i];
203:       ra = r->a + r->i[i];
204:       for (j = 0; j < n; j++) r1 += (*ra++) * c[*rj++];
205:       d[col * rm + i] = r1;
206:     }
207:   }
208:   PetscLogFlops(cn * 2.0 * (a->nz + r->nz));

210:   MatDenseRestoreArrayRead(B, &b);
211:   MatDenseRestoreArray(RAB, &d);
212:   MatAssemblyBegin(RAB, MAT_FINAL_ASSEMBLY);
213:   MatAssemblyEnd(RAB, MAT_FINAL_ASSEMBLY);
214:   return 0;
215: }

217: PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ_colorrart(Mat A, Mat R, Mat C)
218: {
219:   Mat_RARt            *rart;
220:   MatTransposeColoring matcoloring;
221:   Mat                  Rt, RARt;

223:   MatCheckProduct(C, 3);
225:   rart = (Mat_RARt *)C->product->data;

227:   /* Get dense Rt by Apply MatTransposeColoring to R */
228:   matcoloring = rart->matcoloring;
229:   Rt          = rart->Rt;
230:   MatTransColoringApplySpToDen(matcoloring, R, Rt);

232:   /* Get dense RARt = R*A*Rt -- dominates! */
233:   RARt = rart->RARt;
234:   MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqDense(R, A, Rt, RARt, rart->work);

236:   /* Recover C from C_dense */
237:   MatTransColoringApplyDenToSp(matcoloring, RARt, C);
238:   return 0;
239: }

241: PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(Mat A, Mat R, PetscReal fill, Mat C)
242: {
243:   Mat       ARt;
244:   Mat_RARt *rart;
245:   char     *alg;

247:   MatCheckProduct(C, 4);
249:   /* create symbolic ARt = A*R^T  */
250:   MatProductCreate(A, R, NULL, &ARt);
251:   MatProductSetType(ARt, MATPRODUCT_ABt);
252:   MatProductSetAlgorithm(ARt, "sorted");
253:   MatProductSetFill(ARt, fill);
254:   MatProductSetFromOptions(ARt);
255:   MatProductSymbolic(ARt);

257:   /* compute symbolic C = R*ARt */
258:   /* set algorithm for C = R*ARt */
259:   PetscStrallocpy(C->product->alg, &alg);
260:   MatProductSetAlgorithm(C, "sorted");
261:   MatMatMultSymbolic_SeqAIJ_SeqAIJ(R, ARt, fill, C);
262:   /* resume original algorithm for C */
263:   MatProductSetAlgorithm(C, alg);
264:   PetscFree(alg);

266:   C->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ_matmattransposemult;

268:   PetscNew(&rart);
269:   rart->ARt           = ARt;
270:   C->product->data    = rart;
271:   C->product->destroy = MatDestroy_SeqAIJ_RARt;
272:   PetscInfo(C, "Use ARt=A*R^T, C=R*ARt via MatMatTransposeMult(). Coloring can be applied to A*R^T.\n");
273:   return 0;
274: }

276: PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ_matmattransposemult(Mat A, Mat R, Mat C)
277: {
278:   Mat_RARt *rart;

280:   MatCheckProduct(C, 3);
282:   rart = (Mat_RARt *)C->product->data;
283:   MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(A, R, rart->ARt); /* dominate! */
284:   MatMatMultNumeric_SeqAIJ_SeqAIJ(R, rart->ARt, C);
285:   return 0;
286: }

288: PetscErrorCode MatRARtSymbolic_SeqAIJ_SeqAIJ(Mat A, Mat R, PetscReal fill, Mat C)
289: {
290:   Mat       Rt;
291:   Mat_RARt *rart;

293:   MatCheckProduct(C, 4);
295:   MatTranspose(R, MAT_INITIAL_MATRIX, &Rt);
296:   MatMatMatMultSymbolic_SeqAIJ_SeqAIJ_SeqAIJ(R, A, Rt, fill, C);

298:   PetscNew(&rart);
299:   rart->data          = C->product->data;
300:   rart->destroy       = C->product->destroy;
301:   rart->Rt            = Rt;
302:   C->product->data    = rart;
303:   C->product->destroy = MatDestroy_SeqAIJ_RARt;
304:   C->ops->rartnumeric = MatRARtNumeric_SeqAIJ_SeqAIJ;
305:   PetscInfo(C, "Use Rt=R^T and C=R*A*Rt via MatMatMatMult() to avoid sparse inner products\n");
306:   return 0;
307: }

309: PetscErrorCode MatRARtNumeric_SeqAIJ_SeqAIJ(Mat A, Mat R, Mat C)
310: {
311:   Mat_RARt *rart;

313:   MatCheckProduct(C, 3);
315:   rart = (Mat_RARt *)C->product->data;
316:   MatTranspose(R, MAT_REUSE_MATRIX, &rart->Rt);
317:   /* MatMatMatMultSymbolic used a different data */
318:   C->product->data = rart->data;
319:   MatMatMatMultNumeric_SeqAIJ_SeqAIJ_SeqAIJ(R, A, rart->Rt, C);
320:   C->product->data = rart;
321:   return 0;
322: }

324: PetscErrorCode MatRARt_SeqAIJ_SeqAIJ(Mat A, Mat R, MatReuse scall, PetscReal fill, Mat *C)
325: {
326:   const char *algTypes[3] = {"matmatmatmult", "matmattransposemult", "coloring_rart"};
327:   PetscInt    alg         = 0; /* set default algorithm */

329:   if (scall == MAT_INITIAL_MATRIX) {
330:     PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "MatRARt", "Mat");
331:     PetscOptionsEList("-matrart_via", "Algorithmic approach", "MatRARt", algTypes, 3, algTypes[0], &alg, NULL);
332:     PetscOptionsEnd();

334:     PetscLogEventBegin(MAT_RARtSymbolic, A, R, 0, 0);
335:     MatCreate(PETSC_COMM_SELF, C);
336:     switch (alg) {
337:     case 1:
338:       /* via matmattransposemult: ARt=A*R^T, C=R*ARt - matrix coloring can be applied to A*R^T */
339:       MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(A, R, fill, *C);
340:       break;
341:     case 2:
342:       /* via coloring_rart: apply coloring C = R*A*R^T                          */
343:       MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(A, R, fill, *C);
344:       break;
345:     default:
346:       /* via matmatmatmult: Rt=R^T, C=R*A*Rt - avoid inefficient sparse inner products */
347:       MatRARtSymbolic_SeqAIJ_SeqAIJ(A, R, fill, *C);
348:       break;
349:     }
350:     PetscLogEventEnd(MAT_RARtSymbolic, A, R, 0, 0);
351:   }

353:   PetscLogEventBegin(MAT_RARtNumeric, A, R, 0, 0);
354:   ((*C)->ops->rartnumeric)(A, R, *C);
355:   PetscLogEventEnd(MAT_RARtNumeric, A, R, 0, 0);
356:   return 0;
357: }

359: /* ------------------------------------------------------------- */
360: PetscErrorCode MatProductSymbolic_RARt_SeqAIJ_SeqAIJ(Mat C)
361: {
362:   Mat_Product        *product = C->product;
363:   Mat                 A = product->A, R = product->B;
364:   MatProductAlgorithm alg  = product->alg;
365:   PetscReal           fill = product->fill;
366:   PetscBool           flg;

368:   PetscStrcmp(alg, "r*a*rt", &flg);
369:   if (flg) {
370:     MatRARtSymbolic_SeqAIJ_SeqAIJ(A, R, fill, C);
371:     goto next;
372:   }

374:   PetscStrcmp(alg, "r*art", &flg);
375:   if (flg) {
376:     MatRARtSymbolic_SeqAIJ_SeqAIJ_matmattransposemult(A, R, fill, C);
377:     goto next;
378:   }

380:   PetscStrcmp(alg, "coloring_rart", &flg);
381:   if (flg) {
382:     MatRARtSymbolic_SeqAIJ_SeqAIJ_colorrart(A, R, fill, C);
383:     goto next;
384:   }

386:   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatProductAlgorithm is not supported");

388: next:
389:   C->ops->productnumeric = MatProductNumeric_RARt;
390:   return 0;
391: }