Actual source code: axpy.c


  2: #include <petsc/private/matimpl.h>

  4: static PetscErrorCode MatTransposeAXPY_Private(Mat Y, PetscScalar a, Mat X, MatStructure str, Mat T)
  5: {
  6:   Mat A, F;
  7:   PetscErrorCode (*f)(Mat, Mat *);

  9:   PetscObjectQueryFunction((PetscObject)T, "MatTransposeGetMat_C", &f);
 10:   if (f) {
 11:     MatTransposeGetMat(T, &A);
 12:     if (T == X) {
 13:       PetscInfo(NULL, "Explicitly transposing X of type MATTRANSPOSEVIRTUAL to perform MatAXPY()\n");
 14:       MatTranspose(A, MAT_INITIAL_MATRIX, &F);
 15:       A = Y;
 16:     } else {
 17:       PetscInfo(NULL, "Transposing X because Y of type MATTRANSPOSEVIRTUAL to perform MatAXPY()\n");
 18:       MatTranspose(X, MAT_INITIAL_MATRIX, &F);
 19:     }
 20:   } else {
 21:     MatHermitianTransposeGetMat(T, &A);
 22:     if (T == X) {
 23:       PetscInfo(NULL, "Explicitly Hermitian transposing X of type MATHERITIANTRANSPOSEVIRTUAL to perform MatAXPY()\n");
 24:       MatHermitianTranspose(A, MAT_INITIAL_MATRIX, &F);
 25:       A = Y;
 26:     } else {
 27:       PetscInfo(NULL, "Hermitian transposing X because Y of type MATHERITIANTRANSPOSEVIRTUAL to perform MatAXPY()\n");
 28:       MatHermitianTranspose(X, MAT_INITIAL_MATRIX, &F);
 29:     }
 30:   }
 31:   MatAXPY(A, a, F, str);
 32:   MatDestroy(&F);
 33:   return 0;
 34: }

 36: /*@
 37:    MatAXPY - Computes Y = a*X + Y.

 39:    Logically  Collective

 41:    Input Parameters:
 42: +  a - the scalar multiplier
 43: .  X - the first matrix
 44: .  Y - the second matrix
 45: -  str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN, UNKNOWN_NONZERO_PATTERN, or SUBSET_NONZERO_PATTERN (nonzeros of X is a subset of Y's)

 47:    Note:
 48:    No operation is performed when a is zero.

 50:    Level: intermediate

 52: .seealso: `MatAYPX()`
 53:  @*/
 54: PetscErrorCode MatAXPY(Mat Y, PetscScalar a, Mat X, MatStructure str)
 55: {
 56:   PetscInt  M1, M2, N1, N2;
 57:   PetscInt  m1, m2, n1, n2;
 58:   PetscBool sametype, transpose;

 64:   MatGetSize(X, &M1, &N1);
 65:   MatGetSize(Y, &M2, &N2);
 66:   MatGetLocalSize(X, &m1, &n1);
 67:   MatGetLocalSize(Y, &m2, &n2);
 72:   if (a == (PetscScalar)0.0) return 0;
 73:   if (Y == X) {
 74:     MatScale(Y, 1.0 + a);
 75:     return 0;
 76:   }
 77:   PetscObjectObjectTypeCompare((PetscObject)X, (PetscObject)Y, &sametype);
 78:   PetscLogEventBegin(MAT_AXPY, Y, 0, 0, 0);
 79:   if (Y->ops->axpy && (sametype || X->ops->axpy == Y->ops->axpy)) {
 80:     PetscUseTypeMethod(Y, axpy, a, X, str);
 81:   } else {
 82:     PetscObjectTypeCompareAny((PetscObject)X, &transpose, MATTRANSPOSEVIRTUAL, MATHERMITIANTRANSPOSEVIRTUAL, "");
 83:     if (transpose) {
 84:       MatTransposeAXPY_Private(Y, a, X, str, X);
 85:     } else {
 86:       PetscObjectTypeCompareAny((PetscObject)Y, &transpose, MATTRANSPOSEVIRTUAL, MATHERMITIANTRANSPOSEVIRTUAL, "");
 87:       if (transpose) {
 88:         MatTransposeAXPY_Private(Y, a, X, str, Y);
 89:       } else {
 90:         MatAXPY_Basic(Y, a, X, str);
 91:       }
 92:     }
 93:   }
 94:   PetscLogEventEnd(MAT_AXPY, Y, 0, 0, 0);
 95:   return 0;
 96: }

 98: PetscErrorCode MatAXPY_Basic_Preallocate(Mat Y, Mat X, Mat *B)
 99: {
100:   PetscErrorCode (*preall)(Mat, Mat, Mat *) = NULL;

102:   /* look for any available faster alternative to the general preallocator */
103:   PetscObjectQueryFunction((PetscObject)Y, "MatAXPYGetPreallocation_C", &preall);
104:   if (preall) {
105:     (*preall)(Y, X, B);
106:   } else { /* Use MatPrellocator, assumes same row-col distribution */
107:     Mat      preallocator;
108:     PetscInt r, rstart, rend;
109:     PetscInt m, n, M, N;

111:     MatGetRowUpperTriangular(Y);
112:     MatGetRowUpperTriangular(X);
113:     MatGetSize(Y, &M, &N);
114:     MatGetLocalSize(Y, &m, &n);
115:     MatCreate(PetscObjectComm((PetscObject)Y), &preallocator);
116:     MatSetType(preallocator, MATPREALLOCATOR);
117:     MatSetLayouts(preallocator, Y->rmap, Y->cmap);
118:     MatSetUp(preallocator);
119:     MatGetOwnershipRange(preallocator, &rstart, &rend);
120:     for (r = rstart; r < rend; ++r) {
121:       PetscInt           ncols;
122:       const PetscInt    *row;
123:       const PetscScalar *vals;

125:       MatGetRow(Y, r, &ncols, &row, &vals);
126:       MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES);
127:       MatRestoreRow(Y, r, &ncols, &row, &vals);
128:       MatGetRow(X, r, &ncols, &row, &vals);
129:       MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES);
130:       MatRestoreRow(X, r, &ncols, &row, &vals);
131:     }
132:     MatSetOption(preallocator, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE);
133:     MatAssemblyBegin(preallocator, MAT_FINAL_ASSEMBLY);
134:     MatAssemblyEnd(preallocator, MAT_FINAL_ASSEMBLY);
135:     MatRestoreRowUpperTriangular(Y);
136:     MatRestoreRowUpperTriangular(X);

138:     MatCreate(PetscObjectComm((PetscObject)Y), B);
139:     MatSetType(*B, ((PetscObject)Y)->type_name);
140:     MatSetLayouts(*B, Y->rmap, Y->cmap);
141:     MatPreallocatorPreallocate(preallocator, PETSC_FALSE, *B);
142:     MatDestroy(&preallocator);
143:   }
144:   return 0;
145: }

147: PetscErrorCode MatAXPY_Basic(Mat Y, PetscScalar a, Mat X, MatStructure str)
148: {
149:   PetscBool isshell, isdense, isnest;

151:   MatIsShell(Y, &isshell);
152:   if (isshell) { /* MatShell has special support for AXPY */
153:     PetscErrorCode (*f)(Mat, PetscScalar, Mat, MatStructure);

155:     MatGetOperation(Y, MATOP_AXPY, (void (**)(void)) & f);
156:     if (f) {
157:       (*f)(Y, a, X, str);
158:       return 0;
159:     }
160:   }
161:   /* no need to preallocate if Y is dense */
162:   PetscObjectBaseTypeCompareAny((PetscObject)Y, &isdense, MATSEQDENSE, MATMPIDENSE, "");
163:   if (isdense) {
164:     PetscObjectTypeCompare((PetscObject)X, MATNEST, &isnest);
165:     if (isnest) {
166:       MatAXPY_Dense_Nest(Y, a, X);
167:       return 0;
168:     }
169:     if (str == DIFFERENT_NONZERO_PATTERN || str == UNKNOWN_NONZERO_PATTERN) str = SUBSET_NONZERO_PATTERN;
170:   }
171:   if (str != DIFFERENT_NONZERO_PATTERN && str != UNKNOWN_NONZERO_PATTERN) {
172:     PetscInt           i, start, end, j, ncols, m, n;
173:     const PetscInt    *row;
174:     PetscScalar       *val;
175:     const PetscScalar *vals;

177:     MatGetSize(X, &m, &n);
178:     MatGetOwnershipRange(X, &start, &end);
179:     MatGetRowUpperTriangular(X);
180:     if (a == 1.0) {
181:       for (i = start; i < end; i++) {
182:         MatGetRow(X, i, &ncols, &row, &vals);
183:         MatSetValues(Y, 1, &i, ncols, row, vals, ADD_VALUES);
184:         MatRestoreRow(X, i, &ncols, &row, &vals);
185:       }
186:     } else {
187:       PetscInt vs = 100;
188:       /* realloc if needed, as this function may be used in parallel */
189:       PetscMalloc1(vs, &val);
190:       for (i = start; i < end; i++) {
191:         MatGetRow(X, i, &ncols, &row, &vals);
192:         if (vs < ncols) {
193:           vs = PetscMin(2 * ncols, n);
194:           PetscRealloc(vs * sizeof(*val), &val);
195:         }
196:         for (j = 0; j < ncols; j++) val[j] = a * vals[j];
197:         MatSetValues(Y, 1, &i, ncols, row, val, ADD_VALUES);
198:         MatRestoreRow(X, i, &ncols, &row, &vals);
199:       }
200:       PetscFree(val);
201:     }
202:     MatRestoreRowUpperTriangular(X);
203:     MatAssemblyBegin(Y, MAT_FINAL_ASSEMBLY);
204:     MatAssemblyEnd(Y, MAT_FINAL_ASSEMBLY);
205:   } else {
206:     Mat B;

208:     MatAXPY_Basic_Preallocate(Y, X, &B);
209:     MatAXPY_BasicWithPreallocation(B, Y, a, X, str);
210:     MatHeaderMerge(Y, &B);
211:   }
212:   return 0;
213: }

215: PetscErrorCode MatAXPY_BasicWithPreallocation(Mat B, Mat Y, PetscScalar a, Mat X, MatStructure str)
216: {
217:   PetscInt           i, start, end, j, ncols, m, n;
218:   const PetscInt    *row;
219:   PetscScalar       *val;
220:   const PetscScalar *vals;

222:   MatGetSize(X, &m, &n);
223:   MatGetOwnershipRange(X, &start, &end);
224:   MatGetRowUpperTriangular(Y);
225:   MatGetRowUpperTriangular(X);
226:   if (a == 1.0) {
227:     for (i = start; i < end; i++) {
228:       MatGetRow(Y, i, &ncols, &row, &vals);
229:       MatSetValues(B, 1, &i, ncols, row, vals, ADD_VALUES);
230:       MatRestoreRow(Y, i, &ncols, &row, &vals);

232:       MatGetRow(X, i, &ncols, &row, &vals);
233:       MatSetValues(B, 1, &i, ncols, row, vals, ADD_VALUES);
234:       MatRestoreRow(X, i, &ncols, &row, &vals);
235:     }
236:   } else {
237:     PetscInt vs = 100;
238:     /* realloc if needed, as this function may be used in parallel */
239:     PetscMalloc1(vs, &val);
240:     for (i = start; i < end; i++) {
241:       MatGetRow(Y, i, &ncols, &row, &vals);
242:       MatSetValues(B, 1, &i, ncols, row, vals, ADD_VALUES);
243:       MatRestoreRow(Y, i, &ncols, &row, &vals);

245:       MatGetRow(X, i, &ncols, &row, &vals);
246:       if (vs < ncols) {
247:         vs = PetscMin(2 * ncols, n);
248:         PetscRealloc(vs * sizeof(*val), &val);
249:       }
250:       for (j = 0; j < ncols; j++) val[j] = a * vals[j];
251:       MatSetValues(B, 1, &i, ncols, row, val, ADD_VALUES);
252:       MatRestoreRow(X, i, &ncols, &row, &vals);
253:     }
254:     PetscFree(val);
255:   }
256:   MatRestoreRowUpperTriangular(Y);
257:   MatRestoreRowUpperTriangular(X);
258:   MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY);
259:   MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY);
260:   return 0;
261: }

263: /*@
264:    MatShift - Computes Y =  Y + a I, where a is a PetscScalar and I is the identity matrix.

266:    Neighbor-wise Collective

268:    Input Parameters:
269: +  Y - the matrices
270: -  a - the PetscScalar

272:    Level: intermediate

274:    Notes:
275:     If Y is a rectangular matrix, the shift is done on the main diagonal Y_{ii} of the matrix (https://en.wikipedia.org/wiki/Main_diagonal)

277:     If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
278:    fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
279:    entry). No operation is performed when a is zero.

281:    To form Y = Y + diag(V) use MatDiagonalSet()

283: .seealso: `MatDiagonalSet()`, `MatScale()`, `MatDiagonalScale()`
284:  @*/
285: PetscErrorCode MatShift(Mat Y, PetscScalar a)
286: {
290:   MatCheckPreallocated(Y, 1);
291:   if (a == 0.0) return 0;

293:   if (Y->ops->shift) PetscUseTypeMethod(Y, shift, a);
294:   else MatShift_Basic(Y, a);

296:   PetscObjectStateIncrease((PetscObject)Y);
297:   return 0;
298: }

300: PetscErrorCode MatDiagonalSet_Default(Mat Y, Vec D, InsertMode is)
301: {
302:   PetscInt           i, start, end;
303:   const PetscScalar *v;

305:   MatGetOwnershipRange(Y, &start, &end);
306:   VecGetArrayRead(D, &v);
307:   for (i = start; i < end; i++) MatSetValues(Y, 1, &i, 1, &i, v + i - start, is);
308:   VecRestoreArrayRead(D, &v);
309:   MatAssemblyBegin(Y, MAT_FINAL_ASSEMBLY);
310:   MatAssemblyEnd(Y, MAT_FINAL_ASSEMBLY);
311:   return 0;
312: }

314: /*@
315:    MatDiagonalSet - Computes Y = Y + D, where D is a diagonal matrix
316:    that is represented as a vector. Or Y[i,i] = D[i] if InsertMode is
317:    INSERT_VALUES.

319:    Neighbor-wise Collective

321:    Input Parameters:
322: +  Y - the input matrix
323: .  D - the diagonal matrix, represented as a vector
324: -  i - INSERT_VALUES or ADD_VALUES

326:    Note:
327:     If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
328:    fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
329:    entry).

331:    Level: intermediate

333: .seealso: `MatShift()`, `MatScale()`, `MatDiagonalScale()`
334: @*/
335: PetscErrorCode MatDiagonalSet(Mat Y, Vec D, InsertMode is)
336: {
337:   PetscInt matlocal, veclocal;

341:   MatGetLocalSize(Y, &matlocal, NULL);
342:   VecGetLocalSize(D, &veclocal);
344:   if (Y->ops->diagonalset) PetscUseTypeMethod(Y, diagonalset, D, is);
345:   else MatDiagonalSet_Default(Y, D, is);
346:   PetscObjectStateIncrease((PetscObject)Y);
347:   return 0;
348: }

350: /*@
351:    MatAYPX - Computes Y = a*Y + X.

353:    Logically on Mat

355:    Input Parameters:
356: +  a - the PetscScalar multiplier
357: .  Y - the first matrix
358: .  X - the second matrix
359: -  str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN, UNKNOWN_NONZERO_PATTERN, or SUBSET_NONZERO_PATTERN (nonzeros of X is a subset of Y's)

361:    Level: intermediate

363: .seealso: `MatAXPY()`
364:  @*/
365: PetscErrorCode MatAYPX(Mat Y, PetscScalar a, Mat X, MatStructure str)
366: {
367:   MatScale(Y, a);
368:   MatAXPY(Y, 1.0, X, str);
369:   return 0;
370: }

372: /*@
373:     MatComputeOperator - Computes the explicit matrix

375:     Collective

377:     Input Parameters:
378: +   inmat - the matrix
379: -   mattype - the matrix type for the explicit operator

381:     Output Parameter:
382: .   mat - the explicit  operator

384:     Note:
385:     This computation is done by applying the operators to columns of the identity matrix.
386:     This routine is costly in general, and is recommended for use only with relatively small systems.
387:     Currently, this routine uses a dense matrix format if mattype == NULL.

389:     Level: advanced

391: @*/
392: PetscErrorCode MatComputeOperator(Mat inmat, MatType mattype, Mat *mat)
393: {
396:   MatConvert_Shell(inmat, mattype ? mattype : MATDENSE, MAT_INITIAL_MATRIX, mat);
397:   return 0;
398: }

400: /*@
401:     MatComputeOperatorTranspose - Computes the explicit matrix representation of
402:         a give matrix that can apply MatMultTranspose()

404:     Collective

406:     Input Parameters:
407: +   inmat - the matrix
408: -   mattype - the matrix type for the explicit operator

410:     Output Parameter:
411: .   mat - the explicit  operator transposed

413:     Note:
414:     This computation is done by applying the transpose of the operator to columns of the identity matrix.
415:     This routine is costly in general, and is recommended for use only with relatively small systems.
416:     Currently, this routine uses a dense matrix format if mattype == NULL.

418:     Level: advanced

420: @*/
421: PetscErrorCode MatComputeOperatorTranspose(Mat inmat, MatType mattype, Mat *mat)
422: {
423:   Mat A;

427:   MatCreateTranspose(inmat, &A);
428:   MatConvert_Shell(A, mattype ? mattype : MATDENSE, MAT_INITIAL_MATRIX, mat);
429:   MatDestroy(&A);
430:   return 0;
431: }

433: /*@
434:   MatChop - Set all values in the matrix less than the tolerance to zero

436:   Input Parameters:
437: + A   - The matrix
438: - tol - The zero tolerance

440:   Output Parameters:
441: . A - The chopped matrix

443:   Level: intermediate

445: .seealso: `MatCreate()`, `MatZeroEntries()`
446:  @*/
447: PetscErrorCode MatChop(Mat A, PetscReal tol)
448: {
449:   Mat          a;
450:   PetscScalar *newVals;
451:   PetscInt    *newCols, rStart, rEnd, numRows, maxRows, r, colMax = 0;
452:   PetscBool    flg;

454:   PetscObjectBaseTypeCompareAny((PetscObject)A, &flg, MATSEQDENSE, MATMPIDENSE, "");
455:   if (flg) {
456:     MatDenseGetLocalMatrix(A, &a);
457:     MatDenseGetLDA(a, &r);
458:     MatGetSize(a, &rStart, &rEnd);
459:     MatDenseGetArray(a, &newVals);
460:     for (; colMax < rEnd; ++colMax) {
461:       for (maxRows = 0; maxRows < rStart; ++maxRows) newVals[maxRows + colMax * r] = PetscAbsScalar(newVals[maxRows + colMax * r]) < tol ? 0.0 : newVals[maxRows + colMax * r];
462:     }
463:     MatDenseRestoreArray(a, &newVals);
464:   } else {
465:     MatGetOwnershipRange(A, &rStart, &rEnd);
466:     MatGetRowUpperTriangular(A);
467:     for (r = rStart; r < rEnd; ++r) {
468:       PetscInt ncols;

470:       MatGetRow(A, r, &ncols, NULL, NULL);
471:       colMax = PetscMax(colMax, ncols);
472:       MatRestoreRow(A, r, &ncols, NULL, NULL);
473:     }
474:     numRows = rEnd - rStart;
475:     MPIU_Allreduce(&numRows, &maxRows, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)A));
476:     PetscMalloc2(colMax, &newCols, colMax, &newVals);
477:     MatGetOption(A, MAT_NO_OFF_PROC_ENTRIES, &flg); /* cache user-defined value */
478:     MatSetOption(A, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE);
479:     /* short-circuit code in MatAssemblyBegin() and MatAssemblyEnd()             */
480:     /* that are potentially called many times depending on the distribution of A */
481:     for (r = rStart; r < rStart + maxRows; ++r) {
482:       const PetscScalar *vals;
483:       const PetscInt    *cols;
484:       PetscInt           ncols, newcols, c;

486:       if (r < rEnd) {
487:         MatGetRow(A, r, &ncols, &cols, &vals);
488:         for (c = 0; c < ncols; ++c) {
489:           newCols[c] = cols[c];
490:           newVals[c] = PetscAbsScalar(vals[c]) < tol ? 0.0 : vals[c];
491:         }
492:         newcols = ncols;
493:         MatRestoreRow(A, r, &ncols, &cols, &vals);
494:         MatSetValues(A, 1, &r, newcols, newCols, newVals, INSERT_VALUES);
495:       }
496:       MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
497:       MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
498:     }
499:     MatRestoreRowUpperTriangular(A);
500:     PetscFree2(newCols, newVals);
501:     MatSetOption(A, MAT_NO_OFF_PROC_ENTRIES, flg); /* reset option to its user-defined value */
502:   }
503:   return 0;
504: }