Actual source code: axpy.c
#include <petsc/private/matimpl.h>
static PetscErrorCode MatTransposeAXPY_Private(Mat Y, PetscScalar a, Mat X, MatStructure str, Mat T)
{
  Mat A, F;
  PetscErrorCode (*f)(Mat, Mat *);

  PetscObjectQueryFunction((PetscObject)T, "MatTransposeGetMat_C", &f);
  if (f) {
    MatTransposeGetMat(T, &A);
    if (T == X) {
      PetscInfo(NULL, "Explicitly transposing X of type MATTRANSPOSEVIRTUAL to perform MatAXPY()\n");
      MatTranspose(A, MAT_INITIAL_MATRIX, &F);
      A = Y;
    } else {
      PetscInfo(NULL, "Transposing X because Y is of type MATTRANSPOSEVIRTUAL to perform MatAXPY()\n");
      MatTranspose(X, MAT_INITIAL_MATRIX, &F);
    }
  } else {
    MatHermitianTransposeGetMat(T, &A);
    if (T == X) {
      PetscInfo(NULL, "Explicitly Hermitian transposing X of type MATHERMITIANTRANSPOSEVIRTUAL to perform MatAXPY()\n");
      MatHermitianTranspose(A, MAT_INITIAL_MATRIX, &F);
      A = Y;
    } else {
      PetscInfo(NULL, "Hermitian transposing X because Y is of type MATHERMITIANTRANSPOSEVIRTUAL to perform MatAXPY()\n");
      MatHermitianTranspose(X, MAT_INITIAL_MATRIX, &F);
    }
  }
  MatAXPY(A, a, F, str);
  MatDestroy(&F);
  return 0;
}
/*@
  MatAXPY - Computes Y = a*X + Y.

  Logically Collective

  Input Parameters:
+ a   - the scalar multiplier
. X   - the first matrix
. Y   - the second matrix
- str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN, UNKNOWN_NONZERO_PATTERN, or SUBSET_NONZERO_PATTERN (the nonzero pattern of X is a subset of that of Y)

  Note:
  No operation is performed when a is zero.
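
  Example Usage:
  A minimal sketch, assuming X and Y are already assembled matrices with matching global and local sizes:
.vb
  // Y <- 2.0*X + Y; the nonzero patterns of X and Y are allowed to differ
  MatAXPY(Y, 2.0, X, DIFFERENT_NONZERO_PATTERN);
.ve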
  Level: intermediate

.seealso: `MatAYPX()`
@*/
PetscErrorCode MatAXPY(Mat Y, PetscScalar a, Mat X, MatStructure str)
{
  PetscInt  M1, M2, N1, N2;
  PetscInt  m1, m2, n1, n2;
  PetscBool sametype, transpose;

  MatGetSize(X, &M1, &N1);
  MatGetSize(Y, &M2, &N2);
  MatGetLocalSize(X, &m1, &n1);
  MatGetLocalSize(Y, &m2, &n2);

  if (a == (PetscScalar)0.0) return 0;
  if (Y == X) {
    MatScale(Y, 1.0 + a);
    return 0;
  }
  PetscObjectObjectTypeCompare((PetscObject)X, (PetscObject)Y, &sametype);
  PetscLogEventBegin(MAT_AXPY, Y, 0, 0, 0);
  if (Y->ops->axpy && (sametype || X->ops->axpy == Y->ops->axpy)) {
    PetscUseTypeMethod(Y, axpy, a, X, str);
  } else {
    PetscObjectTypeCompareAny((PetscObject)X, &transpose, MATTRANSPOSEVIRTUAL, MATHERMITIANTRANSPOSEVIRTUAL, "");
    if (transpose) {
      MatTransposeAXPY_Private(Y, a, X, str, X);
    } else {
      PetscObjectTypeCompareAny((PetscObject)Y, &transpose, MATTRANSPOSEVIRTUAL, MATHERMITIANTRANSPOSEVIRTUAL, "");
      if (transpose) {
        MatTransposeAXPY_Private(Y, a, X, str, Y);
      } else {
        MatAXPY_Basic(Y, a, X, str);
      }
    }
  }
  PetscLogEventEnd(MAT_AXPY, Y, 0, 0, 0);
  return 0;
}
PetscErrorCode MatAXPY_Basic_Preallocate(Mat Y, Mat X, Mat *B)
{
  PetscErrorCode (*preall)(Mat, Mat, Mat *) = NULL;

  /* look for any available faster alternative to the general preallocator */
  PetscObjectQueryFunction((PetscObject)Y, "MatAXPYGetPreallocation_C", &preall);
  if (preall) {
    (*preall)(Y, X, B);
  } else { /* use MatPreallocator; assumes the same row-column distribution for X and Y */
    Mat      preallocator;
    PetscInt r, rstart, rend;
    PetscInt m, n, M, N;

    MatGetRowUpperTriangular(Y);
    MatGetRowUpperTriangular(X);
    MatGetSize(Y, &M, &N);
    MatGetLocalSize(Y, &m, &n);
    MatCreate(PetscObjectComm((PetscObject)Y), &preallocator);
    MatSetType(preallocator, MATPREALLOCATOR);
    MatSetLayouts(preallocator, Y->rmap, Y->cmap);
    MatSetUp(preallocator);
    MatGetOwnershipRange(preallocator, &rstart, &rend);
    for (r = rstart; r < rend; ++r) {
      PetscInt           ncols;
      const PetscInt    *row;
      const PetscScalar *vals;

      MatGetRow(Y, r, &ncols, &row, &vals);
      MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES);
      MatRestoreRow(Y, r, &ncols, &row, &vals);
      MatGetRow(X, r, &ncols, &row, &vals);
      MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES);
      MatRestoreRow(X, r, &ncols, &row, &vals);
    }
    MatSetOption(preallocator, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE);
    MatAssemblyBegin(preallocator, MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(preallocator, MAT_FINAL_ASSEMBLY);
    MatRestoreRowUpperTriangular(Y);
    MatRestoreRowUpperTriangular(X);

    MatCreate(PetscObjectComm((PetscObject)Y), B);
    MatSetType(*B, ((PetscObject)Y)->type_name);
    MatSetLayouts(*B, Y->rmap, Y->cmap);
    MatPreallocatorPreallocate(preallocator, PETSC_FALSE, *B);
    MatDestroy(&preallocator);
  }
  return 0;
}
PetscErrorCode MatAXPY_Basic(Mat Y, PetscScalar a, Mat X, MatStructure str)
{
  PetscBool isshell, isdense, isnest;

  MatIsShell(Y, &isshell);
  if (isshell) { /* MatShell has special support for AXPY */
    PetscErrorCode (*f)(Mat, PetscScalar, Mat, MatStructure);

    MatGetOperation(Y, MATOP_AXPY, (void (**)(void)) & f);
    if (f) {
      (*f)(Y, a, X, str);
      return 0;
    }
  }
  /* no need to preallocate if Y is dense */
  PetscObjectBaseTypeCompareAny((PetscObject)Y, &isdense, MATSEQDENSE, MATMPIDENSE, "");
  if (isdense) {
    PetscObjectTypeCompare((PetscObject)X, MATNEST, &isnest);
    if (isnest) {
      MatAXPY_Dense_Nest(Y, a, X);
      return 0;
    }
    if (str == DIFFERENT_NONZERO_PATTERN || str == UNKNOWN_NONZERO_PATTERN) str = SUBSET_NONZERO_PATTERN;
  }
  if (str != DIFFERENT_NONZERO_PATTERN && str != UNKNOWN_NONZERO_PATTERN) {
    PetscInt           i, start, end, j, ncols, m, n;
    const PetscInt    *row;
    PetscScalar       *val;
    const PetscScalar *vals;

    MatGetSize(X, &m, &n);
    MatGetOwnershipRange(X, &start, &end);
    MatGetRowUpperTriangular(X);
    if (a == 1.0) {
      for (i = start; i < end; i++) {
        MatGetRow(X, i, &ncols, &row, &vals);
        MatSetValues(Y, 1, &i, ncols, row, vals, ADD_VALUES);
        MatRestoreRow(X, i, &ncols, &row, &vals);
      }
    } else {
      PetscInt vs = 100;
      /* realloc if needed, as this function may be used in parallel */
      PetscMalloc1(vs, &val);
      for (i = start; i < end; i++) {
        MatGetRow(X, i, &ncols, &row, &vals);
        if (vs < ncols) {
          vs = PetscMin(2 * ncols, n);
          PetscRealloc(vs * sizeof(*val), &val);
        }
        for (j = 0; j < ncols; j++) val[j] = a * vals[j];
        MatSetValues(Y, 1, &i, ncols, row, val, ADD_VALUES);
        MatRestoreRow(X, i, &ncols, &row, &vals);
      }
      PetscFree(val);
    }
    MatRestoreRowUpperTriangular(X);
    MatAssemblyBegin(Y, MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(Y, MAT_FINAL_ASSEMBLY);
  } else {
    Mat B;

    MatAXPY_Basic_Preallocate(Y, X, &B);
    MatAXPY_BasicWithPreallocation(B, Y, a, X, str);
    MatHeaderMerge(Y, &B);
  }
  return 0;
}
PetscErrorCode MatAXPY_BasicWithPreallocation(Mat B, Mat Y, PetscScalar a, Mat X, MatStructure str)
{
  PetscInt           i, start, end, j, ncols, m, n;
  const PetscInt    *row;
  PetscScalar       *val;
  const PetscScalar *vals;

  MatGetSize(X, &m, &n);
  MatGetOwnershipRange(X, &start, &end);
  MatGetRowUpperTriangular(Y);
  MatGetRowUpperTriangular(X);
  if (a == 1.0) {
    for (i = start; i < end; i++) {
      MatGetRow(Y, i, &ncols, &row, &vals);
      MatSetValues(B, 1, &i, ncols, row, vals, ADD_VALUES);
      MatRestoreRow(Y, i, &ncols, &row, &vals);

      MatGetRow(X, i, &ncols, &row, &vals);
      MatSetValues(B, 1, &i, ncols, row, vals, ADD_VALUES);
      MatRestoreRow(X, i, &ncols, &row, &vals);
    }
  } else {
    PetscInt vs = 100;
    /* realloc if needed, as this function may be used in parallel */
    PetscMalloc1(vs, &val);
    for (i = start; i < end; i++) {
      MatGetRow(Y, i, &ncols, &row, &vals);
      MatSetValues(B, 1, &i, ncols, row, vals, ADD_VALUES);
      MatRestoreRow(Y, i, &ncols, &row, &vals);

      MatGetRow(X, i, &ncols, &row, &vals);
      if (vs < ncols) {
        vs = PetscMin(2 * ncols, n);
        PetscRealloc(vs * sizeof(*val), &val);
      }
      for (j = 0; j < ncols; j++) val[j] = a * vals[j];
      MatSetValues(B, 1, &i, ncols, row, val, ADD_VALUES);
      MatRestoreRow(X, i, &ncols, &row, &vals);
    }
    PetscFree(val);
  }
  MatRestoreRowUpperTriangular(Y);
  MatRestoreRowUpperTriangular(X);
  MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY);
  return 0;
}
/*@
  MatShift - Computes Y = Y + a I, where a is a PetscScalar and I is the identity matrix.

  Neighbor-wise Collective

  Input Parameters:
+ Y - the matrix
- a - the scalar to add to the diagonal

  Level: intermediate

  Notes:
  If Y is a rectangular matrix, the shift is applied to the entries Y_{ii} on the main diagonal (https://en.wikipedia.org/wiki/Main_diagonal).

  If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
  fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
  entry). No operation is performed when a is zero.

  To form Y = Y + diag(V) use MatDiagonalSet().
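
  Example Usage:
  A minimal sketch, assuming A is an assembled matrix whose diagonal entries have all been set (possibly to zero):
.vb
  // A <- A + 5.0*I
  MatShift(A, 5.0);
.ve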
.seealso: `MatDiagonalSet()`, `MatScale()`, `MatDiagonalScale()`
@*/
PetscErrorCode MatShift(Mat Y, PetscScalar a)
{
  MatCheckPreallocated(Y, 1);
  if (a == 0.0) return 0;

  if (Y->ops->shift) PetscUseTypeMethod(Y, shift, a);
  else MatShift_Basic(Y, a);

  PetscObjectStateIncrease((PetscObject)Y);
  return 0;
}
PetscErrorCode MatDiagonalSet_Default(Mat Y, Vec D, InsertMode is)
{
  PetscInt           i, start, end;
  const PetscScalar *v;

  MatGetOwnershipRange(Y, &start, &end);
  VecGetArrayRead(D, &v);
  for (i = start; i < end; i++) MatSetValues(Y, 1, &i, 1, &i, v + i - start, is);
  VecRestoreArrayRead(D, &v);
  MatAssemblyBegin(Y, MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(Y, MAT_FINAL_ASSEMBLY);
  return 0;
}
/*@
  MatDiagonalSet - Computes Y = Y + D, where D is a diagonal matrix
  that is represented as a vector, or Y[i,i] = D[i] if the InsertMode is
  INSERT_VALUES.

  Neighbor-wise Collective

  Input Parameters:
+ Y  - the input matrix
. D  - the diagonal matrix, represented as a vector
- is - INSERT_VALUES or ADD_VALUES

  Note:
  If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
  fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
  entry).
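
  Example Usage:
  A minimal sketch, assuming Y is an assembled matrix with all diagonal entries present and D is a vector with the same row layout as Y:
.vb
  // add the entries of D to the diagonal of Y
  MatDiagonalSet(Y, D, ADD_VALUES);
.ve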
  Level: intermediate

.seealso: `MatShift()`, `MatScale()`, `MatDiagonalScale()`
@*/
PetscErrorCode MatDiagonalSet(Mat Y, Vec D, InsertMode is)
{
  PetscInt matlocal, veclocal;

  MatGetLocalSize(Y, &matlocal, NULL);
  VecGetLocalSize(D, &veclocal);

  if (Y->ops->diagonalset) PetscUseTypeMethod(Y, diagonalset, D, is);
  else MatDiagonalSet_Default(Y, D, is);
  PetscObjectStateIncrease((PetscObject)Y);
  return 0;
}
/*@
  MatAYPX - Computes Y = a*Y + X.

  Logically Collective

  Input Parameters:
+ a   - the PetscScalar multiplier
. Y   - the first matrix
. X   - the second matrix
- str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN, UNKNOWN_NONZERO_PATTERN, or SUBSET_NONZERO_PATTERN (the nonzero pattern of X is a subset of that of Y)
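
  Example Usage:
  A minimal sketch, assuming X and Y are already assembled matrices with matching global and local sizes:
.vb
  // Y <- 0.5*Y + X, where the nonzero pattern of X is a subset of that of Y
  MatAYPX(Y, 0.5, X, SUBSET_NONZERO_PATTERN);
.ve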
  Level: intermediate

.seealso: `MatAXPY()`
@*/
PetscErrorCode MatAYPX(Mat Y, PetscScalar a, Mat X, MatStructure str)
{
  MatScale(Y, a);
  MatAXPY(Y, 1.0, X, str);
  return 0;
}
/*@
  MatComputeOperator - Computes the explicit matrix representation of the given operator

  Collective

  Input Parameters:
+ inmat   - the matrix
- mattype - the matrix type for the explicit operator

  Output Parameter:
. mat - the explicit operator

  Note:
  This computation is done by applying the operator to the columns of the identity matrix.
  This routine is costly in general, and is recommended for use only with relatively small systems.
  Currently, this routine uses a dense matrix format if mattype == NULL.
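
  Example Usage:
  A minimal sketch, assuming A is a (possibly matrix-free) operator with MatMult() defined:
.vb
  Mat expl;

  // build a dense explicit representation of A, view it, then discard it
  MatComputeOperator(A, MATDENSE, &expl);
  MatView(expl, PETSC_VIEWER_STDOUT_WORLD);
  MatDestroy(&expl);
.ve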
  Level: advanced
@*/
PetscErrorCode MatComputeOperator(Mat inmat, MatType mattype, Mat *mat)
{
  MatConvert_Shell(inmat, mattype ? mattype : MATDENSE, MAT_INITIAL_MATRIX, mat);
  return 0;
}
/*@
  MatComputeOperatorTranspose - Computes the explicit matrix representation of
  a given matrix that can apply MatMultTranspose()

  Collective

  Input Parameters:
+ inmat   - the matrix
- mattype - the matrix type for the explicit operator

  Output Parameter:
. mat - the transposed explicit operator

  Note:
  This computation is done by applying the transpose of the operator to the columns of the identity matrix.
  This routine is costly in general, and is recommended for use only with relatively small systems.
  Currently, this routine uses a dense matrix format if mattype == NULL.
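
  Example Usage:
  A minimal sketch, assuming A is a (possibly matrix-free) operator with MatMultTranspose() defined:
.vb
  Mat explT;

  // build a dense explicit representation of the transpose of A; NULL selects a dense format
  MatComputeOperatorTranspose(A, NULL, &explT);
  MatDestroy(&explT);
.ve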
  Level: advanced
@*/
PetscErrorCode MatComputeOperatorTranspose(Mat inmat, MatType mattype, Mat *mat)
{
  Mat A;

  MatCreateTranspose(inmat, &A);
  MatConvert_Shell(A, mattype ? mattype : MATDENSE, MAT_INITIAL_MATRIX, mat);
  MatDestroy(&A);
  return 0;
}
/*@
  MatChop - Sets all matrix entries whose absolute value is below the tolerance to zero

  Input Parameters:
+ A   - The matrix
- tol - The zero tolerance

  Output Parameter:
. A - The chopped matrix
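
  Example Usage:
  A minimal sketch, assuming A is an already assembled matrix:
.vb
  // zero out every entry of A whose absolute value is below 1e-10
  MatChop(A, 1.0e-10);
.ve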
  Level: intermediate

.seealso: `MatCreate()`, `MatZeroEntries()`
@*/
PetscErrorCode MatChop(Mat A, PetscReal tol)
{
  Mat          a;
  PetscScalar *newVals;
  PetscInt    *newCols, rStart, rEnd, numRows, maxRows, r, colMax = 0;
  PetscBool    flg;

  PetscObjectBaseTypeCompareAny((PetscObject)A, &flg, MATSEQDENSE, MATMPIDENSE, "");
  if (flg) {
    /* dense case: chop the local dense array directly; here r holds the leading dimension,
       rStart the number of local rows, and rEnd the number of columns */
    MatDenseGetLocalMatrix(A, &a);
    MatDenseGetLDA(a, &r);
    MatGetSize(a, &rStart, &rEnd);
    MatDenseGetArray(a, &newVals);
    for (; colMax < rEnd; ++colMax) {
      for (maxRows = 0; maxRows < rStart; ++maxRows) newVals[maxRows + colMax * r] = PetscAbsScalar(newVals[maxRows + colMax * r]) < tol ? 0.0 : newVals[maxRows + colMax * r];
    }
    MatDenseRestoreArray(a, &newVals);
  } else {
    MatGetOwnershipRange(A, &rStart, &rEnd);
    MatGetRowUpperTriangular(A);
    for (r = rStart; r < rEnd; ++r) {
      PetscInt ncols;

      MatGetRow(A, r, &ncols, NULL, NULL);
      colMax = PetscMax(colMax, ncols);
      MatRestoreRow(A, r, &ncols, NULL, NULL);
    }
    numRows = rEnd - rStart;
    MPIU_Allreduce(&numRows, &maxRows, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)A));
    PetscMalloc2(colMax, &newCols, colMax, &newVals);
    MatGetOption(A, MAT_NO_OFF_PROC_ENTRIES, &flg); /* cache user-defined value */
    MatSetOption(A, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE);
    /* short-circuit the communication in MatAssemblyBegin() and MatAssemblyEnd(), */
    /* which are potentially called many times depending on the distribution of A */
    for (r = rStart; r < rStart + maxRows; ++r) {
      const PetscScalar *vals;
      const PetscInt    *cols;
      PetscInt           ncols, newcols, c;

      if (r < rEnd) {
        MatGetRow(A, r, &ncols, &cols, &vals);
        for (c = 0; c < ncols; ++c) {
          newCols[c] = cols[c];
          newVals[c] = PetscAbsScalar(vals[c]) < tol ? 0.0 : vals[c];
        }
        newcols = ncols;
        MatRestoreRow(A, r, &ncols, &cols, &vals);
        MatSetValues(A, 1, &r, newcols, newCols, newVals, INSERT_VALUES);
      }
      MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
      MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
    }
    MatRestoreRowUpperTriangular(A);
    PetscFree2(newCols, newVals);
    MatSetOption(A, MAT_NO_OFF_PROC_ENTRIES, flg); /* reset option to its user-defined value */
  }
  return 0;
}