Actual source code: dense.c
1: /*
2: Defines the basic matrix operations for sequential dense.
3: Portions of this code are under:
4: Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
5: */
7: #include <../src/mat/impls/dense/seq/dense.h>
8: #include <petscblaslapack.h>
9: #include <../src/mat/impls/aij/seq/aij.h>
11: PetscErrorCode MatSeqDenseSymmetrize_Private(Mat A, PetscBool hermitian)
12: {
13: Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
14: PetscInt j, k, n = A->rmap->n;
15: PetscScalar *v;
18: MatDenseGetArray(A, &v);
19: if (!hermitian) {
20: for (k = 0; k < n; k++) {
21: for (j = k; j < n; j++) v[j * mat->lda + k] = v[k * mat->lda + j];
22: }
23: } else {
24: for (k = 0; k < n; k++) {
25: for (j = k; j < n; j++) v[j * mat->lda + k] = PetscConj(v[k * mat->lda + j]);
26: }
27: }
28: MatDenseRestoreArray(A, &v);
29: return 0;
30: }
32: PETSC_EXTERN PetscErrorCode MatSeqDenseInvertFactors_Private(Mat A)
33: {
34: Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
35: PetscBLASInt info, n;
37: if (!A->rmap->n || !A->cmap->n) return 0;
38: PetscBLASIntCast(A->cmap->n, &n);
39: if (A->factortype == MAT_FACTOR_LU) {
41: if (!mat->fwork) {
42: mat->lfwork = n;
43: PetscMalloc1(mat->lfwork, &mat->fwork);
44: }
45: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
46: PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&n, mat->v, &mat->lda, mat->pivots, mat->fwork, &mat->lfwork, &info));
47: PetscFPTrapPop();
48: PetscLogFlops((1.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3.0);
49: } else if (A->factortype == MAT_FACTOR_CHOLESKY) {
50: if (A->spd == PETSC_BOOL3_TRUE) {
51: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
52: PetscCallBLAS("LAPACKpotri", LAPACKpotri_("L", &n, mat->v, &mat->lda, &info));
53: PetscFPTrapPop();
54: MatSeqDenseSymmetrize_Private(A, PETSC_TRUE);
55: #if defined(PETSC_USE_COMPLEX)
56: } else if (A->hermitian == PETSC_BOOL3_TRUE) {
59: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
60: PetscCallBLAS("LAPACKhetri", LAPACKhetri_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &info));
61: PetscFPTrapPop();
62: MatSeqDenseSymmetrize_Private(A, PETSC_TRUE);
63: #endif
64: } else { /* symmetric case */
67: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
68: PetscCallBLAS("LAPACKsytri", LAPACKsytri_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &info));
69: PetscFPTrapPop();
70: MatSeqDenseSymmetrize_Private(A, PETSC_FALSE);
71: }
73: PetscLogFlops((1.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3.0);
74: } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix must be factored to solve");
76: A->ops->solve = NULL;
77: A->ops->matsolve = NULL;
78: A->ops->solvetranspose = NULL;
79: A->ops->matsolvetranspose = NULL;
80: A->ops->solveadd = NULL;
81: A->ops->solvetransposeadd = NULL;
82: A->factortype = MAT_FACTOR_NONE;
83: PetscFree(A->solvertype);
84: return 0;
85: }
87: PetscErrorCode MatZeroRowsColumns_SeqDense(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
88: {
89: Mat_SeqDense *l = (Mat_SeqDense *)A->data;
90: PetscInt m = l->lda, n = A->cmap->n, r = A->rmap->n, i, j;
91: PetscScalar *slot, *bb, *v;
92: const PetscScalar *xx;
94: if (PetscDefined(USE_DEBUG)) {
95: for (i = 0; i < N; i++) {
99: }
100: }
101: if (!N) return 0;
103: /* fix right hand side if needed */
104: if (x && b) {
105: Vec xt;
108: VecDuplicate(x, &xt);
109: VecCopy(x, xt);
110: VecScale(xt, -1.0);
111: MatMultAdd(A, xt, b, b);
112: VecDestroy(&xt);
113: VecGetArrayRead(x, &xx);
114: VecGetArray(b, &bb);
115: for (i = 0; i < N; i++) bb[rows[i]] = diag * xx[rows[i]];
116: VecRestoreArrayRead(x, &xx);
117: VecRestoreArray(b, &bb);
118: }
120: MatDenseGetArray(A, &v);
121: for (i = 0; i < N; i++) {
122: slot = v + rows[i] * m;
123: PetscArrayzero(slot, r);
124: }
125: for (i = 0; i < N; i++) {
126: slot = v + rows[i];
127: for (j = 0; j < n; j++) {
128: *slot = 0.0;
129: slot += m;
130: }
131: }
132: if (diag != 0.0) {
134: for (i = 0; i < N; i++) {
135: slot = v + (m + 1) * rows[i];
136: *slot = diag;
137: }
138: }
139: MatDenseRestoreArray(A, &v);
140: return 0;
141: }
143: PetscErrorCode MatPtAPNumeric_SeqDense_SeqDense(Mat A, Mat P, Mat C)
144: {
145: Mat_SeqDense *c = (Mat_SeqDense *)(C->data);
147: if (c->ptapwork) {
148: (*C->ops->matmultnumeric)(A, P, c->ptapwork);
149: (*C->ops->transposematmultnumeric)(P, c->ptapwork, C);
150: } else SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "Must call MatPtAPSymbolic_SeqDense_SeqDense() first");
151: return 0;
152: }
154: PetscErrorCode MatPtAPSymbolic_SeqDense_SeqDense(Mat A, Mat P, PetscReal fill, Mat C)
155: {
156: Mat_SeqDense *c;
157: PetscBool cisdense = PETSC_FALSE;
159: MatSetSizes(C, P->cmap->n, P->cmap->n, P->cmap->N, P->cmap->N);
160: #if defined(PETSC_HAVE_CUDA)
161: PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, "");
162: #elif (PETSC_HAVE_HIP)
163: PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSEHIP, "");
164: #endif
166: if (!cisdense) {
167: PetscBool flg;
169: PetscObjectTypeCompare((PetscObject)P, ((PetscObject)A)->type_name, &flg);
170: MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE);
171: }
172: MatSetUp(C);
173: c = (Mat_SeqDense *)C->data;
174: MatCreate(PetscObjectComm((PetscObject)A), &c->ptapwork);
175: MatSetSizes(c->ptapwork, A->rmap->n, P->cmap->n, A->rmap->N, P->cmap->N);
176: MatSetType(c->ptapwork, ((PetscObject)C)->type_name);
177: MatSetUp(c->ptapwork);
178: return 0;
179: }
181: PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
182: {
183: Mat B = NULL;
184: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
185: Mat_SeqDense *b;
186: PetscInt *ai = a->i, *aj = a->j, m = A->rmap->N, n = A->cmap->N, i;
187: const MatScalar *av;
188: PetscBool isseqdense;
190: if (reuse == MAT_REUSE_MATRIX) {
191: PetscObjectTypeCompare((PetscObject)*newmat, MATSEQDENSE, &isseqdense);
193: }
194: if (reuse != MAT_REUSE_MATRIX) {
195: MatCreate(PetscObjectComm((PetscObject)A), &B);
196: MatSetSizes(B, m, n, m, n);
197: MatSetType(B, MATSEQDENSE);
198: MatSeqDenseSetPreallocation(B, NULL);
199: b = (Mat_SeqDense *)(B->data);
200: } else {
201: b = (Mat_SeqDense *)((*newmat)->data);
202: PetscArrayzero(b->v, m * n);
203: }
204: MatSeqAIJGetArrayRead(A, &av);
205: for (i = 0; i < m; i++) {
206: PetscInt j;
207: for (j = 0; j < ai[1] - ai[0]; j++) {
208: b->v[*aj * m + i] = *av;
209: aj++;
210: av++;
211: }
212: ai++;
213: }
214: MatSeqAIJRestoreArrayRead(A, &av);
216: if (reuse == MAT_INPLACE_MATRIX) {
217: MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY);
218: MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY);
219: MatHeaderReplace(A, &B);
220: } else {
221: if (B) *newmat = B;
222: MatAssemblyBegin(*newmat, MAT_FINAL_ASSEMBLY);
223: MatAssemblyEnd(*newmat, MAT_FINAL_ASSEMBLY);
224: }
225: return 0;
226: }
228: PETSC_INTERN PetscErrorCode MatConvert_SeqDense_SeqAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
229: {
230: Mat B = NULL;
231: Mat_SeqDense *a = (Mat_SeqDense *)A->data;
232: PetscInt i, j;
233: PetscInt *rows, *nnz;
234: MatScalar *aa = a->v, *vals;
236: PetscCalloc3(A->rmap->n, &rows, A->rmap->n, &nnz, A->rmap->n, &vals);
237: if (reuse != MAT_REUSE_MATRIX) {
238: MatCreate(PetscObjectComm((PetscObject)A), &B);
239: MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N);
240: MatSetType(B, MATSEQAIJ);
241: for (j = 0; j < A->cmap->n; j++) {
242: for (i = 0; i < A->rmap->n; i++)
243: if (aa[i] != 0.0 || (i == j && A->cmap->n == A->rmap->n)) ++nnz[i];
244: aa += a->lda;
245: }
246: MatSeqAIJSetPreallocation(B, PETSC_DETERMINE, nnz);
247: } else B = *newmat;
248: aa = a->v;
249: for (j = 0; j < A->cmap->n; j++) {
250: PetscInt numRows = 0;
251: for (i = 0; i < A->rmap->n; i++)
252: if (aa[i] != 0.0 || (i == j && A->cmap->n == A->rmap->n)) {
253: rows[numRows] = i;
254: vals[numRows++] = aa[i];
255: }
256: MatSetValues(B, numRows, rows, 1, &j, vals, INSERT_VALUES);
257: aa += a->lda;
258: }
259: PetscFree3(rows, nnz, vals);
260: MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY);
261: MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY);
263: if (reuse == MAT_INPLACE_MATRIX) {
264: MatHeaderReplace(A, &B);
265: } else if (reuse != MAT_REUSE_MATRIX) *newmat = B;
266: return 0;
267: }
269: PetscErrorCode MatAXPY_SeqDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
270: {
271: Mat_SeqDense *x = (Mat_SeqDense *)X->data, *y = (Mat_SeqDense *)Y->data;
272: const PetscScalar *xv;
273: PetscScalar *yv;
274: PetscBLASInt N, m, ldax = 0, lday = 0, one = 1;
276: MatDenseGetArrayRead(X, &xv);
277: MatDenseGetArray(Y, &yv);
278: PetscBLASIntCast(X->rmap->n * X->cmap->n, &N);
279: PetscBLASIntCast(X->rmap->n, &m);
280: PetscBLASIntCast(x->lda, &ldax);
281: PetscBLASIntCast(y->lda, &lday);
282: if (ldax > m || lday > m) {
283: PetscInt j;
285: for (j = 0; j < X->cmap->n; j++) PetscCallBLAS("BLASaxpy", BLASaxpy_(&m, &alpha, xv + j * ldax, &one, yv + j * lday, &one));
286: } else {
287: PetscCallBLAS("BLASaxpy", BLASaxpy_(&N, &alpha, xv, &one, yv, &one));
288: }
289: MatDenseRestoreArrayRead(X, &xv);
290: MatDenseRestoreArray(Y, &yv);
291: PetscLogFlops(PetscMax(2.0 * N - 1, 0));
292: return 0;
293: }
295: static PetscErrorCode MatGetInfo_SeqDense(Mat A, MatInfoType flag, MatInfo *info)
296: {
297: PetscLogDouble N = A->rmap->n * A->cmap->n;
299: info->block_size = 1.0;
300: info->nz_allocated = N;
301: info->nz_used = N;
302: info->nz_unneeded = 0;
303: info->assemblies = A->num_ass;
304: info->mallocs = 0;
305: info->memory = 0; /* REVIEW ME */
306: info->fill_ratio_given = 0;
307: info->fill_ratio_needed = 0;
308: info->factor_mallocs = 0;
309: return 0;
310: }
312: PetscErrorCode MatScale_SeqDense(Mat A, PetscScalar alpha)
313: {
314: Mat_SeqDense *a = (Mat_SeqDense *)A->data;
315: PetscScalar *v;
316: PetscBLASInt one = 1, j, nz, lda = 0;
318: MatDenseGetArray(A, &v);
319: PetscBLASIntCast(a->lda, &lda);
320: if (lda > A->rmap->n) {
321: PetscBLASIntCast(A->rmap->n, &nz);
322: for (j = 0; j < A->cmap->n; j++) PetscCallBLAS("BLASscal", BLASscal_(&nz, &alpha, v + j * lda, &one));
323: } else {
324: PetscBLASIntCast(A->rmap->n * A->cmap->n, &nz);
325: PetscCallBLAS("BLASscal", BLASscal_(&nz, &alpha, v, &one));
326: }
327: PetscLogFlops(A->rmap->n * A->cmap->n);
328: MatDenseRestoreArray(A, &v);
329: return 0;
330: }
332: PetscErrorCode MatShift_SeqDense(Mat A, PetscScalar alpha)
333: {
334: Mat_SeqDense *a = (Mat_SeqDense *)A->data;
335: PetscScalar *v;
336: PetscInt j, k;
338: MatDenseGetArray(A, &v);
339: k = PetscMin(A->rmap->n, A->cmap->n);
340: for (j = 0; j < k; j++) v[j + j * a->lda] += alpha;
341: PetscLogFlops(k);
342: MatDenseRestoreArray(A, &v);
343: return 0;
344: }
346: static PetscErrorCode MatIsHermitian_SeqDense(Mat A, PetscReal rtol, PetscBool *fl)
347: {
348: Mat_SeqDense *a = (Mat_SeqDense *)A->data;
349: PetscInt i, j, m = A->rmap->n, N = a->lda;
350: const PetscScalar *v;
352: *fl = PETSC_FALSE;
353: if (A->rmap->n != A->cmap->n) return 0;
354: MatDenseGetArrayRead(A, &v);
355: for (i = 0; i < m; i++) {
356: for (j = i; j < m; j++) {
357: if (PetscAbsScalar(v[i + j * N] - PetscConj(v[j + i * N])) > rtol) goto restore;
358: }
359: }
360: *fl = PETSC_TRUE;
361: restore:
362: MatDenseRestoreArrayRead(A, &v);
363: return 0;
364: }
366: static PetscErrorCode MatIsSymmetric_SeqDense(Mat A, PetscReal rtol, PetscBool *fl)
367: {
368: Mat_SeqDense *a = (Mat_SeqDense *)A->data;
369: PetscInt i, j, m = A->rmap->n, N = a->lda;
370: const PetscScalar *v;
372: *fl = PETSC_FALSE;
373: if (A->rmap->n != A->cmap->n) return 0;
374: MatDenseGetArrayRead(A, &v);
375: for (i = 0; i < m; i++) {
376: for (j = i; j < m; j++) {
377: if (PetscAbsScalar(v[i + j * N] - v[j + i * N]) > rtol) goto restore;
378: }
379: }
380: *fl = PETSC_TRUE;
381: restore:
382: MatDenseRestoreArrayRead(A, &v);
383: return 0;
384: }
386: PetscErrorCode MatDuplicateNoCreate_SeqDense(Mat newi, Mat A, MatDuplicateOption cpvalues)
387: {
388: Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
389: PetscInt lda = (PetscInt)mat->lda, j, m, nlda = lda;
390: PetscBool isdensecpu;
392: PetscLayoutReference(A->rmap, &newi->rmap);
393: PetscLayoutReference(A->cmap, &newi->cmap);
394: if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { /* propagate LDA */
395: MatDenseSetLDA(newi, lda);
396: }
397: PetscObjectTypeCompare((PetscObject)newi, MATSEQDENSE, &isdensecpu);
398: if (isdensecpu) MatSeqDenseSetPreallocation(newi, NULL);
399: if (cpvalues == MAT_COPY_VALUES) {
400: const PetscScalar *av;
401: PetscScalar *v;
403: MatDenseGetArrayRead(A, &av);
404: MatDenseGetArrayWrite(newi, &v);
405: MatDenseGetLDA(newi, &nlda);
406: m = A->rmap->n;
407: if (lda > m || nlda > m) {
408: for (j = 0; j < A->cmap->n; j++) PetscArraycpy(v + j * nlda, av + j * lda, m);
409: } else {
410: PetscArraycpy(v, av, A->rmap->n * A->cmap->n);
411: }
412: MatDenseRestoreArrayWrite(newi, &v);
413: MatDenseRestoreArrayRead(A, &av);
414: }
415: return 0;
416: }
418: PetscErrorCode MatDuplicate_SeqDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat)
419: {
420: MatCreate(PetscObjectComm((PetscObject)A), newmat);
421: MatSetSizes(*newmat, A->rmap->n, A->cmap->n, A->rmap->n, A->cmap->n);
422: MatSetType(*newmat, ((PetscObject)A)->type_name);
423: MatDuplicateNoCreate_SeqDense(*newmat, A, cpvalues);
424: return 0;
425: }
427: static PetscErrorCode MatSolve_SeqDense_Internal_LU(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k, PetscBool T)
428: {
429: Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
430: PetscBLASInt info;
432: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
433: PetscCallBLAS("LAPACKgetrs", LAPACKgetrs_(T ? "T" : "N", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info));
434: PetscFPTrapPop();
436: PetscLogFlops(nrhs * (2.0 * m * m - m));
437: return 0;
438: }
440: static PetscErrorCode MatConjugate_SeqDense(Mat);
442: static PetscErrorCode MatSolve_SeqDense_Internal_Cholesky(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k, PetscBool T)
443: {
444: Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
445: PetscBLASInt info;
447: if (A->spd == PETSC_BOOL3_TRUE) {
448: if (PetscDefined(USE_COMPLEX) && T) MatConjugate_SeqDense(A);
449: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
450: PetscCallBLAS("LAPACKpotrs", LAPACKpotrs_("L", &m, &nrhs, mat->v, &mat->lda, x, &m, &info));
451: PetscFPTrapPop();
453: if (PetscDefined(USE_COMPLEX) && T) MatConjugate_SeqDense(A);
454: #if defined(PETSC_USE_COMPLEX)
455: } else if (A->hermitian == PETSC_BOOL3_TRUE) {
456: if (T) MatConjugate_SeqDense(A);
457: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
458: PetscCallBLAS("LAPACKhetrs", LAPACKhetrs_("L", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info));
459: PetscFPTrapPop();
461: if (T) MatConjugate_SeqDense(A);
462: #endif
463: } else { /* symmetric case */
464: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
465: PetscCallBLAS("LAPACKsytrs", LAPACKsytrs_("L", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info));
466: PetscFPTrapPop();
468: }
469: PetscLogFlops(nrhs * (2.0 * m * m - m));
470: return 0;
471: }
473: static PetscErrorCode MatSolve_SeqDense_Internal_QR(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k)
474: {
475: Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
476: PetscBLASInt info;
477: char trans;
479: if (PetscDefined(USE_COMPLEX)) {
480: trans = 'C';
481: } else {
482: trans = 'T';
483: }
484: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
485: { /* lwork depends on the number of right-hand sides */
486: PetscBLASInt nlfwork, lfwork = -1;
487: PetscScalar fwork;
489: PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", &trans, &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, &fwork, &lfwork, &info));
490: nlfwork = (PetscBLASInt)PetscRealPart(fwork);
491: if (nlfwork > mat->lfwork) {
492: mat->lfwork = nlfwork;
493: PetscFree(mat->fwork);
494: PetscMalloc1(mat->lfwork, &mat->fwork);
495: }
496: }
497: PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", &trans, &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, mat->fwork, &mat->lfwork, &info));
498: PetscFPTrapPop();
500: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
501: PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "N", "N", &mat->rank, &nrhs, mat->v, &mat->lda, x, &ldx, &info));
502: PetscFPTrapPop();
504: for (PetscInt j = 0; j < nrhs; j++) {
505: for (PetscInt i = mat->rank; i < k; i++) x[j * ldx + i] = 0.;
506: }
507: PetscLogFlops(nrhs * (4.0 * m * mat->rank - PetscSqr(mat->rank)));
508: return 0;
509: }
511: static PetscErrorCode MatSolveTranspose_SeqDense_Internal_QR(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k)
512: {
513: Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
514: PetscBLASInt info;
516: if (A->rmap->n == A->cmap->n && mat->rank == A->rmap->n) {
517: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
518: PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "T", "N", &m, &nrhs, mat->v, &mat->lda, x, &ldx, &info));
519: PetscFPTrapPop();
521: if (PetscDefined(USE_COMPLEX)) MatConjugate_SeqDense(A);
522: { /* lwork depends on the number of right-hand sides */
523: PetscBLASInt nlfwork, lfwork = -1;
524: PetscScalar fwork;
526: PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", "N", &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, &fwork, &lfwork, &info));
527: nlfwork = (PetscBLASInt)PetscRealPart(fwork);
528: if (nlfwork > mat->lfwork) {
529: mat->lfwork = nlfwork;
530: PetscFree(mat->fwork);
531: PetscMalloc1(mat->lfwork, &mat->fwork);
532: }
533: }
534: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
535: PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", "N", &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, mat->fwork, &mat->lfwork, &info));
536: PetscFPTrapPop();
538: if (PetscDefined(USE_COMPLEX)) MatConjugate_SeqDense(A);
539: } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "QR factored matrix cannot be used for transpose solve");
540: PetscLogFlops(nrhs * (4.0 * m * mat->rank - PetscSqr(mat->rank)));
541: return 0;
542: }
544: static PetscErrorCode MatSolve_SeqDense_SetUp(Mat A, Vec xx, Vec yy, PetscScalar **_y, PetscBLASInt *_m, PetscBLASInt *_k)
545: {
546: Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
547: PetscScalar *y;
548: PetscBLASInt m = 0, k = 0;
550: PetscBLASIntCast(A->rmap->n, &m);
551: PetscBLASIntCast(A->cmap->n, &k);
552: if (k < m) {
553: VecCopy(xx, mat->qrrhs);
554: VecGetArray(mat->qrrhs, &y);
555: } else {
556: VecCopy(xx, yy);
557: VecGetArray(yy, &y);
558: }
559: *_y = y;
560: *_k = k;
561: *_m = m;
562: return 0;
563: }
565: static PetscErrorCode MatSolve_SeqDense_TearDown(Mat A, Vec xx, Vec yy, PetscScalar **_y, PetscBLASInt *_m, PetscBLASInt *_k)
566: {
567: Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
568: PetscScalar *y = NULL;
569: PetscBLASInt m, k;
571: y = *_y;
572: *_y = NULL;
573: k = *_k;
574: m = *_m;
575: if (k < m) {
576: PetscScalar *yv;
577: VecGetArray(yy, &yv);
578: PetscArraycpy(yv, y, k);
579: VecRestoreArray(yy, &yv);
580: VecRestoreArray(mat->qrrhs, &y);
581: } else {
582: VecRestoreArray(yy, &y);
583: }
584: return 0;
585: }
587: static PetscErrorCode MatSolve_SeqDense_LU(Mat A, Vec xx, Vec yy)
588: {
589: PetscScalar *y = NULL;
590: PetscBLASInt m = 0, k = 0;
592: MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k);
593: MatSolve_SeqDense_Internal_LU(A, y, m, m, 1, k, PETSC_FALSE);
594: MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k);
595: return 0;
596: }
598: static PetscErrorCode MatSolveTranspose_SeqDense_LU(Mat A, Vec xx, Vec yy)
599: {
600: PetscScalar *y = NULL;
601: PetscBLASInt m = 0, k = 0;
603: MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k);
604: MatSolve_SeqDense_Internal_LU(A, y, m, m, 1, k, PETSC_TRUE);
605: MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k);
606: return 0;
607: }
609: static PetscErrorCode MatSolve_SeqDense_Cholesky(Mat A, Vec xx, Vec yy)
610: {
611: PetscScalar *y = NULL;
612: PetscBLASInt m = 0, k = 0;
614: MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k);
615: MatSolve_SeqDense_Internal_Cholesky(A, y, m, m, 1, k, PETSC_FALSE);
616: MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k);
617: return 0;
618: }
620: static PetscErrorCode MatSolveTranspose_SeqDense_Cholesky(Mat A, Vec xx, Vec yy)
621: {
622: PetscScalar *y = NULL;
623: PetscBLASInt m = 0, k = 0;
625: MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k);
626: MatSolve_SeqDense_Internal_Cholesky(A, y, m, m, 1, k, PETSC_TRUE);
627: MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k);
628: return 0;
629: }
631: static PetscErrorCode MatSolve_SeqDense_QR(Mat A, Vec xx, Vec yy)
632: {
633: PetscScalar *y = NULL;
634: PetscBLASInt m = 0, k = 0;
636: MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k);
637: MatSolve_SeqDense_Internal_QR(A, y, PetscMax(m, k), m, 1, k);
638: MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k);
639: return 0;
640: }
642: static PetscErrorCode MatSolveTranspose_SeqDense_QR(Mat A, Vec xx, Vec yy)
643: {
644: PetscScalar *y = NULL;
645: PetscBLASInt m = 0, k = 0;
647: MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k);
648: MatSolveTranspose_SeqDense_Internal_QR(A, y, PetscMax(m, k), m, 1, k);
649: MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k);
650: return 0;
651: }
653: static PetscErrorCode MatMatSolve_SeqDense_SetUp(Mat A, Mat B, Mat X, PetscScalar **_y, PetscBLASInt *_ldy, PetscBLASInt *_m, PetscBLASInt *_nrhs, PetscBLASInt *_k)
654: {
655: const PetscScalar *b;
656: PetscScalar *y;
657: PetscInt n, _ldb, _ldx;
658: PetscBLASInt nrhs = 0, m = 0, k = 0, ldb = 0, ldx = 0, ldy = 0;
660: *_ldy = 0;
661: *_m = 0;
662: *_nrhs = 0;
663: *_k = 0;
664: *_y = NULL;
665: PetscBLASIntCast(A->rmap->n, &m);
666: PetscBLASIntCast(A->cmap->n, &k);
667: MatGetSize(B, NULL, &n);
668: PetscBLASIntCast(n, &nrhs);
669: MatDenseGetLDA(B, &_ldb);
670: PetscBLASIntCast(_ldb, &ldb);
671: MatDenseGetLDA(X, &_ldx);
672: PetscBLASIntCast(_ldx, &ldx);
673: if (ldx < m) {
674: MatDenseGetArrayRead(B, &b);
675: PetscMalloc1(nrhs * m, &y);
676: if (ldb == m) {
677: PetscArraycpy(y, b, ldb * nrhs);
678: } else {
679: for (PetscInt j = 0; j < nrhs; j++) PetscArraycpy(&y[j * m], &b[j * ldb], m);
680: }
681: ldy = m;
682: MatDenseRestoreArrayRead(B, &b);
683: } else {
684: if (ldb == ldx) {
685: MatCopy(B, X, SAME_NONZERO_PATTERN);
686: MatDenseGetArray(X, &y);
687: } else {
688: MatDenseGetArray(X, &y);
689: MatDenseGetArrayRead(B, &b);
690: for (PetscInt j = 0; j < nrhs; j++) PetscArraycpy(&y[j * ldx], &b[j * ldb], m);
691: MatDenseRestoreArrayRead(B, &b);
692: }
693: ldy = ldx;
694: }
695: *_y = y;
696: *_ldy = ldy;
697: *_k = k;
698: *_m = m;
699: *_nrhs = nrhs;
700: return 0;
701: }
703: static PetscErrorCode MatMatSolve_SeqDense_TearDown(Mat A, Mat B, Mat X, PetscScalar **_y, PetscBLASInt *_ldy, PetscBLASInt *_m, PetscBLASInt *_nrhs, PetscBLASInt *_k)
704: {
705: PetscScalar *y;
706: PetscInt _ldx;
707: PetscBLASInt k, ldy, nrhs, ldx = 0;
709: y = *_y;
710: *_y = NULL;
711: k = *_k;
712: ldy = *_ldy;
713: nrhs = *_nrhs;
714: MatDenseGetLDA(X, &_ldx);
715: PetscBLASIntCast(_ldx, &ldx);
716: if (ldx != ldy) {
717: PetscScalar *xv;
718: MatDenseGetArray(X, &xv);
719: for (PetscInt j = 0; j < nrhs; j++) PetscArraycpy(&xv[j * ldx], &y[j * ldy], k);
720: MatDenseRestoreArray(X, &xv);
721: PetscFree(y);
722: } else {
723: MatDenseRestoreArray(X, &y);
724: }
725: return 0;
726: }
728: static PetscErrorCode MatMatSolve_SeqDense_LU(Mat A, Mat B, Mat X)
729: {
730: PetscScalar *y;
731: PetscBLASInt m, k, ldy, nrhs;
733: MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k);
734: MatSolve_SeqDense_Internal_LU(A, y, ldy, m, nrhs, k, PETSC_FALSE);
735: MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k);
736: return 0;
737: }
739: static PetscErrorCode MatMatSolveTranspose_SeqDense_LU(Mat A, Mat B, Mat X)
740: {
741: PetscScalar *y;
742: PetscBLASInt m, k, ldy, nrhs;
744: MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k);
745: MatSolve_SeqDense_Internal_LU(A, y, ldy, m, nrhs, k, PETSC_TRUE);
746: MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k);
747: return 0;
748: }
750: static PetscErrorCode MatMatSolve_SeqDense_Cholesky(Mat A, Mat B, Mat X)
751: {
752: PetscScalar *y;
753: PetscBLASInt m, k, ldy, nrhs;
755: MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k);
756: MatSolve_SeqDense_Internal_Cholesky(A, y, ldy, m, nrhs, k, PETSC_FALSE);
757: MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k);
758: return 0;
759: }
761: static PetscErrorCode MatMatSolveTranspose_SeqDense_Cholesky(Mat A, Mat B, Mat X)
762: {
763: PetscScalar *y;
764: PetscBLASInt m, k, ldy, nrhs;
766: MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k);
767: MatSolve_SeqDense_Internal_Cholesky(A, y, ldy, m, nrhs, k, PETSC_TRUE);
768: MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k);
769: return 0;
770: }
772: static PetscErrorCode MatMatSolve_SeqDense_QR(Mat A, Mat B, Mat X)
773: {
774: PetscScalar *y;
775: PetscBLASInt m, k, ldy, nrhs;
777: MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k);
778: MatSolve_SeqDense_Internal_QR(A, y, ldy, m, nrhs, k);
779: MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k);
780: return 0;
781: }
783: static PetscErrorCode MatMatSolveTranspose_SeqDense_QR(Mat A, Mat B, Mat X)
784: {
785: PetscScalar *y;
786: PetscBLASInt m, k, ldy, nrhs;
788: MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k);
789: MatSolveTranspose_SeqDense_Internal_QR(A, y, ldy, m, nrhs, k);
790: MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k);
791: return 0;
792: }
794: /* ---------------------------------------------------------------*/
795: /* COMMENT: I have chosen to hide row permutation in the pivots,
796: rather than put it in the Mat->row slot.*/
797: PetscErrorCode MatLUFactor_SeqDense(Mat A, IS row, IS col, const MatFactorInfo *minfo)
798: {
799: Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
800: PetscBLASInt n, m, info;
802: PetscBLASIntCast(A->cmap->n, &n);
803: PetscBLASIntCast(A->rmap->n, &m);
804: if (!mat->pivots) { PetscMalloc1(A->rmap->n, &mat->pivots); }
805: if (!A->rmap->n || !A->cmap->n) return 0;
806: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
807: PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&m, &n, mat->v, &mat->lda, mat->pivots, &info));
808: PetscFPTrapPop();
813: A->ops->solve = MatSolve_SeqDense_LU;
814: A->ops->matsolve = MatMatSolve_SeqDense_LU;
815: A->ops->solvetranspose = MatSolveTranspose_SeqDense_LU;
816: A->ops->matsolvetranspose = MatMatSolveTranspose_SeqDense_LU;
817: A->factortype = MAT_FACTOR_LU;
819: PetscFree(A->solvertype);
820: PetscStrallocpy(MATSOLVERPETSC, &A->solvertype);
822: PetscLogFlops((2.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3);
823: return 0;
824: }
826: static PetscErrorCode MatLUFactorNumeric_SeqDense(Mat fact, Mat A, const MatFactorInfo *info_dummy)
827: {
828: MatFactorInfo info;
830: MatDuplicateNoCreate_SeqDense(fact, A, MAT_COPY_VALUES);
831: PetscUseTypeMethod(fact, lufactor, NULL, NULL, &info);
832: return 0;
833: }
835: PetscErrorCode MatLUFactorSymbolic_SeqDense(Mat fact, Mat A, IS row, IS col, const MatFactorInfo *info)
836: {
837: fact->preallocated = PETSC_TRUE;
838: fact->assembled = PETSC_TRUE;
839: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqDense;
840: return 0;
841: }
843: /* Cholesky as L*L^T or L*D*L^T and the symmetric/hermitian complex variants */
844: PetscErrorCode MatCholeskyFactor_SeqDense(Mat A, IS perm, const MatFactorInfo *factinfo)
845: {
846: Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
847: PetscBLASInt info, n;
849: PetscBLASIntCast(A->cmap->n, &n);
850: if (!A->rmap->n || !A->cmap->n) return 0;
851: if (A->spd == PETSC_BOOL3_TRUE) {
852: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
853: PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("L", &n, mat->v, &mat->lda, &info));
854: PetscFPTrapPop();
855: #if defined(PETSC_USE_COMPLEX)
856: } else if (A->hermitian == PETSC_BOOL3_TRUE) {
857: if (!mat->pivots) { PetscMalloc1(A->rmap->n, &mat->pivots); }
858: if (!mat->fwork) {
859: PetscScalar dummy;
861: mat->lfwork = -1;
862: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
863: PetscCallBLAS("LAPACKhetrf", LAPACKhetrf_("L", &n, mat->v, &mat->lda, mat->pivots, &dummy, &mat->lfwork, &info));
864: PetscFPTrapPop();
865: mat->lfwork = (PetscInt)PetscRealPart(dummy);
866: PetscMalloc1(mat->lfwork, &mat->fwork);
867: }
868: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
869: PetscCallBLAS("LAPACKhetrf", LAPACKhetrf_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &mat->lfwork, &info));
870: PetscFPTrapPop();
871: #endif
872: } else { /* symmetric case */
873: if (!mat->pivots) { PetscMalloc1(A->rmap->n, &mat->pivots); }
874: if (!mat->fwork) {
875: PetscScalar dummy;
877: mat->lfwork = -1;
878: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
879: PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &n, mat->v, &mat->lda, mat->pivots, &dummy, &mat->lfwork, &info));
880: PetscFPTrapPop();
881: mat->lfwork = (PetscInt)PetscRealPart(dummy);
882: PetscMalloc1(mat->lfwork, &mat->fwork);
883: }
884: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
885: PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &mat->lfwork, &info));
886: PetscFPTrapPop();
887: }
890: A->ops->solve = MatSolve_SeqDense_Cholesky;
891: A->ops->matsolve = MatMatSolve_SeqDense_Cholesky;
892: A->ops->solvetranspose = MatSolveTranspose_SeqDense_Cholesky;
893: A->ops->matsolvetranspose = MatMatSolveTranspose_SeqDense_Cholesky;
894: A->factortype = MAT_FACTOR_CHOLESKY;
896: PetscFree(A->solvertype);
897: PetscStrallocpy(MATSOLVERPETSC, &A->solvertype);
899: PetscLogFlops((1.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3.0);
900: return 0;
901: }
903: static PetscErrorCode MatCholeskyFactorNumeric_SeqDense(Mat fact, Mat A, const MatFactorInfo *info_dummy)
904: {
905: MatFactorInfo info;
907: info.fill = 1.0;
909: MatDuplicateNoCreate_SeqDense(fact, A, MAT_COPY_VALUES);
910: PetscUseTypeMethod(fact, choleskyfactor, NULL, &info);
911: return 0;
912: }
914: PetscErrorCode MatCholeskyFactorSymbolic_SeqDense(Mat fact, Mat A, IS row, const MatFactorInfo *info)
915: {
916: fact->assembled = PETSC_TRUE;
917: fact->preallocated = PETSC_TRUE;
918: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqDense;
919: return 0;
920: }
922: PetscErrorCode MatQRFactor_SeqDense(Mat A, IS col, const MatFactorInfo *minfo)
923: {
924: Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
925: PetscBLASInt n, m, info, min, max;
927: PetscBLASIntCast(A->cmap->n, &n);
928: PetscBLASIntCast(A->rmap->n, &m);
929: max = PetscMax(m, n);
930: min = PetscMin(m, n);
931: if (!mat->tau) { PetscMalloc1(min, &mat->tau); }
932: if (!mat->pivots) { PetscMalloc1(n, &mat->pivots); }
933: if (!mat->qrrhs) MatCreateVecs(A, NULL, &(mat->qrrhs));
934: if (!A->rmap->n || !A->cmap->n) return 0;
935: if (!mat->fwork) {
936: PetscScalar dummy;
938: mat->lfwork = -1;
939: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
940: PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&m, &n, mat->v, &mat->lda, mat->tau, &dummy, &mat->lfwork, &info));
941: PetscFPTrapPop();
942: mat->lfwork = (PetscInt)PetscRealPart(dummy);
943: PetscMalloc1(mat->lfwork, &mat->fwork);
944: }
945: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
946: PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&m, &n, mat->v, &mat->lda, mat->tau, mat->fwork, &mat->lfwork, &info));
947: PetscFPTrapPop();
949: // TODO: try to estimate rank or test for and use geqp3 for rank revealing QR. For now just say rank is min of m and n
950: mat->rank = min;
952: A->ops->solve = MatSolve_SeqDense_QR;
953: A->ops->matsolve = MatMatSolve_SeqDense_QR;
954: A->factortype = MAT_FACTOR_QR;
955: if (m == n) {
956: A->ops->solvetranspose = MatSolveTranspose_SeqDense_QR;
957: A->ops->matsolvetranspose = MatMatSolveTranspose_SeqDense_QR;
958: }
960: PetscFree(A->solvertype);
961: PetscStrallocpy(MATSOLVERPETSC, &A->solvertype);
963: PetscLogFlops(2.0 * min * min * (max - min / 3.0));
964: return 0;
965: }
967: static PetscErrorCode MatQRFactorNumeric_SeqDense(Mat fact, Mat A, const MatFactorInfo *info_dummy)
968: {
969: MatFactorInfo info;
971: info.fill = 1.0;
973: MatDuplicateNoCreate_SeqDense(fact, A, MAT_COPY_VALUES);
974: PetscUseMethod(fact, "MatQRFactor_C", (Mat, IS, const MatFactorInfo *), (fact, NULL, &info));
975: return 0;
976: }
978: PetscErrorCode MatQRFactorSymbolic_SeqDense(Mat fact, Mat A, IS row, const MatFactorInfo *info)
979: {
980: fact->assembled = PETSC_TRUE;
981: fact->preallocated = PETSC_TRUE;
982: PetscObjectComposeFunction((PetscObject)fact, "MatQRFactorNumeric_C", MatQRFactorNumeric_SeqDense);
983: return 0;
984: }
986: /* uses LAPACK */
987: PETSC_INTERN PetscErrorCode MatGetFactor_seqdense_petsc(Mat A, MatFactorType ftype, Mat *fact)
988: {
989: MatCreate(PetscObjectComm((PetscObject)A), fact);
990: MatSetSizes(*fact, A->rmap->n, A->cmap->n, A->rmap->n, A->cmap->n);
991: MatSetType(*fact, MATDENSE);
992: (*fact)->trivialsymbolic = PETSC_TRUE;
993: if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU) {
994: (*fact)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqDense;
995: (*fact)->ops->ilufactorsymbolic = MatLUFactorSymbolic_SeqDense;
996: } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
997: (*fact)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqDense;
998: } else if (ftype == MAT_FACTOR_QR) {
999: PetscObjectComposeFunction((PetscObject)(*fact), "MatQRFactorSymbolic_C", MatQRFactorSymbolic_SeqDense);
1000: }
1001: (*fact)->factortype = ftype;
1003: PetscFree((*fact)->solvertype);
1004: PetscStrallocpy(MATSOLVERPETSC, &(*fact)->solvertype);
1005: PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_LU]);
1006: PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_ILU]);
1007: PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_CHOLESKY]);
1008: PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_ICC]);
1009: return 0;
1010: }
1012: /* ------------------------------------------------------------------*/
1013: static PetscErrorCode MatSOR_SeqDense(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal shift, PetscInt its, PetscInt lits, Vec xx)
1014: {
1015: Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
1016: PetscScalar *x, *v = mat->v, zero = 0.0, xt;
1017: const PetscScalar *b;
1018: PetscInt m = A->rmap->n, i;
1019: PetscBLASInt o = 1, bm = 0;
1021: #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1023: #endif
1024: if (shift == -1) shift = 0.0; /* negative shift indicates do not error on zero diagonal; this code never zeros on zero diagonal */
1025: PetscBLASIntCast(m, &bm);
1026: if (flag & SOR_ZERO_INITIAL_GUESS) {
1027: /* this is a hack fix, should have another version without the second BLASdotu */
1028: VecSet(xx, zero);
1029: }
1030: VecGetArray(xx, &x);
1031: VecGetArrayRead(bb, &b);
1032: its = its * lits;
1034: while (its--) {
1035: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
1036: for (i = 0; i < m; i++) {
1037: PetscCallBLAS("BLASdotu", xt = b[i] - BLASdotu_(&bm, v + i, &bm, x, &o));
1038: x[i] = (1. - omega) * x[i] + omega * (xt + v[i + i * m] * x[i]) / (v[i + i * m] + shift);
1039: }
1040: }
1041: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1042: for (i = m - 1; i >= 0; i--) {
1043: PetscCallBLAS("BLASdotu", xt = b[i] - BLASdotu_(&bm, v + i, &bm, x, &o));
1044: x[i] = (1. - omega) * x[i] + omega * (xt + v[i + i * m] * x[i]) / (v[i + i * m] + shift);
1045: }
1046: }
1047: }
1048: VecRestoreArrayRead(bb, &b);
1049: VecRestoreArray(xx, &x);
1050: return 0;
1051: }
1053: /* -----------------------------------------------------------------*/
1054: PetscErrorCode MatMultTranspose_SeqDense(Mat A, Vec xx, Vec yy)
1055: {
1056: Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
1057: const PetscScalar *v = mat->v, *x;
1058: PetscScalar *y;
1059: PetscBLASInt m, n, _One = 1;
1060: PetscScalar _DOne = 1.0, _DZero = 0.0;
1062: PetscBLASIntCast(A->rmap->n, &m);
1063: PetscBLASIntCast(A->cmap->n, &n);
1064: VecGetArrayRead(xx, &x);
1065: VecGetArrayWrite(yy, &y);
1066: if (!A->rmap->n || !A->cmap->n) {
1067: PetscBLASInt i;
1068: for (i = 0; i < n; i++) y[i] = 0.0;
1069: } else {
1070: PetscCallBLAS("BLASgemv", BLASgemv_("T", &m, &n, &_DOne, v, &mat->lda, x, &_One, &_DZero, y, &_One));
1071: PetscLogFlops(2.0 * A->rmap->n * A->cmap->n - A->cmap->n);
1072: }
1073: VecRestoreArrayRead(xx, &x);
1074: VecRestoreArrayWrite(yy, &y);
1075: return 0;
1076: }
1078: PetscErrorCode MatMult_SeqDense(Mat A, Vec xx, Vec yy)
1079: {
1080: Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
1081: PetscScalar *y, _DOne = 1.0, _DZero = 0.0;
1082: PetscBLASInt m, n, _One = 1;
1083: const PetscScalar *v = mat->v, *x;
1085: PetscBLASIntCast(A->rmap->n, &m);
1086: PetscBLASIntCast(A->cmap->n, &n);
1087: VecGetArrayRead(xx, &x);
1088: VecGetArrayWrite(yy, &y);
1089: if (!A->rmap->n || !A->cmap->n) {
1090: PetscBLASInt i;
1091: for (i = 0; i < m; i++) y[i] = 0.0;
1092: } else {
1093: PetscCallBLAS("BLASgemv", BLASgemv_("N", &m, &n, &_DOne, v, &(mat->lda), x, &_One, &_DZero, y, &_One));
1094: PetscLogFlops(2.0 * A->rmap->n * A->cmap->n - A->rmap->n);
1095: }
1096: VecRestoreArrayRead(xx, &x);
1097: VecRestoreArrayWrite(yy, &y);
1098: return 0;
1099: }
1101: PetscErrorCode MatMultAdd_SeqDense(Mat A, Vec xx, Vec zz, Vec yy)
1102: {
1103: Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
1104: const PetscScalar *v = mat->v, *x;
1105: PetscScalar *y, _DOne = 1.0;
1106: PetscBLASInt m, n, _One = 1;
1108: PetscBLASIntCast(A->rmap->n, &m);
1109: PetscBLASIntCast(A->cmap->n, &n);
1110: VecCopy(zz, yy);
1111: if (!A->rmap->n || !A->cmap->n) return 0;
1112: VecGetArrayRead(xx, &x);
1113: VecGetArray(yy, &y);
1114: PetscCallBLAS("BLASgemv", BLASgemv_("N", &m, &n, &_DOne, v, &(mat->lda), x, &_One, &_DOne, y, &_One));
1115: VecRestoreArrayRead(xx, &x);
1116: VecRestoreArray(yy, &y);
1117: PetscLogFlops(2.0 * A->rmap->n * A->cmap->n);
1118: return 0;
1119: }
1121: PetscErrorCode MatMultTransposeAdd_SeqDense(Mat A, Vec xx, Vec zz, Vec yy)
1122: {
1123: Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
1124: const PetscScalar *v = mat->v, *x;
1125: PetscScalar *y;
1126: PetscBLASInt m, n, _One = 1;
1127: PetscScalar _DOne = 1.0;
1129: PetscBLASIntCast(A->rmap->n, &m);
1130: PetscBLASIntCast(A->cmap->n, &n);
1131: VecCopy(zz, yy);
1132: if (!A->rmap->n || !A->cmap->n) return 0;
1133: VecGetArrayRead(xx, &x);
1134: VecGetArray(yy, &y);
1135: PetscCallBLAS("BLASgemv", BLASgemv_("T", &m, &n, &_DOne, v, &(mat->lda), x, &_One, &_DOne, y, &_One));
1136: VecRestoreArrayRead(xx, &x);
1137: VecRestoreArray(yy, &y);
1138: PetscLogFlops(2.0 * A->rmap->n * A->cmap->n);
1139: return 0;
1140: }
1142: /* -----------------------------------------------------------------*/
1143: static PetscErrorCode MatGetRow_SeqDense(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **vals)
1144: {
1145: Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
1146: PetscInt i;
1148: *ncols = A->cmap->n;
1149: if (cols) {
1150: PetscMalloc1(A->cmap->n, cols);
1151: for (i = 0; i < A->cmap->n; i++) (*cols)[i] = i;
1152: }
1153: if (vals) {
1154: const PetscScalar *v;
1156: MatDenseGetArrayRead(A, &v);
1157: PetscMalloc1(A->cmap->n, vals);
1158: v += row;
1159: for (i = 0; i < A->cmap->n; i++) {
1160: (*vals)[i] = *v;
1161: v += mat->lda;
1162: }
1163: MatDenseRestoreArrayRead(A, &v);
1164: }
1165: return 0;
1166: }
1168: static PetscErrorCode MatRestoreRow_SeqDense(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **vals)
1169: {
1170: if (ncols) *ncols = 0;
1171: if (cols) PetscFree(*cols);
1172: if (vals) PetscFree(*vals);
1173: return 0;
1174: }
1175: /* ----------------------------------------------------------------*/
1176: static PetscErrorCode MatSetValues_SeqDense(Mat A, PetscInt m, const PetscInt indexm[], PetscInt n, const PetscInt indexn[], const PetscScalar v[], InsertMode addv)
1177: {
1178: Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
1179: PetscScalar *av;
1180: PetscInt i, j, idx = 0;
1181: #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1182: PetscOffloadMask oldf;
1183: #endif
1185: MatDenseGetArray(A, &av);
1186: if (!mat->roworiented) {
1187: if (addv == INSERT_VALUES) {
1188: for (j = 0; j < n; j++) {
1189: if (indexn[j] < 0) {
1190: idx += m;
1191: continue;
1192: }
1194: for (i = 0; i < m; i++) {
1195: if (indexm[i] < 0) {
1196: idx++;
1197: continue;
1198: }
1200: av[indexn[j] * mat->lda + indexm[i]] = v[idx++];
1201: }
1202: }
1203: } else {
1204: for (j = 0; j < n; j++) {
1205: if (indexn[j] < 0) {
1206: idx += m;
1207: continue;
1208: }
1210: for (i = 0; i < m; i++) {
1211: if (indexm[i] < 0) {
1212: idx++;
1213: continue;
1214: }
1216: av[indexn[j] * mat->lda + indexm[i]] += v[idx++];
1217: }
1218: }
1219: }
1220: } else {
1221: if (addv == INSERT_VALUES) {
1222: for (i = 0; i < m; i++) {
1223: if (indexm[i] < 0) {
1224: idx += n;
1225: continue;
1226: }
1228: for (j = 0; j < n; j++) {
1229: if (indexn[j] < 0) {
1230: idx++;
1231: continue;
1232: }
1234: av[indexn[j] * mat->lda + indexm[i]] = v[idx++];
1235: }
1236: }
1237: } else {
1238: for (i = 0; i < m; i++) {
1239: if (indexm[i] < 0) {
1240: idx += n;
1241: continue;
1242: }
1244: for (j = 0; j < n; j++) {
1245: if (indexn[j] < 0) {
1246: idx++;
1247: continue;
1248: }
1250: av[indexn[j] * mat->lda + indexm[i]] += v[idx++];
1251: }
1252: }
1253: }
1254: }
1255: /* hack to prevent unneeded copy to the GPU while returning the array */
1256: #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1257: oldf = A->offloadmask;
1258: A->offloadmask = PETSC_OFFLOAD_GPU;
1259: #endif
1260: MatDenseRestoreArray(A, &av);
1261: #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1262: A->offloadmask = (oldf == PETSC_OFFLOAD_UNALLOCATED ? PETSC_OFFLOAD_UNALLOCATED : PETSC_OFFLOAD_CPU);
1263: #endif
1264: return 0;
1265: }
1267: static PetscErrorCode MatGetValues_SeqDense(Mat A, PetscInt m, const PetscInt indexm[], PetscInt n, const PetscInt indexn[], PetscScalar v[])
1268: {
1269: Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
1270: const PetscScalar *vv;
1271: PetscInt i, j;
1273: MatDenseGetArrayRead(A, &vv);
1274: /* row-oriented output */
1275: for (i = 0; i < m; i++) {
1276: if (indexm[i] < 0) {
1277: v += n;
1278: continue;
1279: }
1281: for (j = 0; j < n; j++) {
1282: if (indexn[j] < 0) {
1283: v++;
1284: continue;
1285: }
1287: *v++ = vv[indexn[j] * mat->lda + indexm[i]];
1288: }
1289: }
1290: MatDenseRestoreArrayRead(A, &vv);
1291: return 0;
1292: }
1294: /* -----------------------------------------------------------------*/
1296: PetscErrorCode MatView_Dense_Binary(Mat mat, PetscViewer viewer)
1297: {
1298: PetscBool skipHeader;
1299: PetscViewerFormat format;
1300: PetscInt header[4], M, N, m, lda, i, j, k;
1301: const PetscScalar *v;
1302: PetscScalar *vwork;
1304: PetscViewerSetUp(viewer);
1305: PetscViewerBinaryGetSkipHeader(viewer, &skipHeader);
1306: PetscViewerGetFormat(viewer, &format);
1307: if (skipHeader) format = PETSC_VIEWER_NATIVE;
1309: MatGetSize(mat, &M, &N);
1311: /* write matrix header */
1312: header[0] = MAT_FILE_CLASSID;
1313: header[1] = M;
1314: header[2] = N;
1315: header[3] = (format == PETSC_VIEWER_NATIVE) ? MATRIX_BINARY_FORMAT_DENSE : M * N;
1316: if (!skipHeader) PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT);
1318: MatGetLocalSize(mat, &m, NULL);
1319: if (format != PETSC_VIEWER_NATIVE) {
1320: PetscInt nnz = m * N, *iwork;
1321: /* store row lengths for each row */
1322: PetscMalloc1(nnz, &iwork);
1323: for (i = 0; i < m; i++) iwork[i] = N;
1324: PetscViewerBinaryWriteAll(viewer, iwork, m, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT);
1325: /* store column indices (zero start index) */
1326: for (k = 0, i = 0; i < m; i++)
1327: for (j = 0; j < N; j++, k++) iwork[k] = j;
1328: PetscViewerBinaryWriteAll(viewer, iwork, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT);
1329: PetscFree(iwork);
1330: }
1331: /* store matrix values as a dense matrix in row major order */
1332: PetscMalloc1(m * N, &vwork);
1333: MatDenseGetArrayRead(mat, &v);
1334: MatDenseGetLDA(mat, &lda);
1335: for (k = 0, i = 0; i < m; i++)
1336: for (j = 0; j < N; j++, k++) vwork[k] = v[i + lda * j];
1337: MatDenseRestoreArrayRead(mat, &v);
1338: PetscViewerBinaryWriteAll(viewer, vwork, m * N, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR);
1339: PetscFree(vwork);
1340: return 0;
1341: }
1343: PetscErrorCode MatLoad_Dense_Binary(Mat mat, PetscViewer viewer)
1344: {
1345: PetscBool skipHeader;
1346: PetscInt header[4], M, N, m, nz, lda, i, j, k;
1347: PetscInt rows, cols;
1348: PetscScalar *v, *vwork;
1350: PetscViewerSetUp(viewer);
1351: PetscViewerBinaryGetSkipHeader(viewer, &skipHeader);
1353: if (!skipHeader) {
1354: PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT);
1356: M = header[1];
1357: N = header[2];
1360: nz = header[3];
1362: } else {
1363: MatGetSize(mat, &M, &N);
1365: nz = MATRIX_BINARY_FORMAT_DENSE;
1366: }
1368: /* setup global sizes if not set */
1369: if (mat->rmap->N < 0) mat->rmap->N = M;
1370: if (mat->cmap->N < 0) mat->cmap->N = N;
1371: MatSetUp(mat);
1372: /* check if global sizes are correct */
1373: MatGetSize(mat, &rows, &cols);
1376: MatGetSize(mat, NULL, &N);
1377: MatGetLocalSize(mat, &m, NULL);
1378: MatDenseGetArray(mat, &v);
1379: MatDenseGetLDA(mat, &lda);
1380: if (nz == MATRIX_BINARY_FORMAT_DENSE) { /* matrix in file is dense format */
1381: PetscInt nnz = m * N;
1382: /* read in matrix values */
1383: PetscMalloc1(nnz, &vwork);
1384: PetscViewerBinaryReadAll(viewer, vwork, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR);
1385: /* store values in column major order */
1386: for (j = 0; j < N; j++)
1387: for (i = 0; i < m; i++) v[i + lda * j] = vwork[i * N + j];
1388: PetscFree(vwork);
1389: } else { /* matrix in file is sparse format */
1390: PetscInt nnz = 0, *rlens, *icols;
1391: /* read in row lengths */
1392: PetscMalloc1(m, &rlens);
1393: PetscViewerBinaryReadAll(viewer, rlens, m, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT);
1394: for (i = 0; i < m; i++) nnz += rlens[i];
1395: /* read in column indices and values */
1396: PetscMalloc2(nnz, &icols, nnz, &vwork);
1397: PetscViewerBinaryReadAll(viewer, icols, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT);
1398: PetscViewerBinaryReadAll(viewer, vwork, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR);
1399: /* store values in column major order */
1400: for (k = 0, i = 0; i < m; i++)
1401: for (j = 0; j < rlens[i]; j++, k++) v[i + lda * icols[k]] = vwork[k];
1402: PetscFree(rlens);
1403: PetscFree2(icols, vwork);
1404: }
1405: MatDenseRestoreArray(mat, &v);
1406: MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY);
1407: MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY);
1408: return 0;
1409: }
1411: PetscErrorCode MatLoad_SeqDense(Mat newMat, PetscViewer viewer)
1412: {
1413: PetscBool isbinary, ishdf5;
1417: /* force binary viewer to load .info file if it has not yet done so */
1418: PetscViewerSetUp(viewer);
1419: PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary);
1420: PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5);
1421: if (isbinary) {
1422: MatLoad_Dense_Binary(newMat, viewer);
1423: } else if (ishdf5) {
1424: #if defined(PETSC_HAVE_HDF5)
1425: MatLoad_Dense_HDF5(newMat, viewer);
1426: #else
1427: SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
1428: #endif
1429: } else {
1430: SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
1431: }
1432: return 0;
1433: }
1435: static PetscErrorCode MatView_SeqDense_ASCII(Mat A, PetscViewer viewer)
1436: {
1437: Mat_SeqDense *a = (Mat_SeqDense *)A->data;
1438: PetscInt i, j;
1439: const char *name;
1440: PetscScalar *v, *av;
1441: PetscViewerFormat format;
1442: #if defined(PETSC_USE_COMPLEX)
1443: PetscBool allreal = PETSC_TRUE;
1444: #endif
1446: MatDenseGetArrayRead(A, (const PetscScalar **)&av);
1447: PetscViewerGetFormat(viewer, &format);
1448: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1449: return 0; /* do nothing for now */
1450: } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1451: PetscViewerASCIIUseTabs(viewer, PETSC_FALSE);
1452: for (i = 0; i < A->rmap->n; i++) {
1453: v = av + i;
1454: PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i);
1455: for (j = 0; j < A->cmap->n; j++) {
1456: #if defined(PETSC_USE_COMPLEX)
1457: if (PetscRealPart(*v) != 0.0 && PetscImaginaryPart(*v) != 0.0) {
1458: PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", j, (double)PetscRealPart(*v), (double)PetscImaginaryPart(*v));
1459: } else if (PetscRealPart(*v)) {
1460: PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", j, (double)PetscRealPart(*v));
1461: }
1462: #else
1463: if (*v) PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", j, (double)*v);
1464: #endif
1465: v += a->lda;
1466: }
1467: PetscViewerASCIIPrintf(viewer, "\n");
1468: }
1469: PetscViewerASCIIUseTabs(viewer, PETSC_TRUE);
1470: } else {
1471: PetscViewerASCIIUseTabs(viewer, PETSC_FALSE);
1472: #if defined(PETSC_USE_COMPLEX)
1473: /* determine if matrix has all real values */
1474: for (j = 0; j < A->cmap->n; j++) {
1475: v = av + j * a->lda;
1476: for (i = 0; i < A->rmap->n; i++) {
1477: if (PetscImaginaryPart(v[i])) {
1478: allreal = PETSC_FALSE;
1479: break;
1480: }
1481: }
1482: }
1483: #endif
1484: if (format == PETSC_VIEWER_ASCII_MATLAB) {
1485: PetscObjectGetName((PetscObject)A, &name);
1486: PetscViewerASCIIPrintf(viewer, "%% Size = %" PetscInt_FMT " %" PetscInt_FMT " \n", A->rmap->n, A->cmap->n);
1487: PetscViewerASCIIPrintf(viewer, "%s = zeros(%" PetscInt_FMT ",%" PetscInt_FMT ");\n", name, A->rmap->n, A->cmap->n);
1488: PetscViewerASCIIPrintf(viewer, "%s = [\n", name);
1489: }
1491: for (i = 0; i < A->rmap->n; i++) {
1492: v = av + i;
1493: for (j = 0; j < A->cmap->n; j++) {
1494: #if defined(PETSC_USE_COMPLEX)
1495: if (allreal) {
1496: PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(*v));
1497: } else {
1498: PetscViewerASCIIPrintf(viewer, "%18.16e + %18.16ei ", (double)PetscRealPart(*v), (double)PetscImaginaryPart(*v));
1499: }
1500: #else
1501: PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)*v);
1502: #endif
1503: v += a->lda;
1504: }
1505: PetscViewerASCIIPrintf(viewer, "\n");
1506: }
1507: if (format == PETSC_VIEWER_ASCII_MATLAB) PetscViewerASCIIPrintf(viewer, "];\n");
1508: PetscViewerASCIIUseTabs(viewer, PETSC_TRUE);
1509: }
1510: MatDenseRestoreArrayRead(A, (const PetscScalar **)&av);
1511: PetscViewerFlush(viewer);
1512: return 0;
1513: }
1515: #include <petscdraw.h>
1516: static PetscErrorCode MatView_SeqDense_Draw_Zoom(PetscDraw draw, void *Aa)
1517: {
1518: Mat A = (Mat)Aa;
1519: PetscInt m = A->rmap->n, n = A->cmap->n, i, j;
1520: int color = PETSC_DRAW_WHITE;
1521: const PetscScalar *v;
1522: PetscViewer viewer;
1523: PetscReal xl, yl, xr, yr, x_l, x_r, y_l, y_r;
1524: PetscViewerFormat format;
1526: PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer);
1527: PetscViewerGetFormat(viewer, &format);
1528: PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr);
1530: /* Loop over matrix elements drawing boxes */
1531: MatDenseGetArrayRead(A, &v);
1532: if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1533: PetscDrawCollectiveBegin(draw);
1534: /* Blue for negative and Red for positive */
1535: for (j = 0; j < n; j++) {
1536: x_l = j;
1537: x_r = x_l + 1.0;
1538: for (i = 0; i < m; i++) {
1539: y_l = m - i - 1.0;
1540: y_r = y_l + 1.0;
1541: if (PetscRealPart(v[j * m + i]) > 0.) color = PETSC_DRAW_RED;
1542: else if (PetscRealPart(v[j * m + i]) < 0.) color = PETSC_DRAW_BLUE;
1543: else continue;
1544: PetscDrawRectangle(draw, x_l, y_l, x_r, y_r, color, color, color, color);
1545: }
1546: }
1547: PetscDrawCollectiveEnd(draw);
1548: } else {
1549: /* use contour shading to indicate magnitude of values */
1550: /* first determine max of all nonzero values */
1551: PetscReal minv = 0.0, maxv = 0.0;
1552: PetscDraw popup;
1554: for (i = 0; i < m * n; i++) {
1555: if (PetscAbsScalar(v[i]) > maxv) maxv = PetscAbsScalar(v[i]);
1556: }
1557: if (minv >= maxv) maxv = minv + PETSC_SMALL;
1558: PetscDrawGetPopup(draw, &popup);
1559: PetscDrawScalePopup(popup, minv, maxv);
1561: PetscDrawCollectiveBegin(draw);
1562: for (j = 0; j < n; j++) {
1563: x_l = j;
1564: x_r = x_l + 1.0;
1565: for (i = 0; i < m; i++) {
1566: y_l = m - i - 1.0;
1567: y_r = y_l + 1.0;
1568: color = PetscDrawRealToColor(PetscAbsScalar(v[j * m + i]), minv, maxv);
1569: PetscDrawRectangle(draw, x_l, y_l, x_r, y_r, color, color, color, color);
1570: }
1571: }
1572: PetscDrawCollectiveEnd(draw);
1573: }
1574: MatDenseRestoreArrayRead(A, &v);
1575: return 0;
1576: }
1578: static PetscErrorCode MatView_SeqDense_Draw(Mat A, PetscViewer viewer)
1579: {
1580: PetscDraw draw;
1581: PetscBool isnull;
1582: PetscReal xr, yr, xl, yl, h, w;
1584: PetscViewerDrawGetDraw(viewer, 0, &draw);
1585: PetscDrawIsNull(draw, &isnull);
1586: if (isnull) return 0;
1588: xr = A->cmap->n;
1589: yr = A->rmap->n;
1590: h = yr / 10.0;
1591: w = xr / 10.0;
1592: xr += w;
1593: yr += h;
1594: xl = -w;
1595: yl = -h;
1596: PetscDrawSetCoordinates(draw, xl, yl, xr, yr);
1597: PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer);
1598: PetscDrawZoom(draw, MatView_SeqDense_Draw_Zoom, A);
1599: PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL);
1600: PetscDrawSave(draw);
1601: return 0;
1602: }
1604: PetscErrorCode MatView_SeqDense(Mat A, PetscViewer viewer)
1605: {
1606: PetscBool iascii, isbinary, isdraw;
1608: PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii);
1609: PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary);
1610: PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw);
1611: if (iascii) MatView_SeqDense_ASCII(A, viewer);
1612: else if (isbinary) MatView_Dense_Binary(A, viewer);
1613: else if (isdraw) MatView_SeqDense_Draw(A, viewer);
1614: return 0;
1615: }
1617: static PetscErrorCode MatDensePlaceArray_SeqDense(Mat A, const PetscScalar *array)
1618: {
1619: Mat_SeqDense *a = (Mat_SeqDense *)A->data;
1624: a->unplacedarray = a->v;
1625: a->unplaced_user_alloc = a->user_alloc;
1626: a->v = (PetscScalar *)array;
1627: a->user_alloc = PETSC_TRUE;
1628: #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1629: A->offloadmask = PETSC_OFFLOAD_CPU;
1630: #endif
1631: return 0;
1632: }
1634: static PetscErrorCode MatDenseResetArray_SeqDense(Mat A)
1635: {
1636: Mat_SeqDense *a = (Mat_SeqDense *)A->data;
1640: a->v = a->unplacedarray;
1641: a->user_alloc = a->unplaced_user_alloc;
1642: a->unplacedarray = NULL;
1643: #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1644: A->offloadmask = PETSC_OFFLOAD_CPU;
1645: #endif
1646: return 0;
1647: }
1649: static PetscErrorCode MatDenseReplaceArray_SeqDense(Mat A, const PetscScalar *array)
1650: {
1651: Mat_SeqDense *a = (Mat_SeqDense *)A->data;
1655: if (!a->user_alloc) PetscFree(a->v);
1656: a->v = (PetscScalar *)array;
1657: a->user_alloc = PETSC_FALSE;
1658: #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1659: A->offloadmask = PETSC_OFFLOAD_CPU;
1660: #endif
1661: return 0;
1662: }
1664: PetscErrorCode MatDestroy_SeqDense(Mat mat)
1665: {
1666: Mat_SeqDense *l = (Mat_SeqDense *)mat->data;
1668: #if defined(PETSC_USE_LOG)
1669: PetscLogObjectState((PetscObject)mat, "Rows %" PetscInt_FMT " Cols %" PetscInt_FMT, mat->rmap->n, mat->cmap->n);
1670: #endif
1671: VecDestroy(&(l->qrrhs));
1672: PetscFree(l->tau);
1673: PetscFree(l->pivots);
1674: PetscFree(l->fwork);
1675: MatDestroy(&l->ptapwork);
1676: if (!l->user_alloc) PetscFree(l->v);
1677: if (!l->unplaced_user_alloc) PetscFree(l->unplacedarray);
1680: VecDestroy(&l->cvec);
1681: MatDestroy(&l->cmat);
1682: PetscFree(mat->data);
1684: PetscObjectChangeTypeName((PetscObject)mat, NULL);
1685: PetscObjectComposeFunction((PetscObject)mat, "MatQRFactor_C", NULL);
1686: PetscObjectComposeFunction((PetscObject)mat, "MatQRFactorSymbolic_C", NULL);
1687: PetscObjectComposeFunction((PetscObject)mat, "MatQRFactorNumeric_C", NULL);
1688: PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL);
1689: PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL);
1690: PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL);
1691: PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL);
1692: PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL);
1693: PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL);
1694: PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL);
1695: PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL);
1696: PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL);
1697: PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL);
1698: PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL);
1699: PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_seqaij_C", NULL);
1700: #if defined(PETSC_HAVE_ELEMENTAL)
1701: PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_elemental_C", NULL);
1702: #endif
1703: #if defined(PETSC_HAVE_SCALAPACK)
1704: PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_scalapack_C", NULL);
1705: #endif
1706: #if defined(PETSC_HAVE_CUDA)
1707: PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_seqdensecuda_C", NULL);
1708: PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensecuda_seqdensecuda_C", NULL);
1709: PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensecuda_seqdense_C", NULL);
1710: PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdense_seqdensecuda_C", NULL);
1711: #endif
1712: #if defined(PETSC_HAVE_HIP)
1713: PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_seqdensehip_C", NULL);
1714: PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensehip_seqdensehip_C", NULL);
1715: PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensehip_seqdense_C", NULL);
1716: PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdense_seqdensehip_C", NULL);
1717: #endif
1718: PetscObjectComposeFunction((PetscObject)mat, "MatSeqDenseSetPreallocation_C", NULL);
1719: PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqaij_seqdense_C", NULL);
1720: PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdense_seqdense_C", NULL);
1721: PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqbaij_seqdense_C", NULL);
1722: PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqsbaij_seqdense_C", NULL);
1724: PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL);
1725: PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL);
1726: PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL);
1727: PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL);
1728: PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL);
1729: PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL);
1730: PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL);
1731: PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL);
1732: PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL);
1733: PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL);
1734: return 0;
1735: }
1737: static PetscErrorCode MatTranspose_SeqDense(Mat A, MatReuse reuse, Mat *matout)
1738: {
1739: Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
1740: PetscInt k, j, m = A->rmap->n, M = mat->lda, n = A->cmap->n;
1741: PetscScalar *v, tmp;
1743: if (reuse == MAT_REUSE_MATRIX) MatTransposeCheckNonzeroState_Private(A, *matout);
1744: if (reuse == MAT_INPLACE_MATRIX) {
1745: if (m == n) { /* in place transpose */
1746: MatDenseGetArray(A, &v);
1747: for (j = 0; j < m; j++) {
1748: for (k = 0; k < j; k++) {
1749: tmp = v[j + k * M];
1750: v[j + k * M] = v[k + j * M];
1751: v[k + j * M] = tmp;
1752: }
1753: }
1754: MatDenseRestoreArray(A, &v);
1755: } else { /* reuse memory, temporary allocates new memory */
1756: PetscScalar *v2;
1757: PetscLayout tmplayout;
1759: PetscMalloc1((size_t)m * n, &v2);
1760: MatDenseGetArray(A, &v);
1761: for (j = 0; j < n; j++) {
1762: for (k = 0; k < m; k++) v2[j + (size_t)k * n] = v[k + (size_t)j * M];
1763: }
1764: PetscArraycpy(v, v2, (size_t)m * n);
1765: PetscFree(v2);
1766: MatDenseRestoreArray(A, &v);
1767: /* cleanup size dependent quantities */
1768: VecDestroy(&mat->cvec);
1769: MatDestroy(&mat->cmat);
1770: PetscFree(mat->pivots);
1771: PetscFree(mat->fwork);
1772: MatDestroy(&mat->ptapwork);
1773: /* swap row/col layouts */
1774: mat->lda = n;
1775: tmplayout = A->rmap;
1776: A->rmap = A->cmap;
1777: A->cmap = tmplayout;
1778: }
1779: } else { /* out-of-place transpose */
1780: Mat tmat;
1781: Mat_SeqDense *tmatd;
1782: PetscScalar *v2;
1783: PetscInt M2;
1785: if (reuse == MAT_INITIAL_MATRIX) {
1786: MatCreate(PetscObjectComm((PetscObject)A), &tmat);
1787: MatSetSizes(tmat, A->cmap->n, A->rmap->n, A->cmap->n, A->rmap->n);
1788: MatSetType(tmat, ((PetscObject)A)->type_name);
1789: MatSeqDenseSetPreallocation(tmat, NULL);
1790: } else tmat = *matout;
1792: MatDenseGetArrayRead(A, (const PetscScalar **)&v);
1793: MatDenseGetArray(tmat, &v2);
1794: tmatd = (Mat_SeqDense *)tmat->data;
1795: M2 = tmatd->lda;
1796: for (j = 0; j < n; j++) {
1797: for (k = 0; k < m; k++) v2[j + k * M2] = v[k + j * M];
1798: }
1799: MatDenseRestoreArray(tmat, &v2);
1800: MatDenseRestoreArrayRead(A, (const PetscScalar **)&v);
1801: MatAssemblyBegin(tmat, MAT_FINAL_ASSEMBLY);
1802: MatAssemblyEnd(tmat, MAT_FINAL_ASSEMBLY);
1803: *matout = tmat;
1804: }
1805: return 0;
1806: }
1808: static PetscErrorCode MatEqual_SeqDense(Mat A1, Mat A2, PetscBool *flg)
1809: {
1810: Mat_SeqDense *mat1 = (Mat_SeqDense *)A1->data;
1811: Mat_SeqDense *mat2 = (Mat_SeqDense *)A2->data;
1812: PetscInt i;
1813: const PetscScalar *v1, *v2;
1815: if (A1->rmap->n != A2->rmap->n) {
1816: *flg = PETSC_FALSE;
1817: return 0;
1818: }
1819: if (A1->cmap->n != A2->cmap->n) {
1820: *flg = PETSC_FALSE;
1821: return 0;
1822: }
1823: MatDenseGetArrayRead(A1, &v1);
1824: MatDenseGetArrayRead(A2, &v2);
1825: for (i = 0; i < A1->cmap->n; i++) {
1826: PetscArraycmp(v1, v2, A1->rmap->n, flg);
1827: if (*flg == PETSC_FALSE) return 0;
1828: v1 += mat1->lda;
1829: v2 += mat2->lda;
1830: }
1831: MatDenseRestoreArrayRead(A1, &v1);
1832: MatDenseRestoreArrayRead(A2, &v2);
1833: *flg = PETSC_TRUE;
1834: return 0;
1835: }
1837: static PetscErrorCode MatGetDiagonal_SeqDense(Mat A, Vec v)
1838: {
1839: Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
1840: PetscInt i, n, len;
1841: PetscScalar *x;
1842: const PetscScalar *vv;
1844: VecGetSize(v, &n);
1845: VecGetArray(v, &x);
1846: len = PetscMin(A->rmap->n, A->cmap->n);
1847: MatDenseGetArrayRead(A, &vv);
1849: for (i = 0; i < len; i++) x[i] = vv[i * mat->lda + i];
1850: MatDenseRestoreArrayRead(A, &vv);
1851: VecRestoreArray(v, &x);
1852: return 0;
1853: }
1855: static PetscErrorCode MatDiagonalScale_SeqDense(Mat A, Vec ll, Vec rr)
1856: {
1857: Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
1858: const PetscScalar *l, *r;
1859: PetscScalar x, *v, *vv;
1860: PetscInt i, j, m = A->rmap->n, n = A->cmap->n;
1862: MatDenseGetArray(A, &vv);
1863: if (ll) {
1864: VecGetSize(ll, &m);
1865: VecGetArrayRead(ll, &l);
1867: for (i = 0; i < m; i++) {
1868: x = l[i];
1869: v = vv + i;
1870: for (j = 0; j < n; j++) {
1871: (*v) *= x;
1872: v += mat->lda;
1873: }
1874: }
1875: VecRestoreArrayRead(ll, &l);
1876: PetscLogFlops(1.0 * n * m);
1877: }
1878: if (rr) {
1879: VecGetSize(rr, &n);
1880: VecGetArrayRead(rr, &r);
1882: for (i = 0; i < n; i++) {
1883: x = r[i];
1884: v = vv + i * mat->lda;
1885: for (j = 0; j < m; j++) (*v++) *= x;
1886: }
1887: VecRestoreArrayRead(rr, &r);
1888: PetscLogFlops(1.0 * n * m);
1889: }
1890: MatDenseRestoreArray(A, &vv);
1891: return 0;
1892: }
1894: PetscErrorCode MatNorm_SeqDense(Mat A, NormType type, PetscReal *nrm)
1895: {
1896: Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
1897: PetscScalar *v, *vv;
1898: PetscReal sum = 0.0;
1899: PetscInt lda, m = A->rmap->n, i, j;
1901: MatDenseGetArrayRead(A, (const PetscScalar **)&vv);
1902: MatDenseGetLDA(A, &lda);
1903: v = vv;
1904: if (type == NORM_FROBENIUS) {
1905: if (lda > m) {
1906: for (j = 0; j < A->cmap->n; j++) {
1907: v = vv + j * lda;
1908: for (i = 0; i < m; i++) {
1909: sum += PetscRealPart(PetscConj(*v) * (*v));
1910: v++;
1911: }
1912: }
1913: } else {
1914: #if defined(PETSC_USE_REAL___FP16)
1915: PetscBLASInt one = 1, cnt = A->cmap->n * A->rmap->n;
1916: PetscCallBLAS("BLASnrm2", *nrm = BLASnrm2_(&cnt, v, &one));
1917: }
1918: #else
1919: for (i = 0; i < A->cmap->n * A->rmap->n; i++) {
1920: sum += PetscRealPart(PetscConj(*v) * (*v));
1921: v++;
1922: }
1923: }
1924: *nrm = PetscSqrtReal(sum);
1925: #endif
1926: PetscLogFlops(2.0 * A->cmap->n * A->rmap->n);
1927: } else if (type == NORM_1) {
1928: *nrm = 0.0;
1929: for (j = 0; j < A->cmap->n; j++) {
1930: v = vv + j * mat->lda;
1931: sum = 0.0;
1932: for (i = 0; i < A->rmap->n; i++) {
1933: sum += PetscAbsScalar(*v);
1934: v++;
1935: }
1936: if (sum > *nrm) *nrm = sum;
1937: }
1938: PetscLogFlops(1.0 * A->cmap->n * A->rmap->n);
1939: } else if (type == NORM_INFINITY) {
1940: *nrm = 0.0;
1941: for (j = 0; j < A->rmap->n; j++) {
1942: v = vv + j;
1943: sum = 0.0;
1944: for (i = 0; i < A->cmap->n; i++) {
1945: sum += PetscAbsScalar(*v);
1946: v += mat->lda;
1947: }
1948: if (sum > *nrm) *nrm = sum;
1949: }
1950: PetscLogFlops(1.0 * A->cmap->n * A->rmap->n);
1951: } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No two norm");
1952: MatDenseRestoreArrayRead(A, (const PetscScalar **)&vv);
1953: return 0;
1954: }
1956: static PetscErrorCode MatSetOption_SeqDense(Mat A, MatOption op, PetscBool flg)
1957: {
1958: Mat_SeqDense *aij = (Mat_SeqDense *)A->data;
1960: switch (op) {
1961: case MAT_ROW_ORIENTED:
1962: aij->roworiented = flg;
1963: break;
1964: case MAT_NEW_NONZERO_LOCATIONS:
1965: case MAT_NEW_NONZERO_LOCATION_ERR:
1966: case MAT_NEW_NONZERO_ALLOCATION_ERR:
1967: case MAT_FORCE_DIAGONAL_ENTRIES:
1968: case MAT_KEEP_NONZERO_PATTERN:
1969: case MAT_IGNORE_OFF_PROC_ENTRIES:
1970: case MAT_USE_HASH_TABLE:
1971: case MAT_IGNORE_ZERO_ENTRIES:
1972: case MAT_IGNORE_LOWER_TRIANGULAR:
1973: case MAT_SORTED_FULL:
1974: PetscInfo(A, "Option %s ignored\n", MatOptions[op]);
1975: break;
1976: case MAT_SPD:
1977: case MAT_SYMMETRIC:
1978: case MAT_STRUCTURALLY_SYMMETRIC:
1979: case MAT_HERMITIAN:
1980: case MAT_SYMMETRY_ETERNAL:
1981: case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1982: case MAT_SPD_ETERNAL:
1983: break;
1984: default:
1985: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]);
1986: }
1987: return 0;
1988: }
1990: PetscErrorCode MatZeroEntries_SeqDense(Mat A)
1991: {
1992: Mat_SeqDense *l = (Mat_SeqDense *)A->data;
1993: PetscInt lda = l->lda, m = A->rmap->n, n = A->cmap->n, j;
1994: PetscScalar *v;
1996: MatDenseGetArrayWrite(A, &v);
1997: if (lda > m) {
1998: for (j = 0; j < n; j++) PetscArrayzero(v + j * lda, m);
1999: } else {
2000: PetscArrayzero(v, PetscInt64Mult(m, n));
2001: }
2002: MatDenseRestoreArrayWrite(A, &v);
2003: return 0;
2004: }
2006: static PetscErrorCode MatZeroRows_SeqDense(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
2007: {
2008: Mat_SeqDense *l = (Mat_SeqDense *)A->data;
2009: PetscInt m = l->lda, n = A->cmap->n, i, j;
2010: PetscScalar *slot, *bb, *v;
2011: const PetscScalar *xx;
2013: if (PetscDefined(USE_DEBUG)) {
2014: for (i = 0; i < N; i++) {
2017: }
2018: }
2019: if (!N) return 0;
2021: /* fix right hand side if needed */
2022: if (x && b) {
2023: VecGetArrayRead(x, &xx);
2024: VecGetArray(b, &bb);
2025: for (i = 0; i < N; i++) bb[rows[i]] = diag * xx[rows[i]];
2026: VecRestoreArrayRead(x, &xx);
2027: VecRestoreArray(b, &bb);
2028: }
2030: MatDenseGetArray(A, &v);
2031: for (i = 0; i < N; i++) {
2032: slot = v + rows[i];
2033: for (j = 0; j < n; j++) {
2034: *slot = 0.0;
2035: slot += m;
2036: }
2037: }
2038: if (diag != 0.0) {
2040: for (i = 0; i < N; i++) {
2041: slot = v + (m + 1) * rows[i];
2042: *slot = diag;
2043: }
2044: }
2045: MatDenseRestoreArray(A, &v);
2046: return 0;
2047: }
2049: static PetscErrorCode MatDenseGetLDA_SeqDense(Mat A, PetscInt *lda)
2050: {
2051: Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
2053: *lda = mat->lda;
2054: return 0;
2055: }
2057: PetscErrorCode MatDenseGetArray_SeqDense(Mat A, PetscScalar **array)
2058: {
2059: Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
2062: *array = mat->v;
2063: return 0;
2064: }
2066: PetscErrorCode MatDenseRestoreArray_SeqDense(Mat A, PetscScalar **array)
2067: {
2068: if (array) *array = NULL;
2069: return 0;
2070: }
2072: /*@
2073: MatDenseGetLDA - gets the leading dimension of the array returned from `MatDenseGetArray()`
2075: Not collective
2077: Input Parameter:
2078: . mat - a `MATDENSE` or `MATDENSECUDA` matrix
2080: Output Parameter:
2081: . lda - the leading dimension
2083: Level: intermediate
2085: .seealso: `MATDENSE`, `MATDENSECUDA`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseSetLDA()`
2086: @*/
2087: PetscErrorCode MatDenseGetLDA(Mat A, PetscInt *lda)
2088: {
2091: MatCheckPreallocated(A, 1);
2092: PetscUseMethod(A, "MatDenseGetLDA_C", (Mat, PetscInt *), (A, lda));
2093: return 0;
2094: }
2096: /*@
2097: MatDenseSetLDA - Sets the leading dimension of the array used by the `MATDENSE` matrix
2099: Not collective
2101: Input Parameters:
2102: + mat - a `MATDENSE` or `MATDENSECUDA` matrix
2103: - lda - the leading dimension
2105: Level: intermediate
2107: .seealso: `MATDENSE`, `MATDENSECUDA`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetLDA()`
2108: @*/
2109: PetscErrorCode MatDenseSetLDA(Mat A, PetscInt lda)
2110: {
2112: PetscTryMethod(A, "MatDenseSetLDA_C", (Mat, PetscInt), (A, lda));
2113: return 0;
2114: }
2116: /*@C
2117: MatDenseGetArray - gives read-write access to the array where the data for a `MATDENSE` matrix is stored
2119: Logically Collective
2121: Input Parameter:
2122: . mat - a dense matrix
2124: Output Parameter:
2125: . array - pointer to the data
2127: Level: intermediate
2129: .seealso: `MATDENSE`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`
2130: @*/
2131: PetscErrorCode MatDenseGetArray(Mat A, PetscScalar **array)
2132: {
2135: PetscUseMethod(A, "MatDenseGetArray_C", (Mat, PetscScalar **), (A, array));
2136: return 0;
2137: }
2139: /*@C
2140: MatDenseRestoreArray - returns access to the array where the data for a `MATDENSE` matrix is stored obtained by `MatDenseGetArray()`
2142: Logically Collective
2144: Input Parameters:
2145: + mat - a dense matrix
2146: - array - pointer to the data (may be NULL)
2148: Level: intermediate
2150: .seealso: `MATDENSE`, `MatDenseGetArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`
2151: @*/
2152: PetscErrorCode MatDenseRestoreArray(Mat A, PetscScalar **array)
2153: {
2156: PetscUseMethod(A, "MatDenseRestoreArray_C", (Mat, PetscScalar **), (A, array));
2157: PetscObjectStateIncrease((PetscObject)A);
2158: #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
2159: A->offloadmask = PETSC_OFFLOAD_CPU;
2160: #endif
2161: return 0;
2162: }
2164: /*@C
2165: MatDenseGetArrayRead - gives read-only access to the array where the data for a `MATDENSE` matrix is stored
2167: Not Collective
2169: Input Parameter:
2170: . mat - a dense matrix
2172: Output Parameter:
2173: . array - pointer to the data
2175: Level: intermediate
2177: .seealso: `MATDENSE`, `MatDenseRestoreArrayRead()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`
2178: @*/
2179: PetscErrorCode MatDenseGetArrayRead(Mat A, const PetscScalar **array)
2180: {
2183: PetscUseMethod(A, "MatDenseGetArrayRead_C", (Mat, const PetscScalar **), (A, array));
2184: return 0;
2185: }
2187: /*@C
2188: MatDenseRestoreArrayRead - returns access to the array where the data for a `MATDENSE` matrix is stored obtained by `MatDenseGetArrayRead()`
2190: Not Collective
2192: Input Parameters:
2193: + mat - a dense matrix
2194: - array - pointer to the data (may be NULL)
2196: Level: intermediate
2198: .seealso: `MATDENSE`, `MatDenseGetArrayRead()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`
2199: @*/
2200: PetscErrorCode MatDenseRestoreArrayRead(Mat A, const PetscScalar **array)
2201: {
2204: PetscUseMethod(A, "MatDenseRestoreArrayRead_C", (Mat, const PetscScalar **), (A, array));
2205: return 0;
2206: }
2208: /*@C
2209: MatDenseGetArrayWrite - gives write-only access to the array where the data for a `MATDENSE` matrix is stored
2211: Not Collective
2213: Input Parameter:
2214: . mat - a dense matrix
2216: Output Parameter:
2217: . array - pointer to the data
2219: Level: intermediate
2221: .seealso: `MATDENSE`, `MatDenseRestoreArrayWrite()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`
2222: @*/
2223: PetscErrorCode MatDenseGetArrayWrite(Mat A, PetscScalar **array)
2224: {
2227: PetscUseMethod(A, "MatDenseGetArrayWrite_C", (Mat, PetscScalar **), (A, array));
2228: return 0;
2229: }
2231: /*@C
2232: MatDenseRestoreArrayWrite - returns access to the array where the data for a `MATDENSE` matrix is stored obtained by `MatDenseGetArrayWrite()`
2234: Not Collective
2236: Input Parameters:
2237: + mat - a dense matrix
2238: - array - pointer to the data (may be NULL)
2240: Level: intermediate
2242: .seealso: `MATDENSE`, `MatDenseGetArrayWrite()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`
2243: @*/
2244: PetscErrorCode MatDenseRestoreArrayWrite(Mat A, PetscScalar **array)
2245: {
2248: PetscUseMethod(A, "MatDenseRestoreArrayWrite_C", (Mat, PetscScalar **), (A, array));
2249: PetscObjectStateIncrease((PetscObject)A);
2250: #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
2251: A->offloadmask = PETSC_OFFLOAD_CPU;
2252: #endif
2253: return 0;
2254: }
2256: static PetscErrorCode MatCreateSubMatrix_SeqDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
2257: {
2258: Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
2259: PetscInt i, j, nrows, ncols, ldb;
2260: const PetscInt *irow, *icol;
2261: PetscScalar *av, *bv, *v = mat->v;
2262: Mat newmat;
2264: ISGetIndices(isrow, &irow);
2265: ISGetIndices(iscol, &icol);
2266: ISGetLocalSize(isrow, &nrows);
2267: ISGetLocalSize(iscol, &ncols);
2269: /* Check submatrixcall */
2270: if (scall == MAT_REUSE_MATRIX) {
2271: PetscInt n_cols, n_rows;
2272: MatGetSize(*B, &n_rows, &n_cols);
2273: if (n_rows != nrows || n_cols != ncols) {
2274: /* resize the result matrix to match number of requested rows/columns */
2275: MatSetSizes(*B, nrows, ncols, nrows, ncols);
2276: }
2277: newmat = *B;
2278: } else {
2279: /* Create and fill new matrix */
2280: MatCreate(PetscObjectComm((PetscObject)A), &newmat);
2281: MatSetSizes(newmat, nrows, ncols, nrows, ncols);
2282: MatSetType(newmat, ((PetscObject)A)->type_name);
2283: MatSeqDenseSetPreallocation(newmat, NULL);
2284: }
2286: /* Now extract the data pointers and do the copy,column at a time */
2287: MatDenseGetArray(newmat, &bv);
2288: MatDenseGetLDA(newmat, &ldb);
2289: for (i = 0; i < ncols; i++) {
2290: av = v + mat->lda * icol[i];
2291: for (j = 0; j < nrows; j++) bv[j] = av[irow[j]];
2292: bv += ldb;
2293: }
2294: MatDenseRestoreArray(newmat, &bv);
2296: /* Assemble the matrices so that the correct flags are set */
2297: MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY);
2298: MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY);
2300: /* Free work space */
2301: ISRestoreIndices(isrow, &irow);
2302: ISRestoreIndices(iscol, &icol);
2303: *B = newmat;
2304: return 0;
2305: }
2307: static PetscErrorCode MatCreateSubMatrices_SeqDense(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[])
2308: {
2309: PetscInt i;
2311: if (scall == MAT_INITIAL_MATRIX) PetscCalloc1(n, B);
2313: for (i = 0; i < n; i++) MatCreateSubMatrix_SeqDense(A, irow[i], icol[i], scall, &(*B)[i]);
2314: return 0;
2315: }
2317: static PetscErrorCode MatAssemblyBegin_SeqDense(Mat mat, MatAssemblyType mode)
2318: {
2319: return 0;
2320: }
2322: static PetscErrorCode MatAssemblyEnd_SeqDense(Mat mat, MatAssemblyType mode)
2323: {
2324: return 0;
2325: }
2327: PetscErrorCode MatCopy_SeqDense(Mat A, Mat B, MatStructure str)
2328: {
2329: Mat_SeqDense *a = (Mat_SeqDense *)A->data, *b = (Mat_SeqDense *)B->data;
2330: const PetscScalar *va;
2331: PetscScalar *vb;
2332: PetscInt lda1 = a->lda, lda2 = b->lda, m = A->rmap->n, n = A->cmap->n, j;
2334: /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2335: if (A->ops->copy != B->ops->copy) {
2336: MatCopy_Basic(A, B, str);
2337: return 0;
2338: }
2340: MatDenseGetArrayRead(A, &va);
2341: MatDenseGetArray(B, &vb);
2342: if (lda1 > m || lda2 > m) {
2343: for (j = 0; j < n; j++) PetscArraycpy(vb + j * lda2, va + j * lda1, m);
2344: } else {
2345: PetscArraycpy(vb, va, A->rmap->n * A->cmap->n);
2346: }
2347: MatDenseRestoreArray(B, &vb);
2348: MatDenseRestoreArrayRead(A, &va);
2349: MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY);
2350: MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY);
2351: return 0;
2352: }
2354: PetscErrorCode MatSetUp_SeqDense(Mat A)
2355: {
2356: PetscLayoutSetUp(A->rmap);
2357: PetscLayoutSetUp(A->cmap);
2358: if (!A->preallocated) MatSeqDenseSetPreallocation(A, NULL);
2359: return 0;
2360: }
2362: static PetscErrorCode MatConjugate_SeqDense(Mat A)
2363: {
2364: Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
2365: PetscInt i, j;
2366: PetscInt min = PetscMin(A->rmap->n, A->cmap->n);
2367: PetscScalar *aa;
2369: MatDenseGetArray(A, &aa);
2370: for (j = 0; j < A->cmap->n; j++) {
2371: for (i = 0; i < A->rmap->n; i++) aa[i + j * mat->lda] = PetscConj(aa[i + j * mat->lda]);
2372: }
2373: MatDenseRestoreArray(A, &aa);
2374: if (mat->tau)
2375: for (i = 0; i < min; i++) mat->tau[i] = PetscConj(mat->tau[i]);
2376: return 0;
2377: }
2379: static PetscErrorCode MatRealPart_SeqDense(Mat A)
2380: {
2381: Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
2382: PetscInt i, j;
2383: PetscScalar *aa;
2385: MatDenseGetArray(A, &aa);
2386: for (j = 0; j < A->cmap->n; j++) {
2387: for (i = 0; i < A->rmap->n; i++) aa[i + j * mat->lda] = PetscRealPart(aa[i + j * mat->lda]);
2388: }
2389: MatDenseRestoreArray(A, &aa);
2390: return 0;
2391: }
2393: static PetscErrorCode MatImaginaryPart_SeqDense(Mat A)
2394: {
2395: Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
2396: PetscInt i, j;
2397: PetscScalar *aa;
2399: MatDenseGetArray(A, &aa);
2400: for (j = 0; j < A->cmap->n; j++) {
2401: for (i = 0; i < A->rmap->n; i++) aa[i + j * mat->lda] = PetscImaginaryPart(aa[i + j * mat->lda]);
2402: }
2403: MatDenseRestoreArray(A, &aa);
2404: return 0;
2405: }
2407: /* ----------------------------------------------------------------*/
2408: PetscErrorCode MatMatMultSymbolic_SeqDense_SeqDense(Mat A, Mat B, PetscReal fill, Mat C)
2409: {
2410: PetscInt m = A->rmap->n, n = B->cmap->n;
2411: PetscBool cisdense = PETSC_FALSE;
2413: MatSetSizes(C, m, n, m, n);
2414: #if defined(PETSC_HAVE_CUDA)
2415: PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, "");
2416: #endif
2417: #if defined(PETSC_HAVE_HIP)
2418: PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSEHIP, "");
2419: #endif
2420: if (!cisdense) {
2421: PetscBool flg;
2423: PetscObjectTypeCompare((PetscObject)B, ((PetscObject)A)->type_name, &flg);
2424: MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE);
2425: }
2426: MatSetUp(C);
2427: return 0;
2428: }
2430: PetscErrorCode MatMatMultNumeric_SeqDense_SeqDense(Mat A, Mat B, Mat C)
2431: {
2432: Mat_SeqDense *a = (Mat_SeqDense *)A->data, *b = (Mat_SeqDense *)B->data, *c = (Mat_SeqDense *)C->data;
2433: PetscBLASInt m, n, k;
2434: const PetscScalar *av, *bv;
2435: PetscScalar *cv;
2436: PetscScalar _DOne = 1.0, _DZero = 0.0;
2438: PetscBLASIntCast(C->rmap->n, &m);
2439: PetscBLASIntCast(C->cmap->n, &n);
2440: PetscBLASIntCast(A->cmap->n, &k);
2441: if (!m || !n || !k) return 0;
2442: MatDenseGetArrayRead(A, &av);
2443: MatDenseGetArrayRead(B, &bv);
2444: MatDenseGetArrayWrite(C, &cv);
2445: PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &m, &n, &k, &_DOne, av, &a->lda, bv, &b->lda, &_DZero, cv, &c->lda));
2446: PetscLogFlops(1.0 * m * n * k + 1.0 * m * n * (k - 1));
2447: MatDenseRestoreArrayRead(A, &av);
2448: MatDenseRestoreArrayRead(B, &bv);
2449: MatDenseRestoreArrayWrite(C, &cv);
2450: return 0;
2451: }
2453: PetscErrorCode MatMatTransposeMultSymbolic_SeqDense_SeqDense(Mat A, Mat B, PetscReal fill, Mat C)
2454: {
2455: PetscInt m = A->rmap->n, n = B->rmap->n;
2456: PetscBool cisdense = PETSC_FALSE;
2458: MatSetSizes(C, m, n, m, n);
2459: #if defined(PETSC_HAVE_CUDA)
2460: PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, "");
2461: #endif
2462: #if defined(PETSC_HAVE_HIP)
2463: PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSEHIP, "");
2464: #endif
2465: if (!cisdense) {
2466: PetscBool flg;
2468: PetscObjectTypeCompare((PetscObject)B, ((PetscObject)A)->type_name, &flg);
2469: MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE);
2470: }
2471: MatSetUp(C);
2472: return 0;
2473: }
2475: PetscErrorCode MatMatTransposeMultNumeric_SeqDense_SeqDense(Mat A, Mat B, Mat C)
2476: {
2477: Mat_SeqDense *a = (Mat_SeqDense *)A->data;
2478: Mat_SeqDense *b = (Mat_SeqDense *)B->data;
2479: Mat_SeqDense *c = (Mat_SeqDense *)C->data;
2480: const PetscScalar *av, *bv;
2481: PetscScalar *cv;
2482: PetscBLASInt m, n, k;
2483: PetscScalar _DOne = 1.0, _DZero = 0.0;
2485: PetscBLASIntCast(C->rmap->n, &m);
2486: PetscBLASIntCast(C->cmap->n, &n);
2487: PetscBLASIntCast(A->cmap->n, &k);
2488: if (!m || !n || !k) return 0;
2489: MatDenseGetArrayRead(A, &av);
2490: MatDenseGetArrayRead(B, &bv);
2491: MatDenseGetArrayWrite(C, &cv);
2492: PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &m, &n, &k, &_DOne, av, &a->lda, bv, &b->lda, &_DZero, cv, &c->lda));
2493: MatDenseRestoreArrayRead(A, &av);
2494: MatDenseRestoreArrayRead(B, &bv);
2495: MatDenseRestoreArrayWrite(C, &cv);
2496: PetscLogFlops(1.0 * m * n * k + 1.0 * m * n * (k - 1));
2497: return 0;
2498: }
2500: PetscErrorCode MatTransposeMatMultSymbolic_SeqDense_SeqDense(Mat A, Mat B, PetscReal fill, Mat C)
2501: {
2502: PetscInt m = A->cmap->n, n = B->cmap->n;
2503: PetscBool cisdense = PETSC_FALSE;
2505: MatSetSizes(C, m, n, m, n);
2506: #if defined(PETSC_HAVE_CUDA)
2507: PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, "");
2508: #endif
2509: #if defined(PETSC_HAVE_HIP)
2510: PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSEHIP, "");
2511: #endif
2512: if (!cisdense) {
2513: PetscBool flg;
2515: PetscObjectTypeCompare((PetscObject)B, ((PetscObject)A)->type_name, &flg);
2516: MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE);
2517: }
2518: MatSetUp(C);
2519: return 0;
2520: }
2522: PetscErrorCode MatTransposeMatMultNumeric_SeqDense_SeqDense(Mat A, Mat B, Mat C)
2523: {
2524: Mat_SeqDense *a = (Mat_SeqDense *)A->data;
2525: Mat_SeqDense *b = (Mat_SeqDense *)B->data;
2526: Mat_SeqDense *c = (Mat_SeqDense *)C->data;
2527: const PetscScalar *av, *bv;
2528: PetscScalar *cv;
2529: PetscBLASInt m, n, k;
2530: PetscScalar _DOne = 1.0, _DZero = 0.0;
2532: PetscBLASIntCast(C->rmap->n, &m);
2533: PetscBLASIntCast(C->cmap->n, &n);
2534: PetscBLASIntCast(A->rmap->n, &k);
2535: if (!m || !n || !k) return 0;
2536: MatDenseGetArrayRead(A, &av);
2537: MatDenseGetArrayRead(B, &bv);
2538: MatDenseGetArrayWrite(C, &cv);
2539: PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &m, &n, &k, &_DOne, av, &a->lda, bv, &b->lda, &_DZero, cv, &c->lda));
2540: MatDenseRestoreArrayRead(A, &av);
2541: MatDenseRestoreArrayRead(B, &bv);
2542: MatDenseRestoreArrayWrite(C, &cv);
2543: PetscLogFlops(1.0 * m * n * k + 1.0 * m * n * (k - 1));
2544: return 0;
2545: }
2547: /* ----------------------------------------------- */
2548: static PetscErrorCode MatProductSetFromOptions_SeqDense_AB(Mat C)
2549: {
2550: C->ops->matmultsymbolic = MatMatMultSymbolic_SeqDense_SeqDense;
2551: C->ops->productsymbolic = MatProductSymbolic_AB;
2552: return 0;
2553: }
2555: static PetscErrorCode MatProductSetFromOptions_SeqDense_AtB(Mat C)
2556: {
2557: C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_SeqDense_SeqDense;
2558: C->ops->productsymbolic = MatProductSymbolic_AtB;
2559: return 0;
2560: }
2562: static PetscErrorCode MatProductSetFromOptions_SeqDense_ABt(Mat C)
2563: {
2564: C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqDense_SeqDense;
2565: C->ops->productsymbolic = MatProductSymbolic_ABt;
2566: return 0;
2567: }
2569: PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqDense(Mat C)
2570: {
2571: Mat_Product *product = C->product;
2573: switch (product->type) {
2574: case MATPRODUCT_AB:
2575: MatProductSetFromOptions_SeqDense_AB(C);
2576: break;
2577: case MATPRODUCT_AtB:
2578: MatProductSetFromOptions_SeqDense_AtB(C);
2579: break;
2580: case MATPRODUCT_ABt:
2581: MatProductSetFromOptions_SeqDense_ABt(C);
2582: break;
2583: default:
2584: break;
2585: }
2586: return 0;
2587: }
2588: /* ----------------------------------------------- */
2590: static PetscErrorCode MatGetRowMax_SeqDense(Mat A, Vec v, PetscInt idx[])
2591: {
2592: Mat_SeqDense *a = (Mat_SeqDense *)A->data;
2593: PetscInt i, j, m = A->rmap->n, n = A->cmap->n, p;
2594: PetscScalar *x;
2595: const PetscScalar *aa;
2598: VecGetArray(v, &x);
2599: VecGetLocalSize(v, &p);
2600: MatDenseGetArrayRead(A, &aa);
2602: for (i = 0; i < m; i++) {
2603: x[i] = aa[i];
2604: if (idx) idx[i] = 0;
2605: for (j = 1; j < n; j++) {
2606: if (PetscRealPart(x[i]) < PetscRealPart(aa[i + a->lda * j])) {
2607: x[i] = aa[i + a->lda * j];
2608: if (idx) idx[i] = j;
2609: }
2610: }
2611: }
2612: MatDenseRestoreArrayRead(A, &aa);
2613: VecRestoreArray(v, &x);
2614: return 0;
2615: }
2617: static PetscErrorCode MatGetRowMaxAbs_SeqDense(Mat A, Vec v, PetscInt idx[])
2618: {
2619: Mat_SeqDense *a = (Mat_SeqDense *)A->data;
2620: PetscInt i, j, m = A->rmap->n, n = A->cmap->n, p;
2621: PetscScalar *x;
2622: PetscReal atmp;
2623: const PetscScalar *aa;
2626: VecGetArray(v, &x);
2627: VecGetLocalSize(v, &p);
2628: MatDenseGetArrayRead(A, &aa);
2630: for (i = 0; i < m; i++) {
2631: x[i] = PetscAbsScalar(aa[i]);
2632: for (j = 1; j < n; j++) {
2633: atmp = PetscAbsScalar(aa[i + a->lda * j]);
2634: if (PetscAbsScalar(x[i]) < atmp) {
2635: x[i] = atmp;
2636: if (idx) idx[i] = j;
2637: }
2638: }
2639: }
2640: MatDenseRestoreArrayRead(A, &aa);
2641: VecRestoreArray(v, &x);
2642: return 0;
2643: }
2645: static PetscErrorCode MatGetRowMin_SeqDense(Mat A, Vec v, PetscInt idx[])
2646: {
2647: Mat_SeqDense *a = (Mat_SeqDense *)A->data;
2648: PetscInt i, j, m = A->rmap->n, n = A->cmap->n, p;
2649: PetscScalar *x;
2650: const PetscScalar *aa;
2653: MatDenseGetArrayRead(A, &aa);
2654: VecGetArray(v, &x);
2655: VecGetLocalSize(v, &p);
2657: for (i = 0; i < m; i++) {
2658: x[i] = aa[i];
2659: if (idx) idx[i] = 0;
2660: for (j = 1; j < n; j++) {
2661: if (PetscRealPart(x[i]) > PetscRealPart(aa[i + a->lda * j])) {
2662: x[i] = aa[i + a->lda * j];
2663: if (idx) idx[i] = j;
2664: }
2665: }
2666: }
2667: VecRestoreArray(v, &x);
2668: MatDenseRestoreArrayRead(A, &aa);
2669: return 0;
2670: }
2672: PetscErrorCode MatGetColumnVector_SeqDense(Mat A, Vec v, PetscInt col)
2673: {
2674: Mat_SeqDense *a = (Mat_SeqDense *)A->data;
2675: PetscScalar *x;
2676: const PetscScalar *aa;
2679: MatDenseGetArrayRead(A, &aa);
2680: VecGetArray(v, &x);
2681: PetscArraycpy(x, aa + col * a->lda, A->rmap->n);
2682: VecRestoreArray(v, &x);
2683: MatDenseRestoreArrayRead(A, &aa);
2684: return 0;
2685: }
2687: PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat A, PetscInt type, PetscReal *reductions)
2688: {
2689: PetscInt i, j, m, n;
2690: const PetscScalar *a;
2692: MatGetSize(A, &m, &n);
2693: PetscArrayzero(reductions, n);
2694: MatDenseGetArrayRead(A, &a);
2695: if (type == NORM_2) {
2696: for (i = 0; i < n; i++) {
2697: for (j = 0; j < m; j++) reductions[i] += PetscAbsScalar(a[j] * a[j]);
2698: a += m;
2699: }
2700: } else if (type == NORM_1) {
2701: for (i = 0; i < n; i++) {
2702: for (j = 0; j < m; j++) reductions[i] += PetscAbsScalar(a[j]);
2703: a += m;
2704: }
2705: } else if (type == NORM_INFINITY) {
2706: for (i = 0; i < n; i++) {
2707: for (j = 0; j < m; j++) reductions[i] = PetscMax(PetscAbsScalar(a[j]), reductions[i]);
2708: a += m;
2709: }
2710: } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
2711: for (i = 0; i < n; i++) {
2712: for (j = 0; j < m; j++) reductions[i] += PetscRealPart(a[j]);
2713: a += m;
2714: }
2715: } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2716: for (i = 0; i < n; i++) {
2717: for (j = 0; j < m; j++) reductions[i] += PetscImaginaryPart(a[j]);
2718: a += m;
2719: }
2720: } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
2721: MatDenseRestoreArrayRead(A, &a);
2722: if (type == NORM_2) {
2723: for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
2724: } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2725: for (i = 0; i < n; i++) reductions[i] /= m;
2726: }
2727: return 0;
2728: }
2730: PetscErrorCode MatSetRandom_SeqDense(Mat x, PetscRandom rctx)
2731: {
2732: PetscScalar *a;
2733: PetscInt lda, m, n, i, j;
2735: MatGetSize(x, &m, &n);
2736: MatDenseGetLDA(x, &lda);
2737: MatDenseGetArrayWrite(x, &a);
2738: for (j = 0; j < n; j++) {
2739: for (i = 0; i < m; i++) PetscRandomGetValue(rctx, a + j * lda + i);
2740: }
2741: MatDenseRestoreArrayWrite(x, &a);
2742: return 0;
2743: }
2745: static PetscErrorCode MatMissingDiagonal_SeqDense(Mat A, PetscBool *missing, PetscInt *d)
2746: {
2747: *missing = PETSC_FALSE;
2748: return 0;
2749: }
2751: /* vals is not const */
2752: static PetscErrorCode MatDenseGetColumn_SeqDense(Mat A, PetscInt col, PetscScalar **vals)
2753: {
2754: Mat_SeqDense *a = (Mat_SeqDense *)A->data;
2755: PetscScalar *v;
2758: MatDenseGetArray(A, &v);
2759: *vals = v + col * a->lda;
2760: MatDenseRestoreArray(A, &v);
2761: return 0;
2762: }
2764: static PetscErrorCode MatDenseRestoreColumn_SeqDense(Mat A, PetscScalar **vals)
2765: {
2766: if (vals) *vals = NULL; /* user cannot accidentally use the array later */
2767: return 0;
2768: }
2770: /* -------------------------------------------------------------------*/
2771: static struct _MatOps MatOps_Values = {MatSetValues_SeqDense,
2772: MatGetRow_SeqDense,
2773: MatRestoreRow_SeqDense,
2774: MatMult_SeqDense,
2775: /* 4*/ MatMultAdd_SeqDense,
2776: MatMultTranspose_SeqDense,
2777: MatMultTransposeAdd_SeqDense,
2778: NULL,
2779: NULL,
2780: NULL,
2781: /* 10*/ NULL,
2782: MatLUFactor_SeqDense,
2783: MatCholeskyFactor_SeqDense,
2784: MatSOR_SeqDense,
2785: MatTranspose_SeqDense,
2786: /* 15*/ MatGetInfo_SeqDense,
2787: MatEqual_SeqDense,
2788: MatGetDiagonal_SeqDense,
2789: MatDiagonalScale_SeqDense,
2790: MatNorm_SeqDense,
2791: /* 20*/ MatAssemblyBegin_SeqDense,
2792: MatAssemblyEnd_SeqDense,
2793: MatSetOption_SeqDense,
2794: MatZeroEntries_SeqDense,
2795: /* 24*/ MatZeroRows_SeqDense,
2796: NULL,
2797: NULL,
2798: NULL,
2799: NULL,
2800: /* 29*/ MatSetUp_SeqDense,
2801: NULL,
2802: NULL,
2803: NULL,
2804: NULL,
2805: /* 34*/ MatDuplicate_SeqDense,
2806: NULL,
2807: NULL,
2808: NULL,
2809: NULL,
2810: /* 39*/ MatAXPY_SeqDense,
2811: MatCreateSubMatrices_SeqDense,
2812: NULL,
2813: MatGetValues_SeqDense,
2814: MatCopy_SeqDense,
2815: /* 44*/ MatGetRowMax_SeqDense,
2816: MatScale_SeqDense,
2817: MatShift_SeqDense,
2818: NULL,
2819: MatZeroRowsColumns_SeqDense,
2820: /* 49*/ MatSetRandom_SeqDense,
2821: NULL,
2822: NULL,
2823: NULL,
2824: NULL,
2825: /* 54*/ NULL,
2826: NULL,
2827: NULL,
2828: NULL,
2829: NULL,
2830: /* 59*/ MatCreateSubMatrix_SeqDense,
2831: MatDestroy_SeqDense,
2832: MatView_SeqDense,
2833: NULL,
2834: NULL,
2835: /* 64*/ NULL,
2836: NULL,
2837: NULL,
2838: NULL,
2839: NULL,
2840: /* 69*/ MatGetRowMaxAbs_SeqDense,
2841: NULL,
2842: NULL,
2843: NULL,
2844: NULL,
2845: /* 74*/ NULL,
2846: NULL,
2847: NULL,
2848: NULL,
2849: NULL,
2850: /* 79*/ NULL,
2851: NULL,
2852: NULL,
2853: NULL,
2854: /* 83*/ MatLoad_SeqDense,
2855: MatIsSymmetric_SeqDense,
2856: MatIsHermitian_SeqDense,
2857: NULL,
2858: NULL,
2859: NULL,
2860: /* 89*/ NULL,
2861: NULL,
2862: MatMatMultNumeric_SeqDense_SeqDense,
2863: NULL,
2864: NULL,
2865: /* 94*/ NULL,
2866: NULL,
2867: NULL,
2868: MatMatTransposeMultNumeric_SeqDense_SeqDense,
2869: NULL,
2870: /* 99*/ MatProductSetFromOptions_SeqDense,
2871: NULL,
2872: NULL,
2873: MatConjugate_SeqDense,
2874: NULL,
2875: /*104*/ NULL,
2876: MatRealPart_SeqDense,
2877: MatImaginaryPart_SeqDense,
2878: NULL,
2879: NULL,
2880: /*109*/ NULL,
2881: NULL,
2882: MatGetRowMin_SeqDense,
2883: MatGetColumnVector_SeqDense,
2884: MatMissingDiagonal_SeqDense,
2885: /*114*/ NULL,
2886: NULL,
2887: NULL,
2888: NULL,
2889: NULL,
2890: /*119*/ NULL,
2891: NULL,
2892: NULL,
2893: NULL,
2894: NULL,
2895: /*124*/ NULL,
2896: MatGetColumnReductions_SeqDense,
2897: NULL,
2898: NULL,
2899: NULL,
2900: /*129*/ NULL,
2901: NULL,
2902: NULL,
2903: MatTransposeMatMultNumeric_SeqDense_SeqDense,
2904: NULL,
2905: /*134*/ NULL,
2906: NULL,
2907: NULL,
2908: NULL,
2909: NULL,
2910: /*139*/ NULL,
2911: NULL,
2912: NULL,
2913: NULL,
2914: NULL,
2915: MatCreateMPIMatConcatenateSeqMat_SeqDense,
2916: /*145*/ NULL,
2917: NULL,
2918: NULL,
2919: NULL,
2920: NULL,
2921: /*150*/ NULL};
2923: /*@C
2924: MatCreateSeqDense - Creates a `MATSEQDENSE` that
2925: is stored in column major order (the usual Fortran 77 manner). Many
2926: of the matrix operations use the BLAS and LAPACK routines.
2928: Collective
2930: Input Parameters:
2931: + comm - MPI communicator, set to `PETSC_COMM_SELF`
2932: . m - number of rows
2933: . n - number of columns
2934: - data - optional location of matrix data in column major order. Set data=NULL for PETSc
2935: to control all matrix memory allocation.
2937: Output Parameter:
2938: . A - the matrix
2940: Note:
2941: The data input variable is intended primarily for Fortran programmers
2942: who wish to allocate their own matrix memory space. Most users should
2943: set data=NULL.
2945: Level: intermediate
2947: .seealso: `MATSEQDENSE`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()`
2948: @*/
2949: PetscErrorCode MatCreateSeqDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscScalar *data, Mat *A)
2950: {
2951: MatCreate(comm, A);
2952: MatSetSizes(*A, m, n, m, n);
2953: MatSetType(*A, MATSEQDENSE);
2954: MatSeqDenseSetPreallocation(*A, data);
2955: return 0;
2956: }
2958: /*@C
2959: MatSeqDenseSetPreallocation - Sets the array used for storing the matrix elements of a `MATSEQDENSE` matrix
2961: Collective
2963: Input Parameters:
2964: + B - the matrix
2965: - data - the array (or NULL)
2967: Note:
2968: The data input variable is intended primarily for Fortran programmers
2969: who wish to allocate their own matrix memory space. Most users should
2970: need not call this routine.
2972: Level: intermediate
2974: .seealso: `MATSEQDENSE`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()`, `MatDenseSetLDA()`
2975: @*/
2976: PetscErrorCode MatSeqDenseSetPreallocation(Mat B, PetscScalar data[])
2977: {
2979: PetscTryMethod(B, "MatSeqDenseSetPreallocation_C", (Mat, PetscScalar[]), (B, data));
2980: return 0;
2981: }
2983: PetscErrorCode MatSeqDenseSetPreallocation_SeqDense(Mat B, PetscScalar *data)
2984: {
2985: Mat_SeqDense *b = (Mat_SeqDense *)B->data;
2988: B->preallocated = PETSC_TRUE;
2990: PetscLayoutSetUp(B->rmap);
2991: PetscLayoutSetUp(B->cmap);
2993: if (b->lda <= 0) b->lda = B->rmap->n;
2995: if (!data) { /* petsc-allocated storage */
2996: if (!b->user_alloc) PetscFree(b->v);
2997: PetscCalloc1((size_t)b->lda * B->cmap->n, &b->v);
2999: b->user_alloc = PETSC_FALSE;
3000: } else { /* user-allocated storage */
3001: if (!b->user_alloc) PetscFree(b->v);
3002: b->v = data;
3003: b->user_alloc = PETSC_TRUE;
3004: }
3005: B->assembled = PETSC_TRUE;
3006: return 0;
3007: }
3009: #if defined(PETSC_HAVE_ELEMENTAL)
3010: PETSC_INTERN PetscErrorCode MatConvert_SeqDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
3011: {
3012: Mat mat_elemental;
3013: const PetscScalar *array;
3014: PetscScalar *v_colwise;
3015: PetscInt M = A->rmap->N, N = A->cmap->N, i, j, k, *rows, *cols;
3017: PetscMalloc3(M * N, &v_colwise, M, &rows, N, &cols);
3018: MatDenseGetArrayRead(A, &array);
3019: /* convert column-wise array into row-wise v_colwise, see MatSetValues_Elemental() */
3020: k = 0;
3021: for (j = 0; j < N; j++) {
3022: cols[j] = j;
3023: for (i = 0; i < M; i++) v_colwise[j * M + i] = array[k++];
3024: }
3025: for (i = 0; i < M; i++) rows[i] = i;
3026: MatDenseRestoreArrayRead(A, &array);
3028: MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);
3029: MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, M, N);
3030: MatSetType(mat_elemental, MATELEMENTAL);
3031: MatSetUp(mat_elemental);
3033: /* PETSc-Elemental interaface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
3034: MatSetValues(mat_elemental, M, rows, N, cols, v_colwise, ADD_VALUES);
3035: MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);
3036: MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);
3037: PetscFree3(v_colwise, rows, cols);
3039: if (reuse == MAT_INPLACE_MATRIX) {
3040: MatHeaderReplace(A, &mat_elemental);
3041: } else {
3042: *newmat = mat_elemental;
3043: }
3044: return 0;
3045: }
3046: #endif
3048: PetscErrorCode MatDenseSetLDA_SeqDense(Mat B, PetscInt lda)
3049: {
3050: Mat_SeqDense *b = (Mat_SeqDense *)B->data;
3051: PetscBool data;
3053: data = (PetscBool)((B->rmap->n > 0 && B->cmap->n > 0) ? (b->v ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE);
3056: b->lda = lda;
3057: return 0;
3058: }
3060: PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
3061: {
3062: MatCreateMPIMatConcatenateSeqMat_MPIDense(comm, inmat, n, scall, outmat);
3063: return 0;
3064: }
3066: PetscErrorCode MatDenseGetColumnVec_SeqDense(Mat A, PetscInt col, Vec *v)
3067: {
3068: Mat_SeqDense *a = (Mat_SeqDense *)A->data;
3072: if (!a->cvec) { VecCreateSeqWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, &a->cvec); }
3073: a->vecinuse = col + 1;
3074: MatDenseGetArray(A, (PetscScalar **)&a->ptrinuse);
3075: VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)a->lda);
3076: *v = a->cvec;
3077: return 0;
3078: }
3080: PetscErrorCode MatDenseRestoreColumnVec_SeqDense(Mat A, PetscInt col, Vec *v)
3081: {
3082: Mat_SeqDense *a = (Mat_SeqDense *)A->data;
3086: a->vecinuse = 0;
3087: MatDenseRestoreArray(A, (PetscScalar **)&a->ptrinuse);
3088: VecResetArray(a->cvec);
3089: if (v) *v = NULL;
3090: return 0;
3091: }
3093: PetscErrorCode MatDenseGetColumnVecRead_SeqDense(Mat A, PetscInt col, Vec *v)
3094: {
3095: Mat_SeqDense *a = (Mat_SeqDense *)A->data;
3099: if (!a->cvec) { VecCreateSeqWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, &a->cvec); }
3100: a->vecinuse = col + 1;
3101: MatDenseGetArrayRead(A, &a->ptrinuse);
3102: VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)a->lda);
3103: VecLockReadPush(a->cvec);
3104: *v = a->cvec;
3105: return 0;
3106: }
3108: PetscErrorCode MatDenseRestoreColumnVecRead_SeqDense(Mat A, PetscInt col, Vec *v)
3109: {
3110: Mat_SeqDense *a = (Mat_SeqDense *)A->data;
3114: a->vecinuse = 0;
3115: MatDenseRestoreArrayRead(A, &a->ptrinuse);
3116: VecLockReadPop(a->cvec);
3117: VecResetArray(a->cvec);
3118: if (v) *v = NULL;
3119: return 0;
3120: }
3122: PetscErrorCode MatDenseGetColumnVecWrite_SeqDense(Mat A, PetscInt col, Vec *v)
3123: {
3124: Mat_SeqDense *a = (Mat_SeqDense *)A->data;
3128: if (!a->cvec) { VecCreateSeqWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, &a->cvec); }
3129: a->vecinuse = col + 1;
3130: MatDenseGetArrayWrite(A, (PetscScalar **)&a->ptrinuse);
3131: VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)a->lda);
3132: *v = a->cvec;
3133: return 0;
3134: }
3136: PetscErrorCode MatDenseRestoreColumnVecWrite_SeqDense(Mat A, PetscInt col, Vec *v)
3137: {
3138: Mat_SeqDense *a = (Mat_SeqDense *)A->data;
3142: a->vecinuse = 0;
3143: MatDenseRestoreArrayWrite(A, (PetscScalar **)&a->ptrinuse);
3144: VecResetArray(a->cvec);
3145: if (v) *v = NULL;
3146: return 0;
3147: }
3149: PetscErrorCode MatDenseGetSubMatrix_SeqDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
3150: {
3151: Mat_SeqDense *a = (Mat_SeqDense *)A->data;
3155: if (a->cmat && (cend - cbegin != a->cmat->cmap->N || rend - rbegin != a->cmat->rmap->N)) MatDestroy(&a->cmat);
3156: if (!a->cmat) {
3157: MatCreateDense(PetscObjectComm((PetscObject)A), rend - rbegin, PETSC_DECIDE, rend - rbegin, cend - cbegin, a->v + rbegin + (size_t)cbegin * a->lda, &a->cmat);
3158: } else {
3159: MatDensePlaceArray(a->cmat, a->v + rbegin + (size_t)cbegin * a->lda);
3160: }
3161: MatDenseSetLDA(a->cmat, a->lda);
3162: a->matinuse = cbegin + 1;
3163: *v = a->cmat;
3164: #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
3165: A->offloadmask = PETSC_OFFLOAD_CPU;
3166: #endif
3167: return 0;
3168: }
3170: PetscErrorCode MatDenseRestoreSubMatrix_SeqDense(Mat A, Mat *v)
3171: {
3172: Mat_SeqDense *a = (Mat_SeqDense *)A->data;
3177: a->matinuse = 0;
3178: MatDenseResetArray(a->cmat);
3179: if (v) *v = NULL;
3180: #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
3181: A->offloadmask = PETSC_OFFLOAD_CPU;
3182: #endif
3183: return 0;
3184: }
3186: /*MC
3187: MATSEQDENSE - MATSEQDENSE = "seqdense" - A matrix type to be used for sequential dense matrices.
3189: Options Database Keys:
3190: . -mat_type seqdense - sets the matrix type to `MATSEQDENSE` during a call to `MatSetFromOptions()`
3192: Level: beginner
3194: .seealso: `MATSEQDENSE`, `MatCreateSeqDense()`
3195: M*/
3196: PetscErrorCode MatCreate_SeqDense(Mat B)
3197: {
3198: Mat_SeqDense *b;
3199: PetscMPIInt size;
3201: MPI_Comm_size(PetscObjectComm((PetscObject)B), &size);
3204: PetscNew(&b);
3205: PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps));
3206: B->data = (void *)b;
3208: b->roworiented = PETSC_TRUE;
3210: PetscObjectComposeFunction((PetscObject)B, "MatQRFactor_C", MatQRFactor_SeqDense);
3211: PetscObjectComposeFunction((PetscObject)B, "MatDenseGetLDA_C", MatDenseGetLDA_SeqDense);
3212: PetscObjectComposeFunction((PetscObject)B, "MatDenseSetLDA_C", MatDenseSetLDA_SeqDense);
3213: PetscObjectComposeFunction((PetscObject)B, "MatDenseGetArray_C", MatDenseGetArray_SeqDense);
3214: PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreArray_C", MatDenseRestoreArray_SeqDense);
3215: PetscObjectComposeFunction((PetscObject)B, "MatDensePlaceArray_C", MatDensePlaceArray_SeqDense);
3216: PetscObjectComposeFunction((PetscObject)B, "MatDenseResetArray_C", MatDenseResetArray_SeqDense);
3217: PetscObjectComposeFunction((PetscObject)B, "MatDenseReplaceArray_C", MatDenseReplaceArray_SeqDense);
3218: PetscObjectComposeFunction((PetscObject)B, "MatDenseGetArrayRead_C", MatDenseGetArray_SeqDense);
3219: PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreArrayRead_C", MatDenseRestoreArray_SeqDense);
3220: PetscObjectComposeFunction((PetscObject)B, "MatDenseGetArrayWrite_C", MatDenseGetArray_SeqDense);
3221: PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArray_SeqDense);
3222: PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_seqaij_C", MatConvert_SeqDense_SeqAIJ);
3223: #if defined(PETSC_HAVE_ELEMENTAL)
3224: PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_elemental_C", MatConvert_SeqDense_Elemental);
3225: #endif
3226: #if defined(PETSC_HAVE_SCALAPACK)
3227: PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_scalapack_C", MatConvert_Dense_ScaLAPACK);
3228: #endif
3229: #if defined(PETSC_HAVE_CUDA)
3230: PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_seqdensecuda_C", MatConvert_SeqDense_SeqDenseCUDA);
3231: PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensecuda_seqdensecuda_C", MatProductSetFromOptions_SeqDense);
3232: PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensecuda_seqdense_C", MatProductSetFromOptions_SeqDense);
3233: PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdense_seqdensecuda_C", MatProductSetFromOptions_SeqDense);
3234: #endif
3235: #if defined(PETSC_HAVE_HIP)
3236: PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_seqdensehip_C", MatConvert_SeqDense_SeqDenseHIP);
3237: PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensehip_seqdensehip_C", MatProductSetFromOptions_SeqDense);
3238: PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensehip_seqdense_C", MatProductSetFromOptions_SeqDense);
3239: PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdense_seqdensehip_C", MatProductSetFromOptions_SeqDense);
3240: #endif
3241: PetscObjectComposeFunction((PetscObject)B, "MatSeqDenseSetPreallocation_C", MatSeqDenseSetPreallocation_SeqDense);
3242: PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqaij_seqdense_C", MatProductSetFromOptions_SeqAIJ_SeqDense);
3243: PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdense_seqdense_C", MatProductSetFromOptions_SeqDense);
3244: PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqbaij_seqdense_C", MatProductSetFromOptions_SeqXBAIJ_SeqDense);
3245: PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqsbaij_seqdense_C", MatProductSetFromOptions_SeqXBAIJ_SeqDense);
3247: PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumn_C", MatDenseGetColumn_SeqDense);
3248: PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_SeqDense);
3249: PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_SeqDense);
3250: PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_SeqDense);
3251: PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_SeqDense);
3252: PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_SeqDense);
3253: PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_SeqDense);
3254: PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_SeqDense);
3255: PetscObjectComposeFunction((PetscObject)B, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_SeqDense);
3256: PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_SeqDense);
3257: PetscObjectChangeTypeName((PetscObject)B, MATSEQDENSE);
3258: return 0;
3259: }
3261: /*@C
3262: MatDenseGetColumn - gives access to a column of a dense matrix. This is only the local part of the column. You MUST call `MatDenseRestoreColumn()` to avoid memory bleeding.
3264: Not Collective
3266: Input Parameters:
3267: + mat - a `MATSEQDENSE` or `MATMPIDENSE` matrix
3268: - col - column index
3270: Output Parameter:
3271: . vals - pointer to the data
3273: Level: intermediate
3275: Note:
3276: Use `MatDenseGetColumnVec()` to get access to a column of a `MATDENSE` treated as a `Vec`
3278: .seealso: `MATDENSE`, `MatDenseRestoreColumn()`, `MatDenseGetColumnVec()`
3279: @*/
3280: PetscErrorCode MatDenseGetColumn(Mat A, PetscInt col, PetscScalar **vals)
3281: {
3285: PetscUseMethod(A, "MatDenseGetColumn_C", (Mat, PetscInt, PetscScalar **), (A, col, vals));
3286: return 0;
3287: }
3289: /*@C
3290: MatDenseRestoreColumn - returns access to a column of a `MATDENSE` matrix which is returned by `MatDenseGetColumn()`.
3292: Not Collective
3294: Input Parameters:
3295: + mat - a `MATSEQDENSE` or `MATMPIDENSE` matrix
3296: - vals - pointer to the data (may be NULL)
3298: Level: intermediate
3300: .seealso: `MATDENSE`, `MatDenseGetColumn()`
3301: @*/
3302: PetscErrorCode MatDenseRestoreColumn(Mat A, PetscScalar **vals)
3303: {
3306: PetscUseMethod(A, "MatDenseRestoreColumn_C", (Mat, PetscScalar **), (A, vals));
3307: return 0;
3308: }
3310: /*@
3311: MatDenseGetColumnVec - Gives read-write access to a column of a `MATDENSE` matrix, represented as a `Vec`.
3313: Collective
3315: Input Parameters:
3316: + mat - the `Mat` object
3317: - col - the column index
3319: Output Parameter:
3320: . v - the vector
3322: Notes:
3323: The vector is owned by PETSc. Users need to call `MatDenseRestoreColumnVec()` when the vector is no longer needed.
3325: Use `MatDenseGetColumnVecRead()` to obtain read-only access or `MatDenseGetColumnVecWrite()` for write-only access.
3327: Level: intermediate
3329: .seealso: `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()`, `MatDenseGetColumn()`
3330: @*/
3331: PetscErrorCode MatDenseGetColumnVec(Mat A, PetscInt col, Vec *v)
3332: {
3339: PetscUseMethod(A, "MatDenseGetColumnVec_C", (Mat, PetscInt, Vec *), (A, col, v));
3340: return 0;
3341: }
3343: /*@
3344: MatDenseRestoreColumnVec - Returns access to a column of a dense matrix obtained from MatDenseGetColumnVec().
3346: Collective
3348: Input Parameters:
3349: + mat - the Mat object
3350: . col - the column index
3351: - v - the Vec object (may be NULL)
3353: Level: intermediate
3355: .seealso: `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()`
3356: @*/
3357: PetscErrorCode MatDenseRestoreColumnVec(Mat A, PetscInt col, Vec *v)
3358: {
3364: PetscUseMethod(A, "MatDenseRestoreColumnVec_C", (Mat, PetscInt, Vec *), (A, col, v));
3365: return 0;
3366: }
3368: /*@
3369: MatDenseGetColumnVecRead - Gives read-only access to a column of a dense matrix, represented as a Vec.
3371: Collective
3373: Input Parameters:
3374: + mat - the Mat object
3375: - col - the column index
3377: Output Parameter:
3378: . v - the vector
3380: Notes:
3381: The vector is owned by PETSc and users cannot modify it.
3383: Users need to call MatDenseRestoreColumnVecRead() when the vector is no longer needed.
3385: Use MatDenseGetColumnVec() to obtain read-write access or MatDenseGetColumnVecWrite() for write-only access.
3387: Level: intermediate
3389: .seealso: `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()`
3390: @*/
3391: PetscErrorCode MatDenseGetColumnVecRead(Mat A, PetscInt col, Vec *v)
3392: {
3399: PetscUseMethod(A, "MatDenseGetColumnVecRead_C", (Mat, PetscInt, Vec *), (A, col, v));
3400: return 0;
3401: }
3403: /*@
3404: MatDenseRestoreColumnVecRead - Returns access to a column of a dense matrix obtained from MatDenseGetColumnVecRead().
3406: Collective
3408: Input Parameters:
3409: + mat - the Mat object
3410: . col - the column index
3411: - v - the Vec object (may be NULL)
3413: Level: intermediate
3415: .seealso: `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecWrite()`
3416: @*/
3417: PetscErrorCode MatDenseRestoreColumnVecRead(Mat A, PetscInt col, Vec *v)
3418: {
3424: PetscUseMethod(A, "MatDenseRestoreColumnVecRead_C", (Mat, PetscInt, Vec *), (A, col, v));
3425: return 0;
3426: }
3428: /*@
3429: MatDenseGetColumnVecWrite - Gives write-only access to a column of a dense matrix, represented as a Vec.
3431: Collective
3433: Input Parameters:
3434: + mat - the Mat object
3435: - col - the column index
3437: Output Parameter:
3438: . v - the vector
3440: Notes:
3441: The vector is owned by PETSc. Users need to call MatDenseRestoreColumnVecWrite() when the vector is no longer needed.
3443: Use MatDenseGetColumnVec() to obtain read-write access or MatDenseGetColumnVecRead() for read-only access.
3445: Level: intermediate
3447: .seealso: `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()`
3448: @*/
3449: PetscErrorCode MatDenseGetColumnVecWrite(Mat A, PetscInt col, Vec *v)
3450: {
3457: PetscUseMethod(A, "MatDenseGetColumnVecWrite_C", (Mat, PetscInt, Vec *), (A, col, v));
3458: return 0;
3459: }
3461: /*@
3462: MatDenseRestoreColumnVecWrite - Returns access to a column of a dense matrix obtained from MatDenseGetColumnVecWrite().
3464: Collective
3466: Input Parameters:
3467: + mat - the Mat object
3468: . col - the column index
3469: - v - the Vec object (may be NULL)
3471: Level: intermediate
3473: .seealso: `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`
3474: @*/
3475: PetscErrorCode MatDenseRestoreColumnVecWrite(Mat A, PetscInt col, Vec *v)
3476: {
3482: PetscUseMethod(A, "MatDenseRestoreColumnVecWrite_C", (Mat, PetscInt, Vec *), (A, col, v));
3483: return 0;
3484: }
3486: /*@
3487: MatDenseGetSubMatrix - Gives access to a block of rows and columns of a dense matrix, represented as a Mat.
3489: Collective
3491: Input Parameters:
3492: + mat - the Mat object
3493: . rbegin - the first global row index in the block (if PETSC_DECIDE, is 0)
3494: . rend - the global row index past the last one in the block (if PETSC_DECIDE, is M)
3495: . cbegin - the first global column index in the block (if PETSC_DECIDE, is 0)
3496: - cend - the global column index past the last one in the block (if PETSC_DECIDE, is N)
3498: Output Parameter:
3499: . v - the matrix
3501: Notes:
3502: The matrix is owned by PETSc. Users need to call MatDenseRestoreSubMatrix() when the matrix is no longer needed.
3504: The output matrix is not redistributed by PETSc, so depending on the values of rbegin and rend, some processes may have no local rows.
3506: Level: intermediate
3508: .seealso: `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreSubMatrix()`
3509: @*/
3510: PetscErrorCode MatDenseGetSubMatrix(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
3511: {
3519: if (rbegin == PETSC_DECIDE) rbegin = 0;
3520: if (rend == PETSC_DECIDE) rend = A->rmap->N;
3521: if (cbegin == PETSC_DECIDE) cbegin = 0;
3522: if (cend == PETSC_DECIDE) cend = A->cmap->N;
3528: PetscUseMethod(A, "MatDenseGetSubMatrix_C", (Mat, PetscInt, PetscInt, PetscInt, PetscInt, Mat *), (A, rbegin, rend, cbegin, cend, v));
3529: return 0;
3530: }
3532: /*@
3533: MatDenseRestoreSubMatrix - Returns access to a block of columns of a dense matrix obtained from MatDenseGetSubMatrix().
3535: Collective
3537: Input Parameters:
3538: + mat - the Mat object
3539: - v - the Mat object (may be NULL)
3541: Level: intermediate
3543: .seealso: `MATDENSE`, `MATDENSECUDA`, `MATDENSEHIP`, `MatDenseGetColumnVec()`, `MatDenseRestoreColumnVec()`, `MatDenseGetSubMatrix()`
3544: @*/
3545: PetscErrorCode MatDenseRestoreSubMatrix(Mat A, Mat *v)
3546: {
3550: PetscUseMethod(A, "MatDenseRestoreSubMatrix_C", (Mat, Mat *), (A, v));
3551: return 0;
3552: }
3554: #include <petscblaslapack.h>
3555: #include <petsc/private/kernels/blockinvert.h>
3557: PetscErrorCode MatSeqDenseInvert(Mat A)
3558: {
3559: Mat_SeqDense *a = (Mat_SeqDense *)A->data;
3560: PetscInt bs = A->rmap->n;
3561: MatScalar *values = a->v;
3562: const PetscReal shift = 0.0;
3563: PetscBool allowzeropivot = PetscNot(A->erroriffailure), zeropivotdetected = PETSC_FALSE;
3565: /* factor and invert each block */
3566: switch (bs) {
3567: case 1:
3568: values[0] = (PetscScalar)1.0 / (values[0] + shift);
3569: break;
3570: case 2:
3571: PetscKernel_A_gets_inverse_A_2(values, shift, allowzeropivot, &zeropivotdetected);
3572: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
3573: break;
3574: case 3:
3575: PetscKernel_A_gets_inverse_A_3(values, shift, allowzeropivot, &zeropivotdetected);
3576: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
3577: break;
3578: case 4:
3579: PetscKernel_A_gets_inverse_A_4(values, shift, allowzeropivot, &zeropivotdetected);
3580: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
3581: break;
3582: case 5: {
3583: PetscScalar work[25];
3584: PetscInt ipvt[5];
3586: PetscKernel_A_gets_inverse_A_5(values, ipvt, work, shift, allowzeropivot, &zeropivotdetected);
3587: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
3588: } break;
3589: case 6:
3590: PetscKernel_A_gets_inverse_A_6(values, shift, allowzeropivot, &zeropivotdetected);
3591: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
3592: break;
3593: case 7:
3594: PetscKernel_A_gets_inverse_A_7(values, shift, allowzeropivot, &zeropivotdetected);
3595: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
3596: break;
3597: default: {
3598: PetscInt *v_pivots, *IJ, j;
3599: PetscScalar *v_work;
3601: PetscMalloc3(bs, &v_work, bs, &v_pivots, bs, &IJ);
3602: for (j = 0; j < bs; j++) IJ[j] = j;
3603: PetscKernel_A_gets_inverse_A(bs, values, v_pivots, v_work, allowzeropivot, &zeropivotdetected);
3604: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
3605: PetscFree3(v_work, v_pivots, IJ);
3606: }
3607: }
3608: return 0;
3609: }