Actual source code: kaij.c
2: /*
3: Defines the basic matrix operations for the KAIJ matrix storage format.
4: This format is used to evaluate matrices of the form:
6: [I \otimes S + A \otimes T]
8: where
9: S is a dense (p \times q) matrix
10: T is a dense (p \times q) matrix
11: A is an AIJ (n \times n) matrix
12: I is the identity matrix
14: The resulting matrix is (np \times nq)
16: We provide:
17: MatMult()
18: MatMultAdd()
19: MatInvertBlockDiagonal()
20: and
21: MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*)
23: This single directory handles both the sequential and parallel codes
24: */
26: #include <../src/mat/impls/kaij/kaij.h>
27: #include <../src/mat/utils/freespace.h>
28: #include <petsc/private/vecimpl.h>
30: /*@C
31: MatKAIJGetAIJ - Get the `MATAIJ` matrix describing the blockwise action of the `MATKAIJ` matrix
33: Not Collective, but if the `MATKAIJ` matrix is parallel, the `MATAIJ` matrix is also parallel
35: Input Parameter:
36: . A - the `MATKAIJ` matrix
38: Output Parameter:
39: . B - the `MATAIJ` matrix
41: Level: advanced
43: Note:
44: The reference count on the `MATAIJ` matrix is not increased so you should not destroy it.
46: .seealso: `MatCreateKAIJ()`, `MATKAIJ`, `MATAIJ`
47: @*/
48: PetscErrorCode MatKAIJGetAIJ(Mat A, Mat *B)
49: {
50: PetscBool ismpikaij, isseqkaij;
52: PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij);
53: PetscObjectTypeCompare((PetscObject)A, MATSEQKAIJ, &isseqkaij);
54: if (ismpikaij) {
55: Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
57: *B = b->A;
58: } else if (isseqkaij) {
59: Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
61: *B = b->AIJ;
62: } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix passed in is not of type KAIJ");
63: return 0;
64: }
66: /*@C
67: MatKAIJGetS - Get the S matrix describing the shift action of the `MATKAIJ` matrix
69: Not Collective; the entire S is stored and returned independently on all processes.
71: Input Parameter:
72: . A - the `MATKAIJ` matrix
74: Output Parameters:
75: + m - the number of rows in S
76: . n - the number of columns in S
77: - S - the S matrix, in form of a scalar array in column-major format
79: Note:
80: All output parameters are optional (pass NULL if not desired)
82: Level: advanced
84: .seealso: `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
85: @*/
86: PetscErrorCode MatKAIJGetS(Mat A, PetscInt *m, PetscInt *n, PetscScalar **S)
87: {
88: Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
89: if (m) *m = b->p;
90: if (n) *n = b->q;
91: if (S) *S = b->S;
92: return 0;
93: }
95: /*@C
96: MatKAIJGetSRead - Get a read-only pointer to the S matrix describing the shift action of the `MATKAIJ` matrix
98: Not Collective; the entire S is stored and returned independently on all processes.
100: Input Parameter:
101: . A - the `MATKAIJ` matrix
103: Output Parameters:
104: + m - the number of rows in S
105: . n - the number of columns in S
106: - S - the S matrix, in form of a scalar array in column-major format
108: Note:
109: All output parameters are optional (pass NULL if not desired)
111: Level: advanced
113: .seealso: `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
114: @*/
115: PetscErrorCode MatKAIJGetSRead(Mat A, PetscInt *m, PetscInt *n, const PetscScalar **S)
116: {
117: Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
118: if (m) *m = b->p;
119: if (n) *n = b->q;
120: if (S) *S = b->S;
121: return 0;
122: }
124: /*@C
125: MatKAIJRestoreS - Restore array obtained with `MatKAIJGetS()`
127: Not collective
129: Input Parameter:
130: . A - the `MATKAIJ` matrix
132: Output Parameter:
133: . S - location of pointer to array obtained with `MatKAIJGetS()`
135: Note:
136: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
137: If NULL is passed, it will not attempt to zero the array pointer.
139: Level: advanced
141: .seealso: `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetSRead()`, `MatKAIJRestoreSRead()`
142: @*/
143: PetscErrorCode MatKAIJRestoreS(Mat A, PetscScalar **S)
144: {
145: if (S) *S = NULL;
146: PetscObjectStateIncrease((PetscObject)A);
147: return 0;
148: }
150: /*@C
151: MatKAIJRestoreSRead - Restore array obtained with `MatKAIJGetSRead()`
153: Not collective
155: Input Parameter:
156: . A - the `MATKAIJ` matrix
158: Output Parameter:
159: . S - location of pointer to array obtained with `MatKAIJGetS()`
161: Note:
162: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
163: If NULL is passed, it will not attempt to zero the array pointer.
165: Level: advanced
167: .seealso: `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetSRead()`, `MatKAIJRestoreSRead()`
168: @*/
169: PetscErrorCode MatKAIJRestoreSRead(Mat A, const PetscScalar **S)
170: {
171: if (S) *S = NULL;
172: return 0;
173: }
175: /*@C
176: MatKAIJGetT - Get the transformation matrix T associated with the `MATKAIJ` matrix
178: Not Collective; the entire T is stored and returned independently on all processes
180: Input Parameter:
181: . A - the `MATKAIJ` matrix
183: Output Parameters:
184: + m - the number of rows in T
185: . n - the number of columns in T
186: - T - the T matrix, in form of a scalar array in column-major format
188: Note:
189: All output parameters are optional (pass NULL if not desired)
191: Level: advanced
193: .seealso: `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
194: @*/
195: PetscErrorCode MatKAIJGetT(Mat A, PetscInt *m, PetscInt *n, PetscScalar **T)
196: {
197: Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
198: if (m) *m = b->p;
199: if (n) *n = b->q;
200: if (T) *T = b->T;
201: return 0;
202: }
204: /*@C
205: MatKAIJGetTRead - Get a read-only pointer to the transformation matrix T associated with the `MATKAIJ` matrix
207: Not Collective; the entire T is stored and returned independently on all processes
209: Input Parameter:
210: . A - the `MATKAIJ` matrix
212: Output Parameters:
213: + m - the number of rows in T
214: . n - the number of columns in T
215: - T - the T matrix, in form of a scalar array in column-major format
217: Note: All output parameters are optional (pass NULL if not desired)
219: Level: advanced
221: .seealso: `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
222: @*/
223: PetscErrorCode MatKAIJGetTRead(Mat A, PetscInt *m, PetscInt *n, const PetscScalar **T)
224: {
225: Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
226: if (m) *m = b->p;
227: if (n) *n = b->q;
228: if (T) *T = b->T;
229: return 0;
230: }
232: /*@C
233: MatKAIJRestoreT - Restore array obtained with `MatKAIJGetT()`
235: Not collective
237: Input Parameter:
238: . A - the `MATKAIJ` matrix
240: Output Parameter:
241: . T - location of pointer to array obtained with `MatKAIJGetS()`
243: Note:
244: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
245: If NULL is passed, it will not attempt to zero the array pointer.
247: Level: advanced
249: .seealso: `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJGetTRead()`, `MatKAIJRestoreTRead()`
250: @*/
251: PetscErrorCode MatKAIJRestoreT(Mat A, PetscScalar **T)
252: {
253: if (T) *T = NULL;
254: PetscObjectStateIncrease((PetscObject)A);
255: return 0;
256: }
258: /*@C
259: MatKAIJRestoreTRead - Restore array obtained with `MatKAIJGetTRead()`
261: Not collective
263: Input Parameter:
264: . A - the `MATKAIJ` matrix
266: Output Parameter:
267: . T - location of pointer to array obtained with `MatKAIJGetS()`
269: Note:
270: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
271: If NULL is passed, it will not attempt to zero the array pointer.
273: Level: advanced
275: .seealso: `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJGetTRead()`, `MatKAIJRestoreTRead()`
276: @*/
277: PetscErrorCode MatKAIJRestoreTRead(Mat A, const PetscScalar **T)
278: {
279: if (T) *T = NULL;
280: return 0;
281: }
283: /*@
284: MatKAIJSetAIJ - Set the `MATAIJ` matrix describing the blockwise action of the `MATKAIJ` matrix
286: Logically Collective; if the `MATAIJ` matrix is parallel, the `MATKAIJ` matrix is also parallel
288: Input Parameters:
289: + A - the `MATKAIJ` matrix
290: - B - the `MATAIJ` matrix
292: Notes:
293: This function increases the reference count on the `MATAIJ` matrix, so the user is free to destroy the matrix if it is not needed.
295: Changes to the entries of the `MATAIJ` matrix will immediately affect the `MATKAIJ` matrix.
297: Level: advanced
299: .seealso: `MATKAIJ`, `MatKAIJGetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`
300: @*/
301: PetscErrorCode MatKAIJSetAIJ(Mat A, Mat B)
302: {
303: PetscMPIInt size;
304: PetscBool flg;
306: MPI_Comm_size(PetscObjectComm((PetscObject)A), &size);
307: if (size == 1) {
308: PetscObjectTypeCompare((PetscObject)B, MATSEQAIJ, &flg);
310: Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;
311: a->AIJ = B;
312: } else {
313: Mat_MPIKAIJ *a = (Mat_MPIKAIJ *)A->data;
314: a->A = B;
315: }
316: PetscObjectReference((PetscObject)B);
317: return 0;
318: }
320: /*@C
321: MatKAIJSetS - Set the S matrix describing the shift action of the `MATKAIJ` matrix
323: Logically Collective; the entire S is stored independently on all processes.
325: Input Parameters:
326: + A - the `MATKAIJ` matrix
327: . p - the number of rows in S
328: . q - the number of columns in S
329: - S - the S matrix, in form of a scalar array in column-major format
331: Notes:
332: The dimensions p and q must match those of the transformation matrix T associated with the `MATKAIJ` matrix.
334: The S matrix is copied, so the user can destroy this array.
336: Level: advanced
338: .seealso: `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJSetT()`, `MatKAIJSetAIJ()`
339: @*/
340: PetscErrorCode MatKAIJSetS(Mat A, PetscInt p, PetscInt q, const PetscScalar S[])
341: {
342: Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;
344: PetscFree(a->S);
345: if (S) {
346: PetscMalloc1(p * q, &a->S);
347: PetscMemcpy(a->S, S, p * q * sizeof(PetscScalar));
348: } else a->S = NULL;
350: a->p = p;
351: a->q = q;
352: return 0;
353: }
355: /*@C
356: MatKAIJGetScaledIdentity - Check if both S and T are scaled identities.
358: Logically Collective.
360: Input Parameter:
361: . A - the `MATKAIJ` matrix
363: Output Parameter:
364: . identity - the Boolean value
366: Level: Advanced
368: .seealso: `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetT()`
369: @*/
370: PetscErrorCode MatKAIJGetScaledIdentity(Mat A, PetscBool *identity)
371: {
372: Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;
373: PetscInt i, j;
375: if (a->p != a->q) {
376: *identity = PETSC_FALSE;
377: return 0;
378: } else *identity = PETSC_TRUE;
379: if (!a->isTI || a->S) {
380: for (i = 0; i < a->p && *identity; i++) {
381: for (j = 0; j < a->p && *identity; j++) {
382: if (i != j) {
383: if (a->S && PetscAbsScalar(a->S[i + j * a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
384: if (a->T && PetscAbsScalar(a->T[i + j * a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
385: } else {
386: if (a->S && PetscAbsScalar(a->S[i * (a->p + 1)] - a->S[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
387: if (a->T && PetscAbsScalar(a->T[i * (a->p + 1)] - a->T[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
388: }
389: }
390: }
391: }
392: return 0;
393: }
395: /*@C
396: MatKAIJSetT - Set the transformation matrix T associated with the `MATKAIJ` matrix
398: Logically Collective; the entire T is stored independently on all processes.
400: Input Parameters:
401: + A - the `MATKAIJ` matrix
402: . p - the number of rows in S
403: . q - the number of columns in S
404: - T - the T matrix, in form of a scalar array in column-major format
406: Notes:
407: The dimensions p and q must match those of the shift matrix S associated with the `MATKAIJ` matrix.
409: The T matrix is copied, so the user can destroy this array.
411: Level: Advanced
413: .seealso: `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJSetS()`, `MatKAIJSetAIJ()`
414: @*/
415: PetscErrorCode MatKAIJSetT(Mat A, PetscInt p, PetscInt q, const PetscScalar T[])
416: {
417: PetscInt i, j;
418: Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;
419: PetscBool isTI = PETSC_FALSE;
421: /* check if T is an identity matrix */
422: if (T && (p == q)) {
423: isTI = PETSC_TRUE;
424: for (i = 0; i < p; i++) {
425: for (j = 0; j < q; j++) {
426: if (i == j) {
427: /* diagonal term must be 1 */
428: if (T[i + j * p] != 1.0) isTI = PETSC_FALSE;
429: } else {
430: /* off-diagonal term must be 0 */
431: if (T[i + j * p] != 0.0) isTI = PETSC_FALSE;
432: }
433: }
434: }
435: }
436: a->isTI = isTI;
438: PetscFree(a->T);
439: if (T && (!isTI)) {
440: PetscMalloc1(p * q, &a->T);
441: PetscMemcpy(a->T, T, p * q * sizeof(PetscScalar));
442: } else a->T = NULL;
444: a->p = p;
445: a->q = q;
446: return 0;
447: }
449: PetscErrorCode MatDestroy_SeqKAIJ(Mat A)
450: {
451: Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
453: MatDestroy(&b->AIJ);
454: PetscFree(b->S);
455: PetscFree(b->T);
456: PetscFree(b->ibdiag);
457: PetscFree5(b->sor.w, b->sor.y, b->sor.work, b->sor.t, b->sor.arr);
458: PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqkaij_seqaij_C", NULL);
459: PetscFree(A->data);
460: return 0;
461: }
463: PETSC_INTERN PetscErrorCode MatKAIJ_build_AIJ_OAIJ(Mat A)
464: {
465: Mat_MPIKAIJ *a;
466: Mat_MPIAIJ *mpiaij;
467: PetscScalar *T;
468: PetscInt i, j;
469: PetscObjectState state;
471: a = (Mat_MPIKAIJ *)A->data;
472: mpiaij = (Mat_MPIAIJ *)a->A->data;
474: PetscObjectStateGet((PetscObject)a->A, &state);
475: if (state == a->state) {
476: /* The existing AIJ and KAIJ members are up-to-date, so simply exit. */
477: return 0;
478: } else {
479: MatDestroy(&a->AIJ);
480: MatDestroy(&a->OAIJ);
481: if (a->isTI) {
482: /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL.
483: * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will
484: * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix
485: * to pass in. */
486: PetscMalloc1(a->p * a->q, &T);
487: for (i = 0; i < a->p; i++) {
488: for (j = 0; j < a->q; j++) {
489: if (i == j) T[i + j * a->p] = 1.0;
490: else T[i + j * a->p] = 0.0;
491: }
492: }
493: } else T = a->T;
494: MatCreateKAIJ(mpiaij->A, a->p, a->q, a->S, T, &a->AIJ);
495: MatCreateKAIJ(mpiaij->B, a->p, a->q, NULL, T, &a->OAIJ);
496: if (a->isTI) PetscFree(T);
497: a->state = state;
498: }
500: return 0;
501: }
503: PetscErrorCode MatSetUp_KAIJ(Mat A)
504: {
505: PetscInt n;
506: PetscMPIInt size;
507: Mat_SeqKAIJ *seqkaij = (Mat_SeqKAIJ *)A->data;
509: MPI_Comm_size(PetscObjectComm((PetscObject)A), &size);
510: if (size == 1) {
511: MatSetSizes(A, seqkaij->p * seqkaij->AIJ->rmap->n, seqkaij->q * seqkaij->AIJ->cmap->n, seqkaij->p * seqkaij->AIJ->rmap->N, seqkaij->q * seqkaij->AIJ->cmap->N);
512: PetscLayoutSetBlockSize(A->rmap, seqkaij->p);
513: PetscLayoutSetBlockSize(A->cmap, seqkaij->q);
514: PetscLayoutSetUp(A->rmap);
515: PetscLayoutSetUp(A->cmap);
516: } else {
517: Mat_MPIKAIJ *a;
518: Mat_MPIAIJ *mpiaij;
519: IS from, to;
520: Vec gvec;
522: a = (Mat_MPIKAIJ *)A->data;
523: mpiaij = (Mat_MPIAIJ *)a->A->data;
524: MatSetSizes(A, a->p * a->A->rmap->n, a->q * a->A->cmap->n, a->p * a->A->rmap->N, a->q * a->A->cmap->N);
525: PetscLayoutSetBlockSize(A->rmap, seqkaij->p);
526: PetscLayoutSetBlockSize(A->cmap, seqkaij->q);
527: PetscLayoutSetUp(A->rmap);
528: PetscLayoutSetUp(A->cmap);
530: MatKAIJ_build_AIJ_OAIJ(A);
532: VecGetSize(mpiaij->lvec, &n);
533: VecCreate(PETSC_COMM_SELF, &a->w);
534: VecSetSizes(a->w, n * a->q, n * a->q);
535: VecSetBlockSize(a->w, a->q);
536: VecSetType(a->w, VECSEQ);
538: /* create two temporary Index sets for build scatter gather */
539: ISCreateBlock(PetscObjectComm((PetscObject)a->A), a->q, n, mpiaij->garray, PETSC_COPY_VALUES, &from);
540: ISCreateStride(PETSC_COMM_SELF, n * a->q, 0, 1, &to);
542: /* create temporary global vector to generate scatter context */
543: VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A), a->q, a->q * a->A->cmap->n, a->q * a->A->cmap->N, NULL, &gvec);
545: /* generate the scatter context */
546: VecScatterCreate(gvec, from, a->w, to, &a->ctx);
548: ISDestroy(&from);
549: ISDestroy(&to);
550: VecDestroy(&gvec);
551: }
553: A->assembled = PETSC_TRUE;
554: return 0;
555: }
557: PetscErrorCode MatView_KAIJ(Mat A, PetscViewer viewer)
558: {
559: PetscViewerFormat format;
560: Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;
561: Mat B;
562: PetscInt i;
563: PetscBool ismpikaij;
565: PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij);
566: PetscViewerGetFormat(viewer, &format);
567: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL || format == PETSC_VIEWER_ASCII_IMPL) {
568: PetscViewerASCIIPrintf(viewer, "S and T have %" PetscInt_FMT " rows and %" PetscInt_FMT " columns\n", a->p, a->q);
570: /* Print appropriate details for S. */
571: if (!a->S) {
572: PetscViewerASCIIPrintf(viewer, "S is NULL\n");
573: } else if (format == PETSC_VIEWER_ASCII_IMPL) {
574: PetscViewerASCIIPrintf(viewer, "Entries of S are ");
575: for (i = 0; i < (a->p * a->q); i++) {
576: #if defined(PETSC_USE_COMPLEX)
577: PetscViewerASCIIPrintf(viewer, "%18.16e %18.16e ", (double)PetscRealPart(a->S[i]), (double)PetscImaginaryPart(a->S[i]));
578: #else
579: PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(a->S[i]));
580: #endif
581: }
582: PetscViewerASCIIPrintf(viewer, "\n");
583: }
585: /* Print appropriate details for T. */
586: if (a->isTI) {
587: PetscViewerASCIIPrintf(viewer, "T is the identity matrix\n");
588: } else if (!a->T) {
589: PetscViewerASCIIPrintf(viewer, "T is NULL\n");
590: } else if (format == PETSC_VIEWER_ASCII_IMPL) {
591: PetscViewerASCIIPrintf(viewer, "Entries of T are ");
592: for (i = 0; i < (a->p * a->q); i++) {
593: #if defined(PETSC_USE_COMPLEX)
594: PetscViewerASCIIPrintf(viewer, "%18.16e %18.16e ", (double)PetscRealPart(a->T[i]), (double)PetscImaginaryPart(a->T[i]));
595: #else
596: PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(a->T[i]));
597: #endif
598: }
599: PetscViewerASCIIPrintf(viewer, "\n");
600: }
602: /* Now print details for the AIJ matrix, using the AIJ viewer. */
603: PetscViewerASCIIPrintf(viewer, "Now viewing the associated AIJ matrix:\n");
604: if (ismpikaij) {
605: Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
606: MatView(b->A, viewer);
607: } else {
608: MatView(a->AIJ, viewer);
609: }
611: } else {
612: /* For all other matrix viewer output formats, simply convert to an AIJ matrix and call MatView() on that. */
613: MatConvert(A, MATAIJ, MAT_INITIAL_MATRIX, &B);
614: MatView(B, viewer);
615: MatDestroy(&B);
616: }
617: return 0;
618: }
620: PetscErrorCode MatDestroy_MPIKAIJ(Mat A)
621: {
622: Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
624: MatDestroy(&b->AIJ);
625: MatDestroy(&b->OAIJ);
626: MatDestroy(&b->A);
627: VecScatterDestroy(&b->ctx);
628: VecDestroy(&b->w);
629: PetscFree(b->S);
630: PetscFree(b->T);
631: PetscFree(b->ibdiag);
632: PetscObjectComposeFunction((PetscObject)A, "MatGetDiagonalBlock_C", NULL);
633: PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpikaij_mpiaij_C", NULL);
634: PetscFree(A->data);
635: return 0;
636: }
638: /* --------------------------------------------------------------------------------------*/
640: /* zz = yy + Axx */
641: PetscErrorCode MatMultAdd_SeqKAIJ(Mat A, Vec xx, Vec yy, Vec zz)
642: {
643: Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
644: Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
645: const PetscScalar *s = b->S, *t = b->T;
646: const PetscScalar *x, *v, *bx;
647: PetscScalar *y, *sums;
648: const PetscInt m = b->AIJ->rmap->n, *idx, *ii;
649: PetscInt n, i, jrow, j, l, p = b->p, q = b->q, k;
651: if (!yy) {
652: VecSet(zz, 0.0);
653: } else {
654: VecCopy(yy, zz);
655: }
656: if ((!s) && (!t) && (!b->isTI)) return 0;
658: VecGetArrayRead(xx, &x);
659: VecGetArray(zz, &y);
660: idx = a->j;
661: v = a->a;
662: ii = a->i;
664: if (b->isTI) {
665: for (i = 0; i < m; i++) {
666: jrow = ii[i];
667: n = ii[i + 1] - jrow;
668: sums = y + p * i;
669: for (j = 0; j < n; j++) {
670: for (k = 0; k < p; k++) sums[k] += v[jrow + j] * x[q * idx[jrow + j] + k];
671: }
672: }
673: PetscLogFlops(3.0 * (a->nz) * p);
674: } else if (t) {
675: for (i = 0; i < m; i++) {
676: jrow = ii[i];
677: n = ii[i + 1] - jrow;
678: sums = y + p * i;
679: for (j = 0; j < n; j++) {
680: for (k = 0; k < p; k++) {
681: for (l = 0; l < q; l++) sums[k] += v[jrow + j] * t[k + l * p] * x[q * idx[jrow + j] + l];
682: }
683: }
684: }
685: /* The flop count below assumes that v[jrow+j] is hoisted out (which an optimizing compiler is likely to do),
686: * and also that T part is hoisted outside this loop (in exchange for temporary storage) as (A \otimes I) (I \otimes T),
687: * so that this multiply doesn't have to be redone for each matrix entry, but just once per column. The latter
688: * transformation is much less likely to be applied, but we nonetheless count the minimum flops required. */
689: PetscLogFlops((2.0 * p * q - p) * m + 2.0 * p * a->nz);
690: }
691: if (s) {
692: for (i = 0; i < m; i++) {
693: sums = y + p * i;
694: bx = x + q * i;
695: if (i < b->AIJ->cmap->n) {
696: for (j = 0; j < q; j++) {
697: for (k = 0; k < p; k++) sums[k] += s[k + j * p] * bx[j];
698: }
699: }
700: }
701: PetscLogFlops(2.0 * m * p * q);
702: }
704: VecRestoreArrayRead(xx, &x);
705: VecRestoreArray(zz, &y);
706: return 0;
707: }
709: PetscErrorCode MatMult_SeqKAIJ(Mat A, Vec xx, Vec yy)
710: {
711: MatMultAdd_SeqKAIJ(A, xx, PETSC_NULL, yy);
712: return 0;
713: }
715: #include <petsc/private/kernels/blockinvert.h>
717: PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A, const PetscScalar **values)
718: {
719: Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
720: Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
721: const PetscScalar *S = b->S;
722: const PetscScalar *T = b->T;
723: const PetscScalar *v = a->a;
724: const PetscInt p = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i;
725: PetscInt i, j, *v_pivots, dof, dof2;
726: PetscScalar *diag, aval, *v_work;
731: dof = p;
732: dof2 = dof * dof;
734: if (b->ibdiagvalid) {
735: if (values) *values = b->ibdiag;
736: return 0;
737: }
738: if (!b->ibdiag) { PetscMalloc1(dof2 * m, &b->ibdiag); }
739: if (values) *values = b->ibdiag;
740: diag = b->ibdiag;
742: PetscMalloc2(dof, &v_work, dof, &v_pivots);
743: for (i = 0; i < m; i++) {
744: if (S) {
745: PetscMemcpy(diag, S, dof2 * sizeof(PetscScalar));
746: } else {
747: PetscMemzero(diag, dof2 * sizeof(PetscScalar));
748: }
749: if (b->isTI) {
750: aval = 0;
751: for (j = ii[i]; j < ii[i + 1]; j++)
752: if (idx[j] == i) aval = v[j];
753: for (j = 0; j < dof; j++) diag[j + dof * j] += aval;
754: } else if (T) {
755: aval = 0;
756: for (j = ii[i]; j < ii[i + 1]; j++)
757: if (idx[j] == i) aval = v[j];
758: for (j = 0; j < dof2; j++) diag[j] += aval * T[j];
759: }
760: PetscKernel_A_gets_inverse_A(dof, diag, v_pivots, v_work, PETSC_FALSE, NULL);
761: diag += dof2;
762: }
763: PetscFree2(v_work, v_pivots);
765: b->ibdiagvalid = PETSC_TRUE;
766: return 0;
767: }
769: static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A, Mat *B)
770: {
771: Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ *)A->data;
773: *B = kaij->AIJ;
774: return 0;
775: }
777: static PetscErrorCode MatConvert_KAIJ_AIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
778: {
779: Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;
780: Mat AIJ, OAIJ, B;
781: PetscInt *d_nnz, *o_nnz = NULL, nz, i, j, m, d;
782: const PetscInt p = a->p, q = a->q;
783: PetscBool ismpikaij, missing;
785: if (reuse != MAT_REUSE_MATRIX) {
786: PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij);
787: if (ismpikaij) {
788: Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
789: AIJ = ((Mat_SeqKAIJ *)b->AIJ->data)->AIJ;
790: OAIJ = ((Mat_SeqKAIJ *)b->OAIJ->data)->AIJ;
791: } else {
792: AIJ = a->AIJ;
793: OAIJ = NULL;
794: }
795: MatCreate(PetscObjectComm((PetscObject)A), &B);
796: MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N);
797: MatSetType(B, MATAIJ);
798: MatGetSize(AIJ, &m, NULL);
799: MatMissingDiagonal(AIJ, &missing, &d); /* assumption that all successive rows will have a missing diagonal */
800: if (!missing || !a->S) d = m;
801: PetscMalloc1(m * p, &d_nnz);
802: for (i = 0; i < m; ++i) {
803: MatGetRow_SeqAIJ(AIJ, i, &nz, NULL, NULL);
804: for (j = 0; j < p; ++j) d_nnz[i * p + j] = nz * q + (i >= d) * q;
805: MatRestoreRow_SeqAIJ(AIJ, i, &nz, NULL, NULL);
806: }
807: if (OAIJ) {
808: PetscMalloc1(m * p, &o_nnz);
809: for (i = 0; i < m; ++i) {
810: MatGetRow_SeqAIJ(OAIJ, i, &nz, NULL, NULL);
811: for (j = 0; j < p; ++j) o_nnz[i * p + j] = nz * q;
812: MatRestoreRow_SeqAIJ(OAIJ, i, &nz, NULL, NULL);
813: }
814: MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz);
815: } else {
816: MatSeqAIJSetPreallocation(B, 0, d_nnz);
817: }
818: PetscFree(d_nnz);
819: PetscFree(o_nnz);
820: } else B = *newmat;
821: MatConvert_Basic(A, newtype, MAT_REUSE_MATRIX, &B);
822: if (reuse == MAT_INPLACE_MATRIX) {
823: MatHeaderReplace(A, &B);
824: } else *newmat = B;
825: return 0;
826: }
828: PetscErrorCode MatSOR_SeqKAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
829: {
830: Mat_SeqKAIJ *kaij = (Mat_SeqKAIJ *)A->data;
831: Mat_SeqAIJ *a = (Mat_SeqAIJ *)kaij->AIJ->data;
832: const PetscScalar *aa = a->a, *T = kaij->T, *v;
833: const PetscInt m = kaij->AIJ->rmap->n, *ai = a->i, *aj = a->j, p = kaij->p, q = kaij->q, *diag, *vi;
834: const PetscScalar *b, *xb, *idiag;
835: PetscScalar *x, *work, *workt, *w, *y, *arr, *t, *arrt;
836: PetscInt i, j, k, i2, bs, bs2, nz;
838: its = its * lits;
844: bs = p;
845: bs2 = bs * bs;
847: if (!m) return 0;
849: if (!kaij->ibdiagvalid) MatInvertBlockDiagonal_SeqKAIJ(A, NULL);
850: idiag = kaij->ibdiag;
851: diag = a->diag;
853: if (!kaij->sor.setup) {
854: PetscMalloc5(bs, &kaij->sor.w, bs, &kaij->sor.y, m * bs, &kaij->sor.work, m * bs, &kaij->sor.t, m * bs2, &kaij->sor.arr);
855: kaij->sor.setup = PETSC_TRUE;
856: }
857: y = kaij->sor.y;
858: w = kaij->sor.w;
859: work = kaij->sor.work;
860: t = kaij->sor.t;
861: arr = kaij->sor.arr;
863: VecGetArray(xx, &x);
864: VecGetArrayRead(bb, &b);
866: if (flag & SOR_ZERO_INITIAL_GUESS) {
867: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
868: PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x); /* x[0:bs] <- D^{-1} b[0:bs] */
869: PetscMemcpy(t, b, bs * sizeof(PetscScalar));
870: i2 = bs;
871: idiag += bs2;
872: for (i = 1; i < m; i++) {
873: v = aa + ai[i];
874: vi = aj + ai[i];
875: nz = diag[i] - ai[i];
877: if (T) { /* b - T (Arow * x) */
878: PetscMemzero(w, bs * sizeof(PetscScalar));
879: for (j = 0; j < nz; j++) {
880: for (k = 0; k < bs; k++) w[k] -= v[j] * x[vi[j] * bs + k];
881: }
882: PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs, w, T, &t[i2]);
883: for (k = 0; k < bs; k++) t[i2 + k] += b[i2 + k];
884: } else if (kaij->isTI) {
885: PetscMemcpy(t + i2, b + i2, bs * sizeof(PetscScalar));
886: for (j = 0; j < nz; j++) {
887: for (k = 0; k < bs; k++) t[i2 + k] -= v[j] * x[vi[j] * bs + k];
888: }
889: } else {
890: PetscMemcpy(t + i2, b + i2, bs * sizeof(PetscScalar));
891: }
893: PetscKernel_w_gets_Ar_times_v(bs, bs, t + i2, idiag, y);
894: for (j = 0; j < bs; j++) x[i2 + j] = omega * y[j];
896: idiag += bs2;
897: i2 += bs;
898: }
899: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
900: PetscLogFlops(1.0 * bs2 * a->nz);
901: xb = t;
902: } else xb = b;
903: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
904: idiag = kaij->ibdiag + bs2 * (m - 1);
905: i2 = bs * (m - 1);
906: PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar));
907: PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
908: i2 -= bs;
909: idiag -= bs2;
910: for (i = m - 2; i >= 0; i--) {
911: v = aa + diag[i] + 1;
912: vi = aj + diag[i] + 1;
913: nz = ai[i + 1] - diag[i] - 1;
915: if (T) { /* FIXME: This branch untested */
916: PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar));
917: /* copy all rows of x that are needed into contiguous space */
918: workt = work;
919: for (j = 0; j < nz; j++) {
920: PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar));
921: workt += bs;
922: }
923: arrt = arr;
924: for (j = 0; j < nz; j++) {
925: PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar));
926: for (k = 0; k < bs2; k++) arrt[k] *= v[j];
927: arrt += bs2;
928: }
929: PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
930: } else if (kaij->isTI) {
931: PetscMemcpy(w, t + i2, bs * sizeof(PetscScalar));
932: for (j = 0; j < nz; j++) {
933: for (k = 0; k < bs; k++) w[k] -= v[j] * x[vi[j] * bs + k];
934: }
935: }
937: PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y); /* RHS incorrect for omega != 1.0 */
938: for (j = 0; j < bs; j++) x[i2 + j] = (1.0 - omega) * x[i2 + j] + omega * y[j];
940: idiag -= bs2;
941: i2 -= bs;
942: }
943: PetscLogFlops(1.0 * bs2 * (a->nz));
944: }
945: its--;
946: }
947: while (its--) { /* FIXME: This branch not updated */
948: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
949: i2 = 0;
950: idiag = kaij->ibdiag;
951: for (i = 0; i < m; i++) {
952: PetscMemcpy(w, b + i2, bs * sizeof(PetscScalar));
954: v = aa + ai[i];
955: vi = aj + ai[i];
956: nz = diag[i] - ai[i];
957: workt = work;
958: for (j = 0; j < nz; j++) {
959: PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar));
960: workt += bs;
961: }
962: arrt = arr;
963: if (T) {
964: for (j = 0; j < nz; j++) {
965: PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar));
966: for (k = 0; k < bs2; k++) arrt[k] *= v[j];
967: arrt += bs2;
968: }
969: PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
970: } else if (kaij->isTI) {
971: for (j = 0; j < nz; j++) {
972: PetscMemzero(arrt, bs2 * sizeof(PetscScalar));
973: for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
974: arrt += bs2;
975: }
976: PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
977: }
978: PetscMemcpy(t + i2, w, bs * sizeof(PetscScalar));
980: v = aa + diag[i] + 1;
981: vi = aj + diag[i] + 1;
982: nz = ai[i + 1] - diag[i] - 1;
983: workt = work;
984: for (j = 0; j < nz; j++) {
985: PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar));
986: workt += bs;
987: }
988: arrt = arr;
989: if (T) {
990: for (j = 0; j < nz; j++) {
991: PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar));
992: for (k = 0; k < bs2; k++) arrt[k] *= v[j];
993: arrt += bs2;
994: }
995: PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
996: } else if (kaij->isTI) {
997: for (j = 0; j < nz; j++) {
998: PetscMemzero(arrt, bs2 * sizeof(PetscScalar));
999: for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
1000: arrt += bs2;
1001: }
1002: PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1003: }
1005: PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y);
1006: for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j);
1008: idiag += bs2;
1009: i2 += bs;
1010: }
1011: xb = t;
1012: } else xb = b;
1013: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1014: idiag = kaij->ibdiag + bs2 * (m - 1);
1015: i2 = bs * (m - 1);
1016: if (xb == b) {
1017: for (i = m - 1; i >= 0; i--) {
1018: PetscMemcpy(w, b + i2, bs * sizeof(PetscScalar));
1020: v = aa + ai[i];
1021: vi = aj + ai[i];
1022: nz = diag[i] - ai[i];
1023: workt = work;
1024: for (j = 0; j < nz; j++) {
1025: PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar));
1026: workt += bs;
1027: }
1028: arrt = arr;
1029: if (T) {
1030: for (j = 0; j < nz; j++) {
1031: PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar));
1032: for (k = 0; k < bs2; k++) arrt[k] *= v[j];
1033: arrt += bs2;
1034: }
1035: PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1036: } else if (kaij->isTI) {
1037: for (j = 0; j < nz; j++) {
1038: PetscMemzero(arrt, bs2 * sizeof(PetscScalar));
1039: for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
1040: arrt += bs2;
1041: }
1042: PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1043: }
1045: v = aa + diag[i] + 1;
1046: vi = aj + diag[i] + 1;
1047: nz = ai[i + 1] - diag[i] - 1;
1048: workt = work;
1049: for (j = 0; j < nz; j++) {
1050: PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar));
1051: workt += bs;
1052: }
1053: arrt = arr;
1054: if (T) {
1055: for (j = 0; j < nz; j++) {
1056: PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar));
1057: for (k = 0; k < bs2; k++) arrt[k] *= v[j];
1058: arrt += bs2;
1059: }
1060: PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1061: } else if (kaij->isTI) {
1062: for (j = 0; j < nz; j++) {
1063: PetscMemzero(arrt, bs2 * sizeof(PetscScalar));
1064: for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
1065: arrt += bs2;
1066: }
1067: PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1068: }
1070: PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y);
1071: for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j);
1072: }
1073: } else {
1074: for (i = m - 1; i >= 0; i--) {
1075: PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar));
1076: v = aa + diag[i] + 1;
1077: vi = aj + diag[i] + 1;
1078: nz = ai[i + 1] - diag[i] - 1;
1079: workt = work;
1080: for (j = 0; j < nz; j++) {
1081: PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar));
1082: workt += bs;
1083: }
1084: arrt = arr;
1085: if (T) {
1086: for (j = 0; j < nz; j++) {
1087: PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar));
1088: for (k = 0; k < bs2; k++) arrt[k] *= v[j];
1089: arrt += bs2;
1090: }
1091: PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1092: } else if (kaij->isTI) {
1093: for (j = 0; j < nz; j++) {
1094: PetscMemzero(arrt, bs2 * sizeof(PetscScalar));
1095: for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
1096: arrt += bs2;
1097: }
1098: PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
1099: }
1100: PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y);
1101: for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j);
1102: }
1103: }
1104: PetscLogFlops(1.0 * bs2 * (a->nz));
1105: }
1106: }
1108: VecRestoreArray(xx, &x);
1109: VecRestoreArrayRead(bb, &b);
1110: return 0;
1111: }
1113: /*===================================================================================*/
1115: PetscErrorCode MatMultAdd_MPIKAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1116: {
1117: Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
1119: if (!yy) {
1120: VecSet(zz, 0.0);
1121: } else {
1122: VecCopy(yy, zz);
1123: }
1124: MatKAIJ_build_AIJ_OAIJ(A); /* Ensure b->AIJ and b->OAIJ are up to date. */
1125: /* start the scatter */
1126: VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD);
1127: (*b->AIJ->ops->multadd)(b->AIJ, xx, zz, zz);
1128: VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD);
1129: (*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz);
1130: return 0;
1131: }
1133: PetscErrorCode MatMult_MPIKAIJ(Mat A, Vec xx, Vec yy)
1134: {
1135: MatMultAdd_MPIKAIJ(A, xx, PETSC_NULL, yy);
1136: return 0;
1137: }
1139: PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A, const PetscScalar **values)
1140: {
1141: Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
1143: MatKAIJ_build_AIJ_OAIJ(A); /* Ensure b->AIJ is up to date. */
1144: (*b->AIJ->ops->invertblockdiagonal)(b->AIJ, values);
1145: return 0;
1146: }
1148: /* ----------------------------------------------------------------*/
1150: PetscErrorCode MatGetRow_SeqKAIJ(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **values)
1151: {
1152: Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
1153: PetscErrorCode diag = PETSC_FALSE;
1154: PetscInt nzaij, nz, *colsaij, *idx, i, j, p = b->p, q = b->q, r = row / p, s = row % p, c;
1155: PetscScalar *vaij, *v, *S = b->S, *T = b->T;
1158: b->getrowactive = PETSC_TRUE;
1161: if ((!S) && (!T) && (!b->isTI)) {
1162: if (ncols) *ncols = 0;
1163: if (cols) *cols = NULL;
1164: if (values) *values = NULL;
1165: return 0;
1166: }
1168: if (T || b->isTI) {
1169: MatGetRow_SeqAIJ(b->AIJ, r, &nzaij, &colsaij, &vaij);
1170: c = nzaij;
1171: for (i = 0; i < nzaij; i++) {
1172: /* check if this row contains a diagonal entry */
1173: if (colsaij[i] == r) {
1174: diag = PETSC_TRUE;
1175: c = i;
1176: }
1177: }
1178: } else nzaij = c = 0;
1180: /* calculate size of row */
1181: nz = 0;
1182: if (S) nz += q;
1183: if (T || b->isTI) nz += (diag && S ? (nzaij - 1) * q : nzaij * q);
1185: if (cols || values) {
1186: PetscMalloc2(nz, &idx, nz, &v);
1187: for (i = 0; i < q; i++) {
1188: /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1189: v[i] = 0.0;
1190: }
1191: if (b->isTI) {
1192: for (i = 0; i < nzaij; i++) {
1193: for (j = 0; j < q; j++) {
1194: idx[i * q + j] = colsaij[i] * q + j;
1195: v[i * q + j] = (j == s ? vaij[i] : 0);
1196: }
1197: }
1198: } else if (T) {
1199: for (i = 0; i < nzaij; i++) {
1200: for (j = 0; j < q; j++) {
1201: idx[i * q + j] = colsaij[i] * q + j;
1202: v[i * q + j] = vaij[i] * T[s + j * p];
1203: }
1204: }
1205: }
1206: if (S) {
1207: for (j = 0; j < q; j++) {
1208: idx[c * q + j] = r * q + j;
1209: v[c * q + j] += S[s + j * p];
1210: }
1211: }
1212: }
1214: if (ncols) *ncols = nz;
1215: if (cols) *cols = idx;
1216: if (values) *values = v;
1217: return 0;
1218: }
1220: PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1221: {
1222: if (nz) *nz = 0;
1223: PetscFree2(*idx, *v);
1224: ((Mat_SeqKAIJ *)A->data)->getrowactive = PETSC_FALSE;
1225: return 0;
1226: }
1228: PetscErrorCode MatGetRow_MPIKAIJ(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **values)
1229: {
1230: Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
1231: Mat AIJ = b->A;
1232: PetscBool diag = PETSC_FALSE;
1233: Mat MatAIJ, MatOAIJ;
1234: const PetscInt rstart = A->rmap->rstart, rend = A->rmap->rend, p = b->p, q = b->q, *garray;
1235: PetscInt nz, *idx, ncolsaij = 0, ncolsoaij = 0, *colsaij, *colsoaij, r, s, c, i, j, lrow;
1236: PetscScalar *v, *vals, *ovals, *S = b->S, *T = b->T;
1238: MatKAIJ_build_AIJ_OAIJ(A); /* Ensure b->AIJ and b->OAIJ are up to date. */
1239: MatAIJ = ((Mat_SeqKAIJ *)b->AIJ->data)->AIJ;
1240: MatOAIJ = ((Mat_SeqKAIJ *)b->OAIJ->data)->AIJ;
1242: b->getrowactive = PETSC_TRUE;
1244: lrow = row - rstart;
1246: if ((!S) && (!T) && (!b->isTI)) {
1247: if (ncols) *ncols = 0;
1248: if (cols) *cols = NULL;
1249: if (values) *values = NULL;
1250: return 0;
1251: }
1253: r = lrow / p;
1254: s = lrow % p;
1256: if (T || b->isTI) {
1257: MatMPIAIJGetSeqAIJ(AIJ, NULL, NULL, &garray);
1258: MatGetRow_SeqAIJ(MatAIJ, lrow / p, &ncolsaij, &colsaij, &vals);
1259: MatGetRow_SeqAIJ(MatOAIJ, lrow / p, &ncolsoaij, &colsoaij, &ovals);
1261: c = ncolsaij + ncolsoaij;
1262: for (i = 0; i < ncolsaij; i++) {
1263: /* check if this row contains a diagonal entry */
1264: if (colsaij[i] == r) {
1265: diag = PETSC_TRUE;
1266: c = i;
1267: }
1268: }
1269: } else c = 0;
1271: /* calculate size of row */
1272: nz = 0;
1273: if (S) nz += q;
1274: if (T || b->isTI) nz += (diag && S ? (ncolsaij + ncolsoaij - 1) * q : (ncolsaij + ncolsoaij) * q);
1276: if (cols || values) {
1277: PetscMalloc2(nz, &idx, nz, &v);
1278: for (i = 0; i < q; i++) {
1279: /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1280: v[i] = 0.0;
1281: }
1282: if (b->isTI) {
1283: for (i = 0; i < ncolsaij; i++) {
1284: for (j = 0; j < q; j++) {
1285: idx[i * q + j] = (colsaij[i] + rstart / p) * q + j;
1286: v[i * q + j] = (j == s ? vals[i] : 0.0);
1287: }
1288: }
1289: for (i = 0; i < ncolsoaij; i++) {
1290: for (j = 0; j < q; j++) {
1291: idx[(i + ncolsaij) * q + j] = garray[colsoaij[i]] * q + j;
1292: v[(i + ncolsaij) * q + j] = (j == s ? ovals[i] : 0.0);
1293: }
1294: }
1295: } else if (T) {
1296: for (i = 0; i < ncolsaij; i++) {
1297: for (j = 0; j < q; j++) {
1298: idx[i * q + j] = (colsaij[i] + rstart / p) * q + j;
1299: v[i * q + j] = vals[i] * T[s + j * p];
1300: }
1301: }
1302: for (i = 0; i < ncolsoaij; i++) {
1303: for (j = 0; j < q; j++) {
1304: idx[(i + ncolsaij) * q + j] = garray[colsoaij[i]] * q + j;
1305: v[(i + ncolsaij) * q + j] = ovals[i] * T[s + j * p];
1306: }
1307: }
1308: }
1309: if (S) {
1310: for (j = 0; j < q; j++) {
1311: idx[c * q + j] = (r + rstart / p) * q + j;
1312: v[c * q + j] += S[s + j * p];
1313: }
1314: }
1315: }
1317: if (ncols) *ncols = nz;
1318: if (cols) *cols = idx;
1319: if (values) *values = v;
1320: return 0;
1321: }
1323: PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1324: {
1325: PetscFree2(*idx, *v);
1326: ((Mat_SeqKAIJ *)A->data)->getrowactive = PETSC_FALSE;
1327: return 0;
1328: }
1330: PetscErrorCode MatCreateSubMatrix_KAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat)
1331: {
1332: Mat A;
1334: MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A);
1335: MatCreateSubMatrix(A, isrow, iscol, cll, newmat);
1336: MatDestroy(&A);
1337: return 0;
1338: }
1340: /* ---------------------------------------------------------------------------------- */
1341: /*@C
1342: MatCreateKAIJ - Creates a matrix of type `MATKAIJ` to be used for matrices of the following form:
1344: [I \otimes S + A \otimes T]
1346: where
1347: S is a dense (p \times q) matrix
1348: T is a dense (p \times q) matrix
1349: A is an `MATAIJ` (n \times n) matrix
1350: I is the identity matrix
1351: The resulting matrix is (np \times nq)
1353: S and T are always stored independently on all processes as `PetscScalar` arrays in column-major format.
1355: Collective
1357: Input Parameters:
1358: + A - the `MATAIJ` matrix
1359: . p - number of rows in S and T
1360: . q - number of columns in S and T
1361: . S - the S matrix (can be NULL), stored as a `PetscScalar` array (column-major)
1362: - T - the T matrix (can be NULL), stored as a `PetscScalar` array (column-major)
1364: Output Parameter:
1365: . kaij - the new `MATKAIJ` matrix
1367: Notes:
1368: This function increases the reference count on the `MATAIJ` matrix, so the user is free to destroy the matrix if it is not needed.
1370: Changes to the entries of the `MATAIJ` matrix will immediately affect the `MATKAIJ` matrix.
1372: Developer Note:
1373: In the `MATMPIKAIJ` case, the internal 'AIJ' and 'OAIJ' sequential KAIJ matrices are kept up to date by tracking the object state
1374: of the AIJ matrix 'A' that describes the blockwise action of the MATMPIKAIJ matrix and, if the object state has changed, lazily
1375: rebuilding 'AIJ' and 'OAIJ' just before executing operations with the MATMPIKAIJ matrix. If new types of operations are added,
1376: routines implementing those must also ensure these are rebuilt when needed (by calling the internal MatKAIJ_build_AIJ_OAIJ() routine).
1378: Level: advanced
1380: .seealso: `MatKAIJSetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`, `MatKAIJGetAIJ()`, `MatKAIJGetS()`, `MatKAIJGetT()`, `MATKAIJ`
1381: @*/
1382: PetscErrorCode MatCreateKAIJ(Mat A, PetscInt p, PetscInt q, const PetscScalar S[], const PetscScalar T[], Mat *kaij)
1383: {
1384: MatCreate(PetscObjectComm((PetscObject)A), kaij);
1385: MatSetType(*kaij, MATKAIJ);
1386: MatKAIJSetAIJ(*kaij, A);
1387: MatKAIJSetS(*kaij, p, q, S);
1388: MatKAIJSetT(*kaij, p, q, T);
1389: MatSetUp(*kaij);
1390: return 0;
1391: }
1393: /*MC
1394: MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of form
1395: [I \otimes S + A \otimes T],
1396: where
1397: S is a dense (p \times q) matrix,
1398: T is a dense (p \times q) matrix,
1399: A is an AIJ (n \times n) matrix,
1400: and I is the identity matrix.
1401: The resulting matrix is (np \times nq).
1403: S and T are always stored independently on all processes as `PetscScalar` arrays in column-major format.
1405: Note:
1406: A linear system with multiple right-hand sides, AX = B, can be expressed in the KAIJ-friendly form of (A \otimes I) x = b,
1407: where x and b are column vectors containing the row-major representations of X and B.
1409: Level: advanced
1411: .seealso: `MatKAIJSetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`, `MatKAIJGetAIJ()`, `MatKAIJGetS()`, `MatKAIJGetT()`, `MatCreateKAIJ()`
1412: M*/
1414: PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A)
1415: {
1416: Mat_MPIKAIJ *b;
1417: PetscMPIInt size;
1419: PetscNew(&b);
1420: A->data = (void *)b;
1422: PetscMemzero(A->ops, sizeof(struct _MatOps));
1424: b->w = NULL;
1425: MPI_Comm_size(PetscObjectComm((PetscObject)A), &size);
1426: if (size == 1) {
1427: PetscObjectChangeTypeName((PetscObject)A, MATSEQKAIJ);
1428: A->ops->destroy = MatDestroy_SeqKAIJ;
1429: A->ops->mult = MatMult_SeqKAIJ;
1430: A->ops->multadd = MatMultAdd_SeqKAIJ;
1431: A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ;
1432: A->ops->getrow = MatGetRow_SeqKAIJ;
1433: A->ops->restorerow = MatRestoreRow_SeqKAIJ;
1434: A->ops->sor = MatSOR_SeqKAIJ;
1435: PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqkaij_seqaij_C", MatConvert_KAIJ_AIJ);
1436: } else {
1437: PetscObjectChangeTypeName((PetscObject)A, MATMPIKAIJ);
1438: A->ops->destroy = MatDestroy_MPIKAIJ;
1439: A->ops->mult = MatMult_MPIKAIJ;
1440: A->ops->multadd = MatMultAdd_MPIKAIJ;
1441: A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ;
1442: A->ops->getrow = MatGetRow_MPIKAIJ;
1443: A->ops->restorerow = MatRestoreRow_MPIKAIJ;
1444: PetscObjectComposeFunction((PetscObject)A, "MatGetDiagonalBlock_C", MatGetDiagonalBlock_MPIKAIJ);
1445: PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpikaij_mpiaij_C", MatConvert_KAIJ_AIJ);
1446: }
1447: A->ops->setup = MatSetUp_KAIJ;
1448: A->ops->view = MatView_KAIJ;
1449: A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ;
1450: return 0;
1451: }