Actual source code: maij.c
2: #include <../src/mat/impls/maij/maij.h>
3: #include <../src/mat/utils/freespace.h>
5: /*@
6: MatMAIJGetAIJ - Get the `MATAIJ` matrix describing the blockwise action of the `MATMAIJ` matrix
8: Not Collective, but if the `MATMAIJ` matrix is parallel, the `MATAIJ` matrix is also parallel
10: Input Parameter:
11: . A - the `MATMAIJ` matrix
13: Output Parameter:
14: . B - the `MATAIJ` matrix
16: Level: advanced
18: Note:
19: The reference count on the `MATAIJ` matrix is not increased so you should not destroy it.
21: .seealso: `MATMAIJ`, `MATAIJ`, `MatCreateMAIJ()`
22: @*/
23: PetscErrorCode MatMAIJGetAIJ(Mat A, Mat *B)
24: {
25: PetscBool ismpimaij, isseqmaij;
27: PetscObjectTypeCompare((PetscObject)A, MATMPIMAIJ, &ismpimaij);
28: PetscObjectTypeCompare((PetscObject)A, MATSEQMAIJ, &isseqmaij);
29: if (ismpimaij) {
30: Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
32: *B = b->A;
33: } else if (isseqmaij) {
34: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
36: *B = b->AIJ;
37: } else {
38: *B = A;
39: }
40: return 0;
41: }
43: /*@
44: MatMAIJRedimension - Get a new `MATMAIJ` matrix with the same action, but for a different block size
46: Logically Collective
48: Input Parameters:
49: + A - the `MATMAIJ` matrix
50: - dof - the block size for the new matrix
52: Output Parameter:
53: . B - the new `MATMAIJ` matrix
55: Level: advanced
57: .seealso: `MATMAIJ`, `MatCreateMAIJ()`
58: @*/
59: PetscErrorCode MatMAIJRedimension(Mat A, PetscInt dof, Mat *B)
60: {
61: Mat Aij = NULL;
64: MatMAIJGetAIJ(A, &Aij);
65: MatCreateMAIJ(Aij, dof, B);
66: return 0;
67: }
69: PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
70: {
71: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
73: MatDestroy(&b->AIJ);
74: PetscFree(A->data);
75: PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaijcusparse_C", NULL);
76: PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaij_C", NULL);
77: PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaij_seqmaij_C", NULL);
78: return 0;
79: }
81: PetscErrorCode MatSetUp_MAIJ(Mat A)
82: {
83: SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Must use MatCreateMAIJ() to create MAIJ matrices");
84: }
86: PetscErrorCode MatView_SeqMAIJ(Mat A, PetscViewer viewer)
87: {
88: Mat B;
90: MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B);
91: MatView(B, viewer);
92: MatDestroy(&B);
93: return 0;
94: }
96: PetscErrorCode MatView_MPIMAIJ(Mat A, PetscViewer viewer)
97: {
98: Mat B;
100: MatConvert(A, MATMPIAIJ, MAT_INITIAL_MATRIX, &B);
101: MatView(B, viewer);
102: MatDestroy(&B);
103: return 0;
104: }
106: PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
107: {
108: Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
110: MatDestroy(&b->AIJ);
111: MatDestroy(&b->OAIJ);
112: MatDestroy(&b->A);
113: VecScatterDestroy(&b->ctx);
114: VecDestroy(&b->w);
115: PetscFree(A->data);
116: PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaijcusparse_C", NULL);
117: PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaij_C", NULL);
118: PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_mpiaij_mpimaij_C", NULL);
119: PetscObjectChangeTypeName((PetscObject)A, NULL);
120: return 0;
121: }
123: /*MC
124: MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
125: multicomponent problems, interpolating or restricting each component the same way independently.
126: The matrix type is based on `MATSEQAIJ` for sequential matrices, and `MATMPIAIJ` for distributed matrices.
128: Operations provided:
129: .vb
130: MatMult()
131: MatMultTranspose()
132: MatMultAdd()
133: MatMultTransposeAdd()
134: .ve
136: Level: advanced
138: .seealso: `MATAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MatCreateMAIJ()`
139: M*/
141: PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A)
142: {
143: Mat_MPIMAIJ *b;
144: PetscMPIInt size;
146: PetscNew(&b);
147: A->data = (void *)b;
149: PetscMemzero(A->ops, sizeof(struct _MatOps));
151: A->ops->setup = MatSetUp_MAIJ;
153: b->AIJ = NULL;
154: b->dof = 0;
155: b->OAIJ = NULL;
156: b->ctx = NULL;
157: b->w = NULL;
158: MPI_Comm_size(PetscObjectComm((PetscObject)A), &size);
159: if (size == 1) {
160: PetscObjectChangeTypeName((PetscObject)A, MATSEQMAIJ);
161: } else {
162: PetscObjectChangeTypeName((PetscObject)A, MATMPIMAIJ);
163: }
164: A->preallocated = PETSC_TRUE;
165: A->assembled = PETSC_TRUE;
166: return 0;
167: }
169: /* --------------------------------------------------------------------------------------*/
170: PetscErrorCode MatMult_SeqMAIJ_2(Mat A, Vec xx, Vec yy)
171: {
172: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
173: Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
174: const PetscScalar *x, *v;
175: PetscScalar *y, sum1, sum2;
176: PetscInt nonzerorow = 0, n, i, jrow, j;
177: const PetscInt m = b->AIJ->rmap->n, *idx, *ii;
179: VecGetArrayRead(xx, &x);
180: VecGetArray(yy, &y);
181: idx = a->j;
182: v = a->a;
183: ii = a->i;
185: for (i = 0; i < m; i++) {
186: jrow = ii[i];
187: n = ii[i + 1] - jrow;
188: sum1 = 0.0;
189: sum2 = 0.0;
191: nonzerorow += (n > 0);
192: for (j = 0; j < n; j++) {
193: sum1 += v[jrow] * x[2 * idx[jrow]];
194: sum2 += v[jrow] * x[2 * idx[jrow] + 1];
195: jrow++;
196: }
197: y[2 * i] = sum1;
198: y[2 * i + 1] = sum2;
199: }
201: PetscLogFlops(4.0 * a->nz - 2.0 * nonzerorow);
202: VecRestoreArrayRead(xx, &x);
203: VecRestoreArray(yy, &y);
204: return 0;
205: }
207: PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A, Vec xx, Vec yy)
208: {
209: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
210: Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
211: const PetscScalar *x, *v;
212: PetscScalar *y, alpha1, alpha2;
213: PetscInt n, i;
214: const PetscInt m = b->AIJ->rmap->n, *idx;
216: VecSet(yy, 0.0);
217: VecGetArrayRead(xx, &x);
218: VecGetArray(yy, &y);
220: for (i = 0; i < m; i++) {
221: idx = a->j + a->i[i];
222: v = a->a + a->i[i];
223: n = a->i[i + 1] - a->i[i];
224: alpha1 = x[2 * i];
225: alpha2 = x[2 * i + 1];
226: while (n-- > 0) {
227: y[2 * (*idx)] += alpha1 * (*v);
228: y[2 * (*idx) + 1] += alpha2 * (*v);
229: idx++;
230: v++;
231: }
232: }
233: PetscLogFlops(4.0 * a->nz);
234: VecRestoreArrayRead(xx, &x);
235: VecRestoreArray(yy, &y);
236: return 0;
237: }
239: PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A, Vec xx, Vec yy, Vec zz)
240: {
241: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
242: Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
243: const PetscScalar *x, *v;
244: PetscScalar *y, sum1, sum2;
245: PetscInt n, i, jrow, j;
246: const PetscInt m = b->AIJ->rmap->n, *idx, *ii;
248: if (yy != zz) VecCopy(yy, zz);
249: VecGetArrayRead(xx, &x);
250: VecGetArray(zz, &y);
251: idx = a->j;
252: v = a->a;
253: ii = a->i;
255: for (i = 0; i < m; i++) {
256: jrow = ii[i];
257: n = ii[i + 1] - jrow;
258: sum1 = 0.0;
259: sum2 = 0.0;
260: for (j = 0; j < n; j++) {
261: sum1 += v[jrow] * x[2 * idx[jrow]];
262: sum2 += v[jrow] * x[2 * idx[jrow] + 1];
263: jrow++;
264: }
265: y[2 * i] += sum1;
266: y[2 * i + 1] += sum2;
267: }
269: PetscLogFlops(4.0 * a->nz);
270: VecRestoreArrayRead(xx, &x);
271: VecRestoreArray(zz, &y);
272: return 0;
273: }
274: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A, Vec xx, Vec yy, Vec zz)
275: {
276: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
277: Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
278: const PetscScalar *x, *v;
279: PetscScalar *y, alpha1, alpha2;
280: PetscInt n, i;
281: const PetscInt m = b->AIJ->rmap->n, *idx;
283: if (yy != zz) VecCopy(yy, zz);
284: VecGetArrayRead(xx, &x);
285: VecGetArray(zz, &y);
287: for (i = 0; i < m; i++) {
288: idx = a->j + a->i[i];
289: v = a->a + a->i[i];
290: n = a->i[i + 1] - a->i[i];
291: alpha1 = x[2 * i];
292: alpha2 = x[2 * i + 1];
293: while (n-- > 0) {
294: y[2 * (*idx)] += alpha1 * (*v);
295: y[2 * (*idx) + 1] += alpha2 * (*v);
296: idx++;
297: v++;
298: }
299: }
300: PetscLogFlops(4.0 * a->nz);
301: VecRestoreArrayRead(xx, &x);
302: VecRestoreArray(zz, &y);
303: return 0;
304: }
305: /* --------------------------------------------------------------------------------------*/
306: PetscErrorCode MatMult_SeqMAIJ_3(Mat A, Vec xx, Vec yy)
307: {
308: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
309: Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
310: const PetscScalar *x, *v;
311: PetscScalar *y, sum1, sum2, sum3;
312: PetscInt nonzerorow = 0, n, i, jrow, j;
313: const PetscInt m = b->AIJ->rmap->n, *idx, *ii;
315: VecGetArrayRead(xx, &x);
316: VecGetArray(yy, &y);
317: idx = a->j;
318: v = a->a;
319: ii = a->i;
321: for (i = 0; i < m; i++) {
322: jrow = ii[i];
323: n = ii[i + 1] - jrow;
324: sum1 = 0.0;
325: sum2 = 0.0;
326: sum3 = 0.0;
328: nonzerorow += (n > 0);
329: for (j = 0; j < n; j++) {
330: sum1 += v[jrow] * x[3 * idx[jrow]];
331: sum2 += v[jrow] * x[3 * idx[jrow] + 1];
332: sum3 += v[jrow] * x[3 * idx[jrow] + 2];
333: jrow++;
334: }
335: y[3 * i] = sum1;
336: y[3 * i + 1] = sum2;
337: y[3 * i + 2] = sum3;
338: }
340: PetscLogFlops(6.0 * a->nz - 3.0 * nonzerorow);
341: VecRestoreArrayRead(xx, &x);
342: VecRestoreArray(yy, &y);
343: return 0;
344: }
346: PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A, Vec xx, Vec yy)
347: {
348: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
349: Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
350: const PetscScalar *x, *v;
351: PetscScalar *y, alpha1, alpha2, alpha3;
352: PetscInt n, i;
353: const PetscInt m = b->AIJ->rmap->n, *idx;
355: VecSet(yy, 0.0);
356: VecGetArrayRead(xx, &x);
357: VecGetArray(yy, &y);
359: for (i = 0; i < m; i++) {
360: idx = a->j + a->i[i];
361: v = a->a + a->i[i];
362: n = a->i[i + 1] - a->i[i];
363: alpha1 = x[3 * i];
364: alpha2 = x[3 * i + 1];
365: alpha3 = x[3 * i + 2];
366: while (n-- > 0) {
367: y[3 * (*idx)] += alpha1 * (*v);
368: y[3 * (*idx) + 1] += alpha2 * (*v);
369: y[3 * (*idx) + 2] += alpha3 * (*v);
370: idx++;
371: v++;
372: }
373: }
374: PetscLogFlops(6.0 * a->nz);
375: VecRestoreArrayRead(xx, &x);
376: VecRestoreArray(yy, &y);
377: return 0;
378: }
380: PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A, Vec xx, Vec yy, Vec zz)
381: {
382: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
383: Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
384: const PetscScalar *x, *v;
385: PetscScalar *y, sum1, sum2, sum3;
386: PetscInt n, i, jrow, j;
387: const PetscInt m = b->AIJ->rmap->n, *idx, *ii;
389: if (yy != zz) VecCopy(yy, zz);
390: VecGetArrayRead(xx, &x);
391: VecGetArray(zz, &y);
392: idx = a->j;
393: v = a->a;
394: ii = a->i;
396: for (i = 0; i < m; i++) {
397: jrow = ii[i];
398: n = ii[i + 1] - jrow;
399: sum1 = 0.0;
400: sum2 = 0.0;
401: sum3 = 0.0;
402: for (j = 0; j < n; j++) {
403: sum1 += v[jrow] * x[3 * idx[jrow]];
404: sum2 += v[jrow] * x[3 * idx[jrow] + 1];
405: sum3 += v[jrow] * x[3 * idx[jrow] + 2];
406: jrow++;
407: }
408: y[3 * i] += sum1;
409: y[3 * i + 1] += sum2;
410: y[3 * i + 2] += sum3;
411: }
413: PetscLogFlops(6.0 * a->nz);
414: VecRestoreArrayRead(xx, &x);
415: VecRestoreArray(zz, &y);
416: return 0;
417: }
418: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A, Vec xx, Vec yy, Vec zz)
419: {
420: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
421: Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
422: const PetscScalar *x, *v;
423: PetscScalar *y, alpha1, alpha2, alpha3;
424: PetscInt n, i;
425: const PetscInt m = b->AIJ->rmap->n, *idx;
427: if (yy != zz) VecCopy(yy, zz);
428: VecGetArrayRead(xx, &x);
429: VecGetArray(zz, &y);
430: for (i = 0; i < m; i++) {
431: idx = a->j + a->i[i];
432: v = a->a + a->i[i];
433: n = a->i[i + 1] - a->i[i];
434: alpha1 = x[3 * i];
435: alpha2 = x[3 * i + 1];
436: alpha3 = x[3 * i + 2];
437: while (n-- > 0) {
438: y[3 * (*idx)] += alpha1 * (*v);
439: y[3 * (*idx) + 1] += alpha2 * (*v);
440: y[3 * (*idx) + 2] += alpha3 * (*v);
441: idx++;
442: v++;
443: }
444: }
445: PetscLogFlops(6.0 * a->nz);
446: VecRestoreArrayRead(xx, &x);
447: VecRestoreArray(zz, &y);
448: return 0;
449: }
451: /* ------------------------------------------------------------------------------*/
452: PetscErrorCode MatMult_SeqMAIJ_4(Mat A, Vec xx, Vec yy)
453: {
454: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
455: Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
456: const PetscScalar *x, *v;
457: PetscScalar *y, sum1, sum2, sum3, sum4;
458: PetscInt nonzerorow = 0, n, i, jrow, j;
459: const PetscInt m = b->AIJ->rmap->n, *idx, *ii;
461: VecGetArrayRead(xx, &x);
462: VecGetArray(yy, &y);
463: idx = a->j;
464: v = a->a;
465: ii = a->i;
467: for (i = 0; i < m; i++) {
468: jrow = ii[i];
469: n = ii[i + 1] - jrow;
470: sum1 = 0.0;
471: sum2 = 0.0;
472: sum3 = 0.0;
473: sum4 = 0.0;
474: nonzerorow += (n > 0);
475: for (j = 0; j < n; j++) {
476: sum1 += v[jrow] * x[4 * idx[jrow]];
477: sum2 += v[jrow] * x[4 * idx[jrow] + 1];
478: sum3 += v[jrow] * x[4 * idx[jrow] + 2];
479: sum4 += v[jrow] * x[4 * idx[jrow] + 3];
480: jrow++;
481: }
482: y[4 * i] = sum1;
483: y[4 * i + 1] = sum2;
484: y[4 * i + 2] = sum3;
485: y[4 * i + 3] = sum4;
486: }
488: PetscLogFlops(8.0 * a->nz - 4.0 * nonzerorow);
489: VecRestoreArrayRead(xx, &x);
490: VecRestoreArray(yy, &y);
491: return 0;
492: }
494: PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A, Vec xx, Vec yy)
495: {
496: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
497: Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
498: const PetscScalar *x, *v;
499: PetscScalar *y, alpha1, alpha2, alpha3, alpha4;
500: PetscInt n, i;
501: const PetscInt m = b->AIJ->rmap->n, *idx;
503: VecSet(yy, 0.0);
504: VecGetArrayRead(xx, &x);
505: VecGetArray(yy, &y);
506: for (i = 0; i < m; i++) {
507: idx = a->j + a->i[i];
508: v = a->a + a->i[i];
509: n = a->i[i + 1] - a->i[i];
510: alpha1 = x[4 * i];
511: alpha2 = x[4 * i + 1];
512: alpha3 = x[4 * i + 2];
513: alpha4 = x[4 * i + 3];
514: while (n-- > 0) {
515: y[4 * (*idx)] += alpha1 * (*v);
516: y[4 * (*idx) + 1] += alpha2 * (*v);
517: y[4 * (*idx) + 2] += alpha3 * (*v);
518: y[4 * (*idx) + 3] += alpha4 * (*v);
519: idx++;
520: v++;
521: }
522: }
523: PetscLogFlops(8.0 * a->nz);
524: VecRestoreArrayRead(xx, &x);
525: VecRestoreArray(yy, &y);
526: return 0;
527: }
529: PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A, Vec xx, Vec yy, Vec zz)
530: {
531: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
532: Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
533: const PetscScalar *x, *v;
534: PetscScalar *y, sum1, sum2, sum3, sum4;
535: PetscInt n, i, jrow, j;
536: const PetscInt m = b->AIJ->rmap->n, *idx, *ii;
538: if (yy != zz) VecCopy(yy, zz);
539: VecGetArrayRead(xx, &x);
540: VecGetArray(zz, &y);
541: idx = a->j;
542: v = a->a;
543: ii = a->i;
545: for (i = 0; i < m; i++) {
546: jrow = ii[i];
547: n = ii[i + 1] - jrow;
548: sum1 = 0.0;
549: sum2 = 0.0;
550: sum3 = 0.0;
551: sum4 = 0.0;
552: for (j = 0; j < n; j++) {
553: sum1 += v[jrow] * x[4 * idx[jrow]];
554: sum2 += v[jrow] * x[4 * idx[jrow] + 1];
555: sum3 += v[jrow] * x[4 * idx[jrow] + 2];
556: sum4 += v[jrow] * x[4 * idx[jrow] + 3];
557: jrow++;
558: }
559: y[4 * i] += sum1;
560: y[4 * i + 1] += sum2;
561: y[4 * i + 2] += sum3;
562: y[4 * i + 3] += sum4;
563: }
565: PetscLogFlops(8.0 * a->nz);
566: VecRestoreArrayRead(xx, &x);
567: VecRestoreArray(zz, &y);
568: return 0;
569: }
570: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A, Vec xx, Vec yy, Vec zz)
571: {
572: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
573: Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
574: const PetscScalar *x, *v;
575: PetscScalar *y, alpha1, alpha2, alpha3, alpha4;
576: PetscInt n, i;
577: const PetscInt m = b->AIJ->rmap->n, *idx;
579: if (yy != zz) VecCopy(yy, zz);
580: VecGetArrayRead(xx, &x);
581: VecGetArray(zz, &y);
583: for (i = 0; i < m; i++) {
584: idx = a->j + a->i[i];
585: v = a->a + a->i[i];
586: n = a->i[i + 1] - a->i[i];
587: alpha1 = x[4 * i];
588: alpha2 = x[4 * i + 1];
589: alpha3 = x[4 * i + 2];
590: alpha4 = x[4 * i + 3];
591: while (n-- > 0) {
592: y[4 * (*idx)] += alpha1 * (*v);
593: y[4 * (*idx) + 1] += alpha2 * (*v);
594: y[4 * (*idx) + 2] += alpha3 * (*v);
595: y[4 * (*idx) + 3] += alpha4 * (*v);
596: idx++;
597: v++;
598: }
599: }
600: PetscLogFlops(8.0 * a->nz);
601: VecRestoreArrayRead(xx, &x);
602: VecRestoreArray(zz, &y);
603: return 0;
604: }
605: /* ------------------------------------------------------------------------------*/
607: PetscErrorCode MatMult_SeqMAIJ_5(Mat A, Vec xx, Vec yy)
608: {
609: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
610: Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
611: const PetscScalar *x, *v;
612: PetscScalar *y, sum1, sum2, sum3, sum4, sum5;
613: PetscInt nonzerorow = 0, n, i, jrow, j;
614: const PetscInt m = b->AIJ->rmap->n, *idx, *ii;
616: VecGetArrayRead(xx, &x);
617: VecGetArray(yy, &y);
618: idx = a->j;
619: v = a->a;
620: ii = a->i;
622: for (i = 0; i < m; i++) {
623: jrow = ii[i];
624: n = ii[i + 1] - jrow;
625: sum1 = 0.0;
626: sum2 = 0.0;
627: sum3 = 0.0;
628: sum4 = 0.0;
629: sum5 = 0.0;
631: nonzerorow += (n > 0);
632: for (j = 0; j < n; j++) {
633: sum1 += v[jrow] * x[5 * idx[jrow]];
634: sum2 += v[jrow] * x[5 * idx[jrow] + 1];
635: sum3 += v[jrow] * x[5 * idx[jrow] + 2];
636: sum4 += v[jrow] * x[5 * idx[jrow] + 3];
637: sum5 += v[jrow] * x[5 * idx[jrow] + 4];
638: jrow++;
639: }
640: y[5 * i] = sum1;
641: y[5 * i + 1] = sum2;
642: y[5 * i + 2] = sum3;
643: y[5 * i + 3] = sum4;
644: y[5 * i + 4] = sum5;
645: }
647: PetscLogFlops(10.0 * a->nz - 5.0 * nonzerorow);
648: VecRestoreArrayRead(xx, &x);
649: VecRestoreArray(yy, &y);
650: return 0;
651: }
653: PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A, Vec xx, Vec yy)
654: {
655: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
656: Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
657: const PetscScalar *x, *v;
658: PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5;
659: PetscInt n, i;
660: const PetscInt m = b->AIJ->rmap->n, *idx;
662: VecSet(yy, 0.0);
663: VecGetArrayRead(xx, &x);
664: VecGetArray(yy, &y);
666: for (i = 0; i < m; i++) {
667: idx = a->j + a->i[i];
668: v = a->a + a->i[i];
669: n = a->i[i + 1] - a->i[i];
670: alpha1 = x[5 * i];
671: alpha2 = x[5 * i + 1];
672: alpha3 = x[5 * i + 2];
673: alpha4 = x[5 * i + 3];
674: alpha5 = x[5 * i + 4];
675: while (n-- > 0) {
676: y[5 * (*idx)] += alpha1 * (*v);
677: y[5 * (*idx) + 1] += alpha2 * (*v);
678: y[5 * (*idx) + 2] += alpha3 * (*v);
679: y[5 * (*idx) + 3] += alpha4 * (*v);
680: y[5 * (*idx) + 4] += alpha5 * (*v);
681: idx++;
682: v++;
683: }
684: }
685: PetscLogFlops(10.0 * a->nz);
686: VecRestoreArrayRead(xx, &x);
687: VecRestoreArray(yy, &y);
688: return 0;
689: }
691: PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A, Vec xx, Vec yy, Vec zz)
692: {
693: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
694: Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
695: const PetscScalar *x, *v;
696: PetscScalar *y, sum1, sum2, sum3, sum4, sum5;
697: PetscInt n, i, jrow, j;
698: const PetscInt m = b->AIJ->rmap->n, *idx, *ii;
700: if (yy != zz) VecCopy(yy, zz);
701: VecGetArrayRead(xx, &x);
702: VecGetArray(zz, &y);
703: idx = a->j;
704: v = a->a;
705: ii = a->i;
707: for (i = 0; i < m; i++) {
708: jrow = ii[i];
709: n = ii[i + 1] - jrow;
710: sum1 = 0.0;
711: sum2 = 0.0;
712: sum3 = 0.0;
713: sum4 = 0.0;
714: sum5 = 0.0;
715: for (j = 0; j < n; j++) {
716: sum1 += v[jrow] * x[5 * idx[jrow]];
717: sum2 += v[jrow] * x[5 * idx[jrow] + 1];
718: sum3 += v[jrow] * x[5 * idx[jrow] + 2];
719: sum4 += v[jrow] * x[5 * idx[jrow] + 3];
720: sum5 += v[jrow] * x[5 * idx[jrow] + 4];
721: jrow++;
722: }
723: y[5 * i] += sum1;
724: y[5 * i + 1] += sum2;
725: y[5 * i + 2] += sum3;
726: y[5 * i + 3] += sum4;
727: y[5 * i + 4] += sum5;
728: }
730: PetscLogFlops(10.0 * a->nz);
731: VecRestoreArrayRead(xx, &x);
732: VecRestoreArray(zz, &y);
733: return 0;
734: }
736: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A, Vec xx, Vec yy, Vec zz)
737: {
738: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
739: Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
740: const PetscScalar *x, *v;
741: PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5;
742: PetscInt n, i;
743: const PetscInt m = b->AIJ->rmap->n, *idx;
745: if (yy != zz) VecCopy(yy, zz);
746: VecGetArrayRead(xx, &x);
747: VecGetArray(zz, &y);
749: for (i = 0; i < m; i++) {
750: idx = a->j + a->i[i];
751: v = a->a + a->i[i];
752: n = a->i[i + 1] - a->i[i];
753: alpha1 = x[5 * i];
754: alpha2 = x[5 * i + 1];
755: alpha3 = x[5 * i + 2];
756: alpha4 = x[5 * i + 3];
757: alpha5 = x[5 * i + 4];
758: while (n-- > 0) {
759: y[5 * (*idx)] += alpha1 * (*v);
760: y[5 * (*idx) + 1] += alpha2 * (*v);
761: y[5 * (*idx) + 2] += alpha3 * (*v);
762: y[5 * (*idx) + 3] += alpha4 * (*v);
763: y[5 * (*idx) + 4] += alpha5 * (*v);
764: idx++;
765: v++;
766: }
767: }
768: PetscLogFlops(10.0 * a->nz);
769: VecRestoreArrayRead(xx, &x);
770: VecRestoreArray(zz, &y);
771: return 0;
772: }
774: /* ------------------------------------------------------------------------------*/
775: PetscErrorCode MatMult_SeqMAIJ_6(Mat A, Vec xx, Vec yy)
776: {
777: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
778: Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
779: const PetscScalar *x, *v;
780: PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6;
781: PetscInt nonzerorow = 0, n, i, jrow, j;
782: const PetscInt m = b->AIJ->rmap->n, *idx, *ii;
784: VecGetArrayRead(xx, &x);
785: VecGetArray(yy, &y);
786: idx = a->j;
787: v = a->a;
788: ii = a->i;
790: for (i = 0; i < m; i++) {
791: jrow = ii[i];
792: n = ii[i + 1] - jrow;
793: sum1 = 0.0;
794: sum2 = 0.0;
795: sum3 = 0.0;
796: sum4 = 0.0;
797: sum5 = 0.0;
798: sum6 = 0.0;
800: nonzerorow += (n > 0);
801: for (j = 0; j < n; j++) {
802: sum1 += v[jrow] * x[6 * idx[jrow]];
803: sum2 += v[jrow] * x[6 * idx[jrow] + 1];
804: sum3 += v[jrow] * x[6 * idx[jrow] + 2];
805: sum4 += v[jrow] * x[6 * idx[jrow] + 3];
806: sum5 += v[jrow] * x[6 * idx[jrow] + 4];
807: sum6 += v[jrow] * x[6 * idx[jrow] + 5];
808: jrow++;
809: }
810: y[6 * i] = sum1;
811: y[6 * i + 1] = sum2;
812: y[6 * i + 2] = sum3;
813: y[6 * i + 3] = sum4;
814: y[6 * i + 4] = sum5;
815: y[6 * i + 5] = sum6;
816: }
818: PetscLogFlops(12.0 * a->nz - 6.0 * nonzerorow);
819: VecRestoreArrayRead(xx, &x);
820: VecRestoreArray(yy, &y);
821: return 0;
822: }
824: PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A, Vec xx, Vec yy)
825: {
826: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
827: Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
828: const PetscScalar *x, *v;
829: PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6;
830: PetscInt n, i;
831: const PetscInt m = b->AIJ->rmap->n, *idx;
833: VecSet(yy, 0.0);
834: VecGetArrayRead(xx, &x);
835: VecGetArray(yy, &y);
837: for (i = 0; i < m; i++) {
838: idx = a->j + a->i[i];
839: v = a->a + a->i[i];
840: n = a->i[i + 1] - a->i[i];
841: alpha1 = x[6 * i];
842: alpha2 = x[6 * i + 1];
843: alpha3 = x[6 * i + 2];
844: alpha4 = x[6 * i + 3];
845: alpha5 = x[6 * i + 4];
846: alpha6 = x[6 * i + 5];
847: while (n-- > 0) {
848: y[6 * (*idx)] += alpha1 * (*v);
849: y[6 * (*idx) + 1] += alpha2 * (*v);
850: y[6 * (*idx) + 2] += alpha3 * (*v);
851: y[6 * (*idx) + 3] += alpha4 * (*v);
852: y[6 * (*idx) + 4] += alpha5 * (*v);
853: y[6 * (*idx) + 5] += alpha6 * (*v);
854: idx++;
855: v++;
856: }
857: }
858: PetscLogFlops(12.0 * a->nz);
859: VecRestoreArrayRead(xx, &x);
860: VecRestoreArray(yy, &y);
861: return 0;
862: }
864: PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A, Vec xx, Vec yy, Vec zz)
865: {
866: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
867: Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
868: const PetscScalar *x, *v;
869: PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6;
870: PetscInt n, i, jrow, j;
871: const PetscInt m = b->AIJ->rmap->n, *idx, *ii;
873: if (yy != zz) VecCopy(yy, zz);
874: VecGetArrayRead(xx, &x);
875: VecGetArray(zz, &y);
876: idx = a->j;
877: v = a->a;
878: ii = a->i;
880: for (i = 0; i < m; i++) {
881: jrow = ii[i];
882: n = ii[i + 1] - jrow;
883: sum1 = 0.0;
884: sum2 = 0.0;
885: sum3 = 0.0;
886: sum4 = 0.0;
887: sum5 = 0.0;
888: sum6 = 0.0;
889: for (j = 0; j < n; j++) {
890: sum1 += v[jrow] * x[6 * idx[jrow]];
891: sum2 += v[jrow] * x[6 * idx[jrow] + 1];
892: sum3 += v[jrow] * x[6 * idx[jrow] + 2];
893: sum4 += v[jrow] * x[6 * idx[jrow] + 3];
894: sum5 += v[jrow] * x[6 * idx[jrow] + 4];
895: sum6 += v[jrow] * x[6 * idx[jrow] + 5];
896: jrow++;
897: }
898: y[6 * i] += sum1;
899: y[6 * i + 1] += sum2;
900: y[6 * i + 2] += sum3;
901: y[6 * i + 3] += sum4;
902: y[6 * i + 4] += sum5;
903: y[6 * i + 5] += sum6;
904: }
906: PetscLogFlops(12.0 * a->nz);
907: VecRestoreArrayRead(xx, &x);
908: VecRestoreArray(zz, &y);
909: return 0;
910: }
912: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A, Vec xx, Vec yy, Vec zz)
913: {
914: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
915: Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
916: const PetscScalar *x, *v;
917: PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6;
918: PetscInt n, i;
919: const PetscInt m = b->AIJ->rmap->n, *idx;
921: if (yy != zz) VecCopy(yy, zz);
922: VecGetArrayRead(xx, &x);
923: VecGetArray(zz, &y);
925: for (i = 0; i < m; i++) {
926: idx = a->j + a->i[i];
927: v = a->a + a->i[i];
928: n = a->i[i + 1] - a->i[i];
929: alpha1 = x[6 * i];
930: alpha2 = x[6 * i + 1];
931: alpha3 = x[6 * i + 2];
932: alpha4 = x[6 * i + 3];
933: alpha5 = x[6 * i + 4];
934: alpha6 = x[6 * i + 5];
935: while (n-- > 0) {
936: y[6 * (*idx)] += alpha1 * (*v);
937: y[6 * (*idx) + 1] += alpha2 * (*v);
938: y[6 * (*idx) + 2] += alpha3 * (*v);
939: y[6 * (*idx) + 3] += alpha4 * (*v);
940: y[6 * (*idx) + 4] += alpha5 * (*v);
941: y[6 * (*idx) + 5] += alpha6 * (*v);
942: idx++;
943: v++;
944: }
945: }
946: PetscLogFlops(12.0 * a->nz);
947: VecRestoreArrayRead(xx, &x);
948: VecRestoreArray(zz, &y);
949: return 0;
950: }
952: /* ------------------------------------------------------------------------------*/
953: PetscErrorCode MatMult_SeqMAIJ_7(Mat A, Vec xx, Vec yy)
954: {
955: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
956: Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
957: const PetscScalar *x, *v;
958: PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7;
959: const PetscInt m = b->AIJ->rmap->n, *idx, *ii;
960: PetscInt nonzerorow = 0, n, i, jrow, j;
962: VecGetArrayRead(xx, &x);
963: VecGetArray(yy, &y);
964: idx = a->j;
965: v = a->a;
966: ii = a->i;
968: for (i = 0; i < m; i++) {
969: jrow = ii[i];
970: n = ii[i + 1] - jrow;
971: sum1 = 0.0;
972: sum2 = 0.0;
973: sum3 = 0.0;
974: sum4 = 0.0;
975: sum5 = 0.0;
976: sum6 = 0.0;
977: sum7 = 0.0;
979: nonzerorow += (n > 0);
980: for (j = 0; j < n; j++) {
981: sum1 += v[jrow] * x[7 * idx[jrow]];
982: sum2 += v[jrow] * x[7 * idx[jrow] + 1];
983: sum3 += v[jrow] * x[7 * idx[jrow] + 2];
984: sum4 += v[jrow] * x[7 * idx[jrow] + 3];
985: sum5 += v[jrow] * x[7 * idx[jrow] + 4];
986: sum6 += v[jrow] * x[7 * idx[jrow] + 5];
987: sum7 += v[jrow] * x[7 * idx[jrow] + 6];
988: jrow++;
989: }
990: y[7 * i] = sum1;
991: y[7 * i + 1] = sum2;
992: y[7 * i + 2] = sum3;
993: y[7 * i + 3] = sum4;
994: y[7 * i + 4] = sum5;
995: y[7 * i + 5] = sum6;
996: y[7 * i + 6] = sum7;
997: }
999: PetscLogFlops(14.0 * a->nz - 7.0 * nonzerorow);
1000: VecRestoreArrayRead(xx, &x);
1001: VecRestoreArray(yy, &y);
1002: return 0;
1003: }
1005: PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A, Vec xx, Vec yy)
1006: {
1007: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
1008: Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
1009: const PetscScalar *x, *v;
1010: PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7;
1011: const PetscInt m = b->AIJ->rmap->n, *idx;
1012: PetscInt n, i;
1014: VecSet(yy, 0.0);
1015: VecGetArrayRead(xx, &x);
1016: VecGetArray(yy, &y);
1018: for (i = 0; i < m; i++) {
1019: idx = a->j + a->i[i];
1020: v = a->a + a->i[i];
1021: n = a->i[i + 1] - a->i[i];
1022: alpha1 = x[7 * i];
1023: alpha2 = x[7 * i + 1];
1024: alpha3 = x[7 * i + 2];
1025: alpha4 = x[7 * i + 3];
1026: alpha5 = x[7 * i + 4];
1027: alpha6 = x[7 * i + 5];
1028: alpha7 = x[7 * i + 6];
1029: while (n-- > 0) {
1030: y[7 * (*idx)] += alpha1 * (*v);
1031: y[7 * (*idx) + 1] += alpha2 * (*v);
1032: y[7 * (*idx) + 2] += alpha3 * (*v);
1033: y[7 * (*idx) + 3] += alpha4 * (*v);
1034: y[7 * (*idx) + 4] += alpha5 * (*v);
1035: y[7 * (*idx) + 5] += alpha6 * (*v);
1036: y[7 * (*idx) + 6] += alpha7 * (*v);
1037: idx++;
1038: v++;
1039: }
1040: }
1041: PetscLogFlops(14.0 * a->nz);
1042: VecRestoreArrayRead(xx, &x);
1043: VecRestoreArray(yy, &y);
1044: return 0;
1045: }
1047: PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A, Vec xx, Vec yy, Vec zz)
1048: {
1049: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
1050: Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
1051: const PetscScalar *x, *v;
1052: PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1053: const PetscInt m = b->AIJ->rmap->n, *idx, *ii;
1054: PetscInt n, i, jrow, j;
1056: if (yy != zz) VecCopy(yy, zz);
1057: VecGetArrayRead(xx, &x);
1058: VecGetArray(zz, &y);
1059: idx = a->j;
1060: v = a->a;
1061: ii = a->i;
1063: for (i = 0; i < m; i++) {
1064: jrow = ii[i];
1065: n = ii[i + 1] - jrow;
1066: sum1 = 0.0;
1067: sum2 = 0.0;
1068: sum3 = 0.0;
1069: sum4 = 0.0;
1070: sum5 = 0.0;
1071: sum6 = 0.0;
1072: sum7 = 0.0;
1073: for (j = 0; j < n; j++) {
1074: sum1 += v[jrow] * x[7 * idx[jrow]];
1075: sum2 += v[jrow] * x[7 * idx[jrow] + 1];
1076: sum3 += v[jrow] * x[7 * idx[jrow] + 2];
1077: sum4 += v[jrow] * x[7 * idx[jrow] + 3];
1078: sum5 += v[jrow] * x[7 * idx[jrow] + 4];
1079: sum6 += v[jrow] * x[7 * idx[jrow] + 5];
1080: sum7 += v[jrow] * x[7 * idx[jrow] + 6];
1081: jrow++;
1082: }
1083: y[7 * i] += sum1;
1084: y[7 * i + 1] += sum2;
1085: y[7 * i + 2] += sum3;
1086: y[7 * i + 3] += sum4;
1087: y[7 * i + 4] += sum5;
1088: y[7 * i + 5] += sum6;
1089: y[7 * i + 6] += sum7;
1090: }
1092: PetscLogFlops(14.0 * a->nz);
1093: VecRestoreArrayRead(xx, &x);
1094: VecRestoreArray(zz, &y);
1095: return 0;
1096: }
1098: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A, Vec xx, Vec yy, Vec zz)
1099: {
1100: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
1101: Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
1102: const PetscScalar *x, *v;
1103: PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7;
1104: const PetscInt m = b->AIJ->rmap->n, *idx;
1105: PetscInt n, i;
1107: if (yy != zz) VecCopy(yy, zz);
1108: VecGetArrayRead(xx, &x);
1109: VecGetArray(zz, &y);
1110: for (i = 0; i < m; i++) {
1111: idx = a->j + a->i[i];
1112: v = a->a + a->i[i];
1113: n = a->i[i + 1] - a->i[i];
1114: alpha1 = x[7 * i];
1115: alpha2 = x[7 * i + 1];
1116: alpha3 = x[7 * i + 2];
1117: alpha4 = x[7 * i + 3];
1118: alpha5 = x[7 * i + 4];
1119: alpha6 = x[7 * i + 5];
1120: alpha7 = x[7 * i + 6];
1121: while (n-- > 0) {
1122: y[7 * (*idx)] += alpha1 * (*v);
1123: y[7 * (*idx) + 1] += alpha2 * (*v);
1124: y[7 * (*idx) + 2] += alpha3 * (*v);
1125: y[7 * (*idx) + 3] += alpha4 * (*v);
1126: y[7 * (*idx) + 4] += alpha5 * (*v);
1127: y[7 * (*idx) + 5] += alpha6 * (*v);
1128: y[7 * (*idx) + 6] += alpha7 * (*v);
1129: idx++;
1130: v++;
1131: }
1132: }
1133: PetscLogFlops(14.0 * a->nz);
1134: VecRestoreArrayRead(xx, &x);
1135: VecRestoreArray(zz, &y);
1136: return 0;
1137: }
1139: PetscErrorCode MatMult_SeqMAIJ_8(Mat A, Vec xx, Vec yy)
1140: {
1141: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
1142: Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
1143: const PetscScalar *x, *v;
1144: PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1145: const PetscInt m = b->AIJ->rmap->n, *idx, *ii;
1146: PetscInt nonzerorow = 0, n, i, jrow, j;
1148: VecGetArrayRead(xx, &x);
1149: VecGetArray(yy, &y);
1150: idx = a->j;
1151: v = a->a;
1152: ii = a->i;
1154: for (i = 0; i < m; i++) {
1155: jrow = ii[i];
1156: n = ii[i + 1] - jrow;
1157: sum1 = 0.0;
1158: sum2 = 0.0;
1159: sum3 = 0.0;
1160: sum4 = 0.0;
1161: sum5 = 0.0;
1162: sum6 = 0.0;
1163: sum7 = 0.0;
1164: sum8 = 0.0;
1166: nonzerorow += (n > 0);
1167: for (j = 0; j < n; j++) {
1168: sum1 += v[jrow] * x[8 * idx[jrow]];
1169: sum2 += v[jrow] * x[8 * idx[jrow] + 1];
1170: sum3 += v[jrow] * x[8 * idx[jrow] + 2];
1171: sum4 += v[jrow] * x[8 * idx[jrow] + 3];
1172: sum5 += v[jrow] * x[8 * idx[jrow] + 4];
1173: sum6 += v[jrow] * x[8 * idx[jrow] + 5];
1174: sum7 += v[jrow] * x[8 * idx[jrow] + 6];
1175: sum8 += v[jrow] * x[8 * idx[jrow] + 7];
1176: jrow++;
1177: }
1178: y[8 * i] = sum1;
1179: y[8 * i + 1] = sum2;
1180: y[8 * i + 2] = sum3;
1181: y[8 * i + 3] = sum4;
1182: y[8 * i + 4] = sum5;
1183: y[8 * i + 5] = sum6;
1184: y[8 * i + 6] = sum7;
1185: y[8 * i + 7] = sum8;
1186: }
1188: PetscLogFlops(16.0 * a->nz - 8.0 * nonzerorow);
1189: VecRestoreArrayRead(xx, &x);
1190: VecRestoreArray(yy, &y);
1191: return 0;
1192: }
1194: PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A, Vec xx, Vec yy)
1195: {
1196: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
1197: Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
1198: const PetscScalar *x, *v;
1199: PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8;
1200: const PetscInt m = b->AIJ->rmap->n, *idx;
1201: PetscInt n, i;
1203: VecSet(yy, 0.0);
1204: VecGetArrayRead(xx, &x);
1205: VecGetArray(yy, &y);
1207: for (i = 0; i < m; i++) {
1208: idx = a->j + a->i[i];
1209: v = a->a + a->i[i];
1210: n = a->i[i + 1] - a->i[i];
1211: alpha1 = x[8 * i];
1212: alpha2 = x[8 * i + 1];
1213: alpha3 = x[8 * i + 2];
1214: alpha4 = x[8 * i + 3];
1215: alpha5 = x[8 * i + 4];
1216: alpha6 = x[8 * i + 5];
1217: alpha7 = x[8 * i + 6];
1218: alpha8 = x[8 * i + 7];
1219: while (n-- > 0) {
1220: y[8 * (*idx)] += alpha1 * (*v);
1221: y[8 * (*idx) + 1] += alpha2 * (*v);
1222: y[8 * (*idx) + 2] += alpha3 * (*v);
1223: y[8 * (*idx) + 3] += alpha4 * (*v);
1224: y[8 * (*idx) + 4] += alpha5 * (*v);
1225: y[8 * (*idx) + 5] += alpha6 * (*v);
1226: y[8 * (*idx) + 6] += alpha7 * (*v);
1227: y[8 * (*idx) + 7] += alpha8 * (*v);
1228: idx++;
1229: v++;
1230: }
1231: }
1232: PetscLogFlops(16.0 * a->nz);
1233: VecRestoreArrayRead(xx, &x);
1234: VecRestoreArray(yy, &y);
1235: return 0;
1236: }
1238: PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A, Vec xx, Vec yy, Vec zz)
1239: {
1240: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
1241: Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
1242: const PetscScalar *x, *v;
1243: PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1244: const PetscInt m = b->AIJ->rmap->n, *idx, *ii;
1245: PetscInt n, i, jrow, j;
1247: if (yy != zz) VecCopy(yy, zz);
1248: VecGetArrayRead(xx, &x);
1249: VecGetArray(zz, &y);
1250: idx = a->j;
1251: v = a->a;
1252: ii = a->i;
1254: for (i = 0; i < m; i++) {
1255: jrow = ii[i];
1256: n = ii[i + 1] - jrow;
1257: sum1 = 0.0;
1258: sum2 = 0.0;
1259: sum3 = 0.0;
1260: sum4 = 0.0;
1261: sum5 = 0.0;
1262: sum6 = 0.0;
1263: sum7 = 0.0;
1264: sum8 = 0.0;
1265: for (j = 0; j < n; j++) {
1266: sum1 += v[jrow] * x[8 * idx[jrow]];
1267: sum2 += v[jrow] * x[8 * idx[jrow] + 1];
1268: sum3 += v[jrow] * x[8 * idx[jrow] + 2];
1269: sum4 += v[jrow] * x[8 * idx[jrow] + 3];
1270: sum5 += v[jrow] * x[8 * idx[jrow] + 4];
1271: sum6 += v[jrow] * x[8 * idx[jrow] + 5];
1272: sum7 += v[jrow] * x[8 * idx[jrow] + 6];
1273: sum8 += v[jrow] * x[8 * idx[jrow] + 7];
1274: jrow++;
1275: }
1276: y[8 * i] += sum1;
1277: y[8 * i + 1] += sum2;
1278: y[8 * i + 2] += sum3;
1279: y[8 * i + 3] += sum4;
1280: y[8 * i + 4] += sum5;
1281: y[8 * i + 5] += sum6;
1282: y[8 * i + 6] += sum7;
1283: y[8 * i + 7] += sum8;
1284: }
1286: PetscLogFlops(16.0 * a->nz);
1287: VecRestoreArrayRead(xx, &x);
1288: VecRestoreArray(zz, &y);
1289: return 0;
1290: }
1292: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A, Vec xx, Vec yy, Vec zz)
1293: {
1294: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
1295: Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
1296: const PetscScalar *x, *v;
1297: PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8;
1298: const PetscInt m = b->AIJ->rmap->n, *idx;
1299: PetscInt n, i;
1301: if (yy != zz) VecCopy(yy, zz);
1302: VecGetArrayRead(xx, &x);
1303: VecGetArray(zz, &y);
1304: for (i = 0; i < m; i++) {
1305: idx = a->j + a->i[i];
1306: v = a->a + a->i[i];
1307: n = a->i[i + 1] - a->i[i];
1308: alpha1 = x[8 * i];
1309: alpha2 = x[8 * i + 1];
1310: alpha3 = x[8 * i + 2];
1311: alpha4 = x[8 * i + 3];
1312: alpha5 = x[8 * i + 4];
1313: alpha6 = x[8 * i + 5];
1314: alpha7 = x[8 * i + 6];
1315: alpha8 = x[8 * i + 7];
1316: while (n-- > 0) {
1317: y[8 * (*idx)] += alpha1 * (*v);
1318: y[8 * (*idx) + 1] += alpha2 * (*v);
1319: y[8 * (*idx) + 2] += alpha3 * (*v);
1320: y[8 * (*idx) + 3] += alpha4 * (*v);
1321: y[8 * (*idx) + 4] += alpha5 * (*v);
1322: y[8 * (*idx) + 5] += alpha6 * (*v);
1323: y[8 * (*idx) + 6] += alpha7 * (*v);
1324: y[8 * (*idx) + 7] += alpha8 * (*v);
1325: idx++;
1326: v++;
1327: }
1328: }
1329: PetscLogFlops(16.0 * a->nz);
1330: VecRestoreArrayRead(xx, &x);
1331: VecRestoreArray(zz, &y);
1332: return 0;
1333: }
1335: /* ------------------------------------------------------------------------------*/
1336: PetscErrorCode MatMult_SeqMAIJ_9(Mat A, Vec xx, Vec yy)
1337: {
1338: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
1339: Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
1340: const PetscScalar *x, *v;
1341: PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1342: const PetscInt m = b->AIJ->rmap->n, *idx, *ii;
1343: PetscInt nonzerorow = 0, n, i, jrow, j;
1345: VecGetArrayRead(xx, &x);
1346: VecGetArray(yy, &y);
1347: idx = a->j;
1348: v = a->a;
1349: ii = a->i;
1351: for (i = 0; i < m; i++) {
1352: jrow = ii[i];
1353: n = ii[i + 1] - jrow;
1354: sum1 = 0.0;
1355: sum2 = 0.0;
1356: sum3 = 0.0;
1357: sum4 = 0.0;
1358: sum5 = 0.0;
1359: sum6 = 0.0;
1360: sum7 = 0.0;
1361: sum8 = 0.0;
1362: sum9 = 0.0;
1364: nonzerorow += (n > 0);
1365: for (j = 0; j < n; j++) {
1366: sum1 += v[jrow] * x[9 * idx[jrow]];
1367: sum2 += v[jrow] * x[9 * idx[jrow] + 1];
1368: sum3 += v[jrow] * x[9 * idx[jrow] + 2];
1369: sum4 += v[jrow] * x[9 * idx[jrow] + 3];
1370: sum5 += v[jrow] * x[9 * idx[jrow] + 4];
1371: sum6 += v[jrow] * x[9 * idx[jrow] + 5];
1372: sum7 += v[jrow] * x[9 * idx[jrow] + 6];
1373: sum8 += v[jrow] * x[9 * idx[jrow] + 7];
1374: sum9 += v[jrow] * x[9 * idx[jrow] + 8];
1375: jrow++;
1376: }
1377: y[9 * i] = sum1;
1378: y[9 * i + 1] = sum2;
1379: y[9 * i + 2] = sum3;
1380: y[9 * i + 3] = sum4;
1381: y[9 * i + 4] = sum5;
1382: y[9 * i + 5] = sum6;
1383: y[9 * i + 6] = sum7;
1384: y[9 * i + 7] = sum8;
1385: y[9 * i + 8] = sum9;
1386: }
1388: PetscLogFlops(18.0 * a->nz - 9 * nonzerorow);
1389: VecRestoreArrayRead(xx, &x);
1390: VecRestoreArray(yy, &y);
1391: return 0;
1392: }
1394: /* ------------------------------------------------------------------------------*/
1396: PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A, Vec xx, Vec yy)
1397: {
1398: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
1399: Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
1400: const PetscScalar *x, *v;
1401: PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9;
1402: const PetscInt m = b->AIJ->rmap->n, *idx;
1403: PetscInt n, i;
1405: VecSet(yy, 0.0);
1406: VecGetArrayRead(xx, &x);
1407: VecGetArray(yy, &y);
1409: for (i = 0; i < m; i++) {
1410: idx = a->j + a->i[i];
1411: v = a->a + a->i[i];
1412: n = a->i[i + 1] - a->i[i];
1413: alpha1 = x[9 * i];
1414: alpha2 = x[9 * i + 1];
1415: alpha3 = x[9 * i + 2];
1416: alpha4 = x[9 * i + 3];
1417: alpha5 = x[9 * i + 4];
1418: alpha6 = x[9 * i + 5];
1419: alpha7 = x[9 * i + 6];
1420: alpha8 = x[9 * i + 7];
1421: alpha9 = x[9 * i + 8];
1422: while (n-- > 0) {
1423: y[9 * (*idx)] += alpha1 * (*v);
1424: y[9 * (*idx) + 1] += alpha2 * (*v);
1425: y[9 * (*idx) + 2] += alpha3 * (*v);
1426: y[9 * (*idx) + 3] += alpha4 * (*v);
1427: y[9 * (*idx) + 4] += alpha5 * (*v);
1428: y[9 * (*idx) + 5] += alpha6 * (*v);
1429: y[9 * (*idx) + 6] += alpha7 * (*v);
1430: y[9 * (*idx) + 7] += alpha8 * (*v);
1431: y[9 * (*idx) + 8] += alpha9 * (*v);
1432: idx++;
1433: v++;
1434: }
1435: }
1436: PetscLogFlops(18.0 * a->nz);
1437: VecRestoreArrayRead(xx, &x);
1438: VecRestoreArray(yy, &y);
1439: return 0;
1440: }
1442: PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A, Vec xx, Vec yy, Vec zz)
1443: {
1444: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
1445: Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
1446: const PetscScalar *x, *v;
1447: PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1448: const PetscInt m = b->AIJ->rmap->n, *idx, *ii;
1449: PetscInt n, i, jrow, j;
1451: if (yy != zz) VecCopy(yy, zz);
1452: VecGetArrayRead(xx, &x);
1453: VecGetArray(zz, &y);
1454: idx = a->j;
1455: v = a->a;
1456: ii = a->i;
1458: for (i = 0; i < m; i++) {
1459: jrow = ii[i];
1460: n = ii[i + 1] - jrow;
1461: sum1 = 0.0;
1462: sum2 = 0.0;
1463: sum3 = 0.0;
1464: sum4 = 0.0;
1465: sum5 = 0.0;
1466: sum6 = 0.0;
1467: sum7 = 0.0;
1468: sum8 = 0.0;
1469: sum9 = 0.0;
1470: for (j = 0; j < n; j++) {
1471: sum1 += v[jrow] * x[9 * idx[jrow]];
1472: sum2 += v[jrow] * x[9 * idx[jrow] + 1];
1473: sum3 += v[jrow] * x[9 * idx[jrow] + 2];
1474: sum4 += v[jrow] * x[9 * idx[jrow] + 3];
1475: sum5 += v[jrow] * x[9 * idx[jrow] + 4];
1476: sum6 += v[jrow] * x[9 * idx[jrow] + 5];
1477: sum7 += v[jrow] * x[9 * idx[jrow] + 6];
1478: sum8 += v[jrow] * x[9 * idx[jrow] + 7];
1479: sum9 += v[jrow] * x[9 * idx[jrow] + 8];
1480: jrow++;
1481: }
1482: y[9 * i] += sum1;
1483: y[9 * i + 1] += sum2;
1484: y[9 * i + 2] += sum3;
1485: y[9 * i + 3] += sum4;
1486: y[9 * i + 4] += sum5;
1487: y[9 * i + 5] += sum6;
1488: y[9 * i + 6] += sum7;
1489: y[9 * i + 7] += sum8;
1490: y[9 * i + 8] += sum9;
1491: }
1493: PetscLogFlops(18.0 * a->nz);
1494: VecRestoreArrayRead(xx, &x);
1495: VecRestoreArray(zz, &y);
1496: return 0;
1497: }
1499: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A, Vec xx, Vec yy, Vec zz)
1500: {
1501: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
1502: Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
1503: const PetscScalar *x, *v;
1504: PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9;
1505: const PetscInt m = b->AIJ->rmap->n, *idx;
1506: PetscInt n, i;
1508: if (yy != zz) VecCopy(yy, zz);
1509: VecGetArrayRead(xx, &x);
1510: VecGetArray(zz, &y);
1511: for (i = 0; i < m; i++) {
1512: idx = a->j + a->i[i];
1513: v = a->a + a->i[i];
1514: n = a->i[i + 1] - a->i[i];
1515: alpha1 = x[9 * i];
1516: alpha2 = x[9 * i + 1];
1517: alpha3 = x[9 * i + 2];
1518: alpha4 = x[9 * i + 3];
1519: alpha5 = x[9 * i + 4];
1520: alpha6 = x[9 * i + 5];
1521: alpha7 = x[9 * i + 6];
1522: alpha8 = x[9 * i + 7];
1523: alpha9 = x[9 * i + 8];
1524: while (n-- > 0) {
1525: y[9 * (*idx)] += alpha1 * (*v);
1526: y[9 * (*idx) + 1] += alpha2 * (*v);
1527: y[9 * (*idx) + 2] += alpha3 * (*v);
1528: y[9 * (*idx) + 3] += alpha4 * (*v);
1529: y[9 * (*idx) + 4] += alpha5 * (*v);
1530: y[9 * (*idx) + 5] += alpha6 * (*v);
1531: y[9 * (*idx) + 6] += alpha7 * (*v);
1532: y[9 * (*idx) + 7] += alpha8 * (*v);
1533: y[9 * (*idx) + 8] += alpha9 * (*v);
1534: idx++;
1535: v++;
1536: }
1537: }
1538: PetscLogFlops(18.0 * a->nz);
1539: VecRestoreArrayRead(xx, &x);
1540: VecRestoreArray(zz, &y);
1541: return 0;
1542: }
1543: PetscErrorCode MatMult_SeqMAIJ_10(Mat A, Vec xx, Vec yy)
1544: {
1545: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
1546: Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
1547: const PetscScalar *x, *v;
1548: PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1549: const PetscInt m = b->AIJ->rmap->n, *idx, *ii;
1550: PetscInt nonzerorow = 0, n, i, jrow, j;
1552: VecGetArrayRead(xx, &x);
1553: VecGetArray(yy, &y);
1554: idx = a->j;
1555: v = a->a;
1556: ii = a->i;
1558: for (i = 0; i < m; i++) {
1559: jrow = ii[i];
1560: n = ii[i + 1] - jrow;
1561: sum1 = 0.0;
1562: sum2 = 0.0;
1563: sum3 = 0.0;
1564: sum4 = 0.0;
1565: sum5 = 0.0;
1566: sum6 = 0.0;
1567: sum7 = 0.0;
1568: sum8 = 0.0;
1569: sum9 = 0.0;
1570: sum10 = 0.0;
1572: nonzerorow += (n > 0);
1573: for (j = 0; j < n; j++) {
1574: sum1 += v[jrow] * x[10 * idx[jrow]];
1575: sum2 += v[jrow] * x[10 * idx[jrow] + 1];
1576: sum3 += v[jrow] * x[10 * idx[jrow] + 2];
1577: sum4 += v[jrow] * x[10 * idx[jrow] + 3];
1578: sum5 += v[jrow] * x[10 * idx[jrow] + 4];
1579: sum6 += v[jrow] * x[10 * idx[jrow] + 5];
1580: sum7 += v[jrow] * x[10 * idx[jrow] + 6];
1581: sum8 += v[jrow] * x[10 * idx[jrow] + 7];
1582: sum9 += v[jrow] * x[10 * idx[jrow] + 8];
1583: sum10 += v[jrow] * x[10 * idx[jrow] + 9];
1584: jrow++;
1585: }
1586: y[10 * i] = sum1;
1587: y[10 * i + 1] = sum2;
1588: y[10 * i + 2] = sum3;
1589: y[10 * i + 3] = sum4;
1590: y[10 * i + 4] = sum5;
1591: y[10 * i + 5] = sum6;
1592: y[10 * i + 6] = sum7;
1593: y[10 * i + 7] = sum8;
1594: y[10 * i + 8] = sum9;
1595: y[10 * i + 9] = sum10;
1596: }
1598: PetscLogFlops(20.0 * a->nz - 10.0 * nonzerorow);
1599: VecRestoreArrayRead(xx, &x);
1600: VecRestoreArray(yy, &y);
1601: return 0;
1602: }
1604: PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A, Vec xx, Vec yy, Vec zz)
1605: {
1606: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
1607: Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
1608: const PetscScalar *x, *v;
1609: PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1610: const PetscInt m = b->AIJ->rmap->n, *idx, *ii;
1611: PetscInt n, i, jrow, j;
1613: if (yy != zz) VecCopy(yy, zz);
1614: VecGetArrayRead(xx, &x);
1615: VecGetArray(zz, &y);
1616: idx = a->j;
1617: v = a->a;
1618: ii = a->i;
1620: for (i = 0; i < m; i++) {
1621: jrow = ii[i];
1622: n = ii[i + 1] - jrow;
1623: sum1 = 0.0;
1624: sum2 = 0.0;
1625: sum3 = 0.0;
1626: sum4 = 0.0;
1627: sum5 = 0.0;
1628: sum6 = 0.0;
1629: sum7 = 0.0;
1630: sum8 = 0.0;
1631: sum9 = 0.0;
1632: sum10 = 0.0;
1633: for (j = 0; j < n; j++) {
1634: sum1 += v[jrow] * x[10 * idx[jrow]];
1635: sum2 += v[jrow] * x[10 * idx[jrow] + 1];
1636: sum3 += v[jrow] * x[10 * idx[jrow] + 2];
1637: sum4 += v[jrow] * x[10 * idx[jrow] + 3];
1638: sum5 += v[jrow] * x[10 * idx[jrow] + 4];
1639: sum6 += v[jrow] * x[10 * idx[jrow] + 5];
1640: sum7 += v[jrow] * x[10 * idx[jrow] + 6];
1641: sum8 += v[jrow] * x[10 * idx[jrow] + 7];
1642: sum9 += v[jrow] * x[10 * idx[jrow] + 8];
1643: sum10 += v[jrow] * x[10 * idx[jrow] + 9];
1644: jrow++;
1645: }
1646: y[10 * i] += sum1;
1647: y[10 * i + 1] += sum2;
1648: y[10 * i + 2] += sum3;
1649: y[10 * i + 3] += sum4;
1650: y[10 * i + 4] += sum5;
1651: y[10 * i + 5] += sum6;
1652: y[10 * i + 6] += sum7;
1653: y[10 * i + 7] += sum8;
1654: y[10 * i + 8] += sum9;
1655: y[10 * i + 9] += sum10;
1656: }
1658: PetscLogFlops(20.0 * a->nz);
1659: VecRestoreArrayRead(xx, &x);
1660: VecRestoreArray(yy, &y);
1661: return 0;
1662: }
1664: PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A, Vec xx, Vec yy)
1665: {
1666: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
1667: Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
1668: const PetscScalar *x, *v;
1669: PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10;
1670: const PetscInt m = b->AIJ->rmap->n, *idx;
1671: PetscInt n, i;
1673: VecSet(yy, 0.0);
1674: VecGetArrayRead(xx, &x);
1675: VecGetArray(yy, &y);
1677: for (i = 0; i < m; i++) {
1678: idx = a->j + a->i[i];
1679: v = a->a + a->i[i];
1680: n = a->i[i + 1] - a->i[i];
1681: alpha1 = x[10 * i];
1682: alpha2 = x[10 * i + 1];
1683: alpha3 = x[10 * i + 2];
1684: alpha4 = x[10 * i + 3];
1685: alpha5 = x[10 * i + 4];
1686: alpha6 = x[10 * i + 5];
1687: alpha7 = x[10 * i + 6];
1688: alpha8 = x[10 * i + 7];
1689: alpha9 = x[10 * i + 8];
1690: alpha10 = x[10 * i + 9];
1691: while (n-- > 0) {
1692: y[10 * (*idx)] += alpha1 * (*v);
1693: y[10 * (*idx) + 1] += alpha2 * (*v);
1694: y[10 * (*idx) + 2] += alpha3 * (*v);
1695: y[10 * (*idx) + 3] += alpha4 * (*v);
1696: y[10 * (*idx) + 4] += alpha5 * (*v);
1697: y[10 * (*idx) + 5] += alpha6 * (*v);
1698: y[10 * (*idx) + 6] += alpha7 * (*v);
1699: y[10 * (*idx) + 7] += alpha8 * (*v);
1700: y[10 * (*idx) + 8] += alpha9 * (*v);
1701: y[10 * (*idx) + 9] += alpha10 * (*v);
1702: idx++;
1703: v++;
1704: }
1705: }
1706: PetscLogFlops(20.0 * a->nz);
1707: VecRestoreArrayRead(xx, &x);
1708: VecRestoreArray(yy, &y);
1709: return 0;
1710: }
1712: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A, Vec xx, Vec yy, Vec zz)
1713: {
1714: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
1715: Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
1716: const PetscScalar *x, *v;
1717: PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10;
1718: const PetscInt m = b->AIJ->rmap->n, *idx;
1719: PetscInt n, i;
1721: if (yy != zz) VecCopy(yy, zz);
1722: VecGetArrayRead(xx, &x);
1723: VecGetArray(zz, &y);
1724: for (i = 0; i < m; i++) {
1725: idx = a->j + a->i[i];
1726: v = a->a + a->i[i];
1727: n = a->i[i + 1] - a->i[i];
1728: alpha1 = x[10 * i];
1729: alpha2 = x[10 * i + 1];
1730: alpha3 = x[10 * i + 2];
1731: alpha4 = x[10 * i + 3];
1732: alpha5 = x[10 * i + 4];
1733: alpha6 = x[10 * i + 5];
1734: alpha7 = x[10 * i + 6];
1735: alpha8 = x[10 * i + 7];
1736: alpha9 = x[10 * i + 8];
1737: alpha10 = x[10 * i + 9];
1738: while (n-- > 0) {
1739: y[10 * (*idx)] += alpha1 * (*v);
1740: y[10 * (*idx) + 1] += alpha2 * (*v);
1741: y[10 * (*idx) + 2] += alpha3 * (*v);
1742: y[10 * (*idx) + 3] += alpha4 * (*v);
1743: y[10 * (*idx) + 4] += alpha5 * (*v);
1744: y[10 * (*idx) + 5] += alpha6 * (*v);
1745: y[10 * (*idx) + 6] += alpha7 * (*v);
1746: y[10 * (*idx) + 7] += alpha8 * (*v);
1747: y[10 * (*idx) + 8] += alpha9 * (*v);
1748: y[10 * (*idx) + 9] += alpha10 * (*v);
1749: idx++;
1750: v++;
1751: }
1752: }
1753: PetscLogFlops(20.0 * a->nz);
1754: VecRestoreArrayRead(xx, &x);
1755: VecRestoreArray(zz, &y);
1756: return 0;
1757: }
1759: /*--------------------------------------------------------------------------------------------*/
1760: PetscErrorCode MatMult_SeqMAIJ_11(Mat A, Vec xx, Vec yy)
1761: {
1762: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
1763: Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
1764: const PetscScalar *x, *v;
1765: PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1766: const PetscInt m = b->AIJ->rmap->n, *idx, *ii;
1767: PetscInt nonzerorow = 0, n, i, jrow, j;
1769: VecGetArrayRead(xx, &x);
1770: VecGetArray(yy, &y);
1771: idx = a->j;
1772: v = a->a;
1773: ii = a->i;
1775: for (i = 0; i < m; i++) {
1776: jrow = ii[i];
1777: n = ii[i + 1] - jrow;
1778: sum1 = 0.0;
1779: sum2 = 0.0;
1780: sum3 = 0.0;
1781: sum4 = 0.0;
1782: sum5 = 0.0;
1783: sum6 = 0.0;
1784: sum7 = 0.0;
1785: sum8 = 0.0;
1786: sum9 = 0.0;
1787: sum10 = 0.0;
1788: sum11 = 0.0;
1790: nonzerorow += (n > 0);
1791: for (j = 0; j < n; j++) {
1792: sum1 += v[jrow] * x[11 * idx[jrow]];
1793: sum2 += v[jrow] * x[11 * idx[jrow] + 1];
1794: sum3 += v[jrow] * x[11 * idx[jrow] + 2];
1795: sum4 += v[jrow] * x[11 * idx[jrow] + 3];
1796: sum5 += v[jrow] * x[11 * idx[jrow] + 4];
1797: sum6 += v[jrow] * x[11 * idx[jrow] + 5];
1798: sum7 += v[jrow] * x[11 * idx[jrow] + 6];
1799: sum8 += v[jrow] * x[11 * idx[jrow] + 7];
1800: sum9 += v[jrow] * x[11 * idx[jrow] + 8];
1801: sum10 += v[jrow] * x[11 * idx[jrow] + 9];
1802: sum11 += v[jrow] * x[11 * idx[jrow] + 10];
1803: jrow++;
1804: }
1805: y[11 * i] = sum1;
1806: y[11 * i + 1] = sum2;
1807: y[11 * i + 2] = sum3;
1808: y[11 * i + 3] = sum4;
1809: y[11 * i + 4] = sum5;
1810: y[11 * i + 5] = sum6;
1811: y[11 * i + 6] = sum7;
1812: y[11 * i + 7] = sum8;
1813: y[11 * i + 8] = sum9;
1814: y[11 * i + 9] = sum10;
1815: y[11 * i + 10] = sum11;
1816: }
1818: PetscLogFlops(22.0 * a->nz - 11 * nonzerorow);
1819: VecRestoreArrayRead(xx, &x);
1820: VecRestoreArray(yy, &y);
1821: return 0;
1822: }
1824: PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A, Vec xx, Vec yy, Vec zz)
1825: {
1826: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
1827: Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
1828: const PetscScalar *x, *v;
1829: PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1830: const PetscInt m = b->AIJ->rmap->n, *idx, *ii;
1831: PetscInt n, i, jrow, j;
1833: if (yy != zz) VecCopy(yy, zz);
1834: VecGetArrayRead(xx, &x);
1835: VecGetArray(zz, &y);
1836: idx = a->j;
1837: v = a->a;
1838: ii = a->i;
1840: for (i = 0; i < m; i++) {
1841: jrow = ii[i];
1842: n = ii[i + 1] - jrow;
1843: sum1 = 0.0;
1844: sum2 = 0.0;
1845: sum3 = 0.0;
1846: sum4 = 0.0;
1847: sum5 = 0.0;
1848: sum6 = 0.0;
1849: sum7 = 0.0;
1850: sum8 = 0.0;
1851: sum9 = 0.0;
1852: sum10 = 0.0;
1853: sum11 = 0.0;
1854: for (j = 0; j < n; j++) {
1855: sum1 += v[jrow] * x[11 * idx[jrow]];
1856: sum2 += v[jrow] * x[11 * idx[jrow] + 1];
1857: sum3 += v[jrow] * x[11 * idx[jrow] + 2];
1858: sum4 += v[jrow] * x[11 * idx[jrow] + 3];
1859: sum5 += v[jrow] * x[11 * idx[jrow] + 4];
1860: sum6 += v[jrow] * x[11 * idx[jrow] + 5];
1861: sum7 += v[jrow] * x[11 * idx[jrow] + 6];
1862: sum8 += v[jrow] * x[11 * idx[jrow] + 7];
1863: sum9 += v[jrow] * x[11 * idx[jrow] + 8];
1864: sum10 += v[jrow] * x[11 * idx[jrow] + 9];
1865: sum11 += v[jrow] * x[11 * idx[jrow] + 10];
1866: jrow++;
1867: }
1868: y[11 * i] += sum1;
1869: y[11 * i + 1] += sum2;
1870: y[11 * i + 2] += sum3;
1871: y[11 * i + 3] += sum4;
1872: y[11 * i + 4] += sum5;
1873: y[11 * i + 5] += sum6;
1874: y[11 * i + 6] += sum7;
1875: y[11 * i + 7] += sum8;
1876: y[11 * i + 8] += sum9;
1877: y[11 * i + 9] += sum10;
1878: y[11 * i + 10] += sum11;
1879: }
1881: PetscLogFlops(22.0 * a->nz);
1882: VecRestoreArrayRead(xx, &x);
1883: VecRestoreArray(yy, &y);
1884: return 0;
1885: }
1887: PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A, Vec xx, Vec yy)
1888: {
1889: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
1890: Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
1891: const PetscScalar *x, *v;
1892: PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10, alpha11;
1893: const PetscInt m = b->AIJ->rmap->n, *idx;
1894: PetscInt n, i;
1896: VecSet(yy, 0.0);
1897: VecGetArrayRead(xx, &x);
1898: VecGetArray(yy, &y);
1900: for (i = 0; i < m; i++) {
1901: idx = a->j + a->i[i];
1902: v = a->a + a->i[i];
1903: n = a->i[i + 1] - a->i[i];
1904: alpha1 = x[11 * i];
1905: alpha2 = x[11 * i + 1];
1906: alpha3 = x[11 * i + 2];
1907: alpha4 = x[11 * i + 3];
1908: alpha5 = x[11 * i + 4];
1909: alpha6 = x[11 * i + 5];
1910: alpha7 = x[11 * i + 6];
1911: alpha8 = x[11 * i + 7];
1912: alpha9 = x[11 * i + 8];
1913: alpha10 = x[11 * i + 9];
1914: alpha11 = x[11 * i + 10];
1915: while (n-- > 0) {
1916: y[11 * (*idx)] += alpha1 * (*v);
1917: y[11 * (*idx) + 1] += alpha2 * (*v);
1918: y[11 * (*idx) + 2] += alpha3 * (*v);
1919: y[11 * (*idx) + 3] += alpha4 * (*v);
1920: y[11 * (*idx) + 4] += alpha5 * (*v);
1921: y[11 * (*idx) + 5] += alpha6 * (*v);
1922: y[11 * (*idx) + 6] += alpha7 * (*v);
1923: y[11 * (*idx) + 7] += alpha8 * (*v);
1924: y[11 * (*idx) + 8] += alpha9 * (*v);
1925: y[11 * (*idx) + 9] += alpha10 * (*v);
1926: y[11 * (*idx) + 10] += alpha11 * (*v);
1927: idx++;
1928: v++;
1929: }
1930: }
1931: PetscLogFlops(22.0 * a->nz);
1932: VecRestoreArrayRead(xx, &x);
1933: VecRestoreArray(yy, &y);
1934: return 0;
1935: }
1937: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A, Vec xx, Vec yy, Vec zz)
1938: {
1939: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
1940: Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
1941: const PetscScalar *x, *v;
1942: PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10, alpha11;
1943: const PetscInt m = b->AIJ->rmap->n, *idx;
1944: PetscInt n, i;
1946: if (yy != zz) VecCopy(yy, zz);
1947: VecGetArrayRead(xx, &x);
1948: VecGetArray(zz, &y);
1949: for (i = 0; i < m; i++) {
1950: idx = a->j + a->i[i];
1951: v = a->a + a->i[i];
1952: n = a->i[i + 1] - a->i[i];
1953: alpha1 = x[11 * i];
1954: alpha2 = x[11 * i + 1];
1955: alpha3 = x[11 * i + 2];
1956: alpha4 = x[11 * i + 3];
1957: alpha5 = x[11 * i + 4];
1958: alpha6 = x[11 * i + 5];
1959: alpha7 = x[11 * i + 6];
1960: alpha8 = x[11 * i + 7];
1961: alpha9 = x[11 * i + 8];
1962: alpha10 = x[11 * i + 9];
1963: alpha11 = x[11 * i + 10];
1964: while (n-- > 0) {
1965: y[11 * (*idx)] += alpha1 * (*v);
1966: y[11 * (*idx) + 1] += alpha2 * (*v);
1967: y[11 * (*idx) + 2] += alpha3 * (*v);
1968: y[11 * (*idx) + 3] += alpha4 * (*v);
1969: y[11 * (*idx) + 4] += alpha5 * (*v);
1970: y[11 * (*idx) + 5] += alpha6 * (*v);
1971: y[11 * (*idx) + 6] += alpha7 * (*v);
1972: y[11 * (*idx) + 7] += alpha8 * (*v);
1973: y[11 * (*idx) + 8] += alpha9 * (*v);
1974: y[11 * (*idx) + 9] += alpha10 * (*v);
1975: y[11 * (*idx) + 10] += alpha11 * (*v);
1976: idx++;
1977: v++;
1978: }
1979: }
1980: PetscLogFlops(22.0 * a->nz);
1981: VecRestoreArrayRead(xx, &x);
1982: VecRestoreArray(zz, &y);
1983: return 0;
1984: }
1986: /*--------------------------------------------------------------------------------------------*/
1987: PetscErrorCode MatMult_SeqMAIJ_16(Mat A, Vec xx, Vec yy)
1988: {
1989: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
1990: Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
1991: const PetscScalar *x, *v;
1992: PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1993: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1994: const PetscInt m = b->AIJ->rmap->n, *idx, *ii;
1995: PetscInt nonzerorow = 0, n, i, jrow, j;
1997: VecGetArrayRead(xx, &x);
1998: VecGetArray(yy, &y);
1999: idx = a->j;
2000: v = a->a;
2001: ii = a->i;
2003: for (i = 0; i < m; i++) {
2004: jrow = ii[i];
2005: n = ii[i + 1] - jrow;
2006: sum1 = 0.0;
2007: sum2 = 0.0;
2008: sum3 = 0.0;
2009: sum4 = 0.0;
2010: sum5 = 0.0;
2011: sum6 = 0.0;
2012: sum7 = 0.0;
2013: sum8 = 0.0;
2014: sum9 = 0.0;
2015: sum10 = 0.0;
2016: sum11 = 0.0;
2017: sum12 = 0.0;
2018: sum13 = 0.0;
2019: sum14 = 0.0;
2020: sum15 = 0.0;
2021: sum16 = 0.0;
2023: nonzerorow += (n > 0);
2024: for (j = 0; j < n; j++) {
2025: sum1 += v[jrow] * x[16 * idx[jrow]];
2026: sum2 += v[jrow] * x[16 * idx[jrow] + 1];
2027: sum3 += v[jrow] * x[16 * idx[jrow] + 2];
2028: sum4 += v[jrow] * x[16 * idx[jrow] + 3];
2029: sum5 += v[jrow] * x[16 * idx[jrow] + 4];
2030: sum6 += v[jrow] * x[16 * idx[jrow] + 5];
2031: sum7 += v[jrow] * x[16 * idx[jrow] + 6];
2032: sum8 += v[jrow] * x[16 * idx[jrow] + 7];
2033: sum9 += v[jrow] * x[16 * idx[jrow] + 8];
2034: sum10 += v[jrow] * x[16 * idx[jrow] + 9];
2035: sum11 += v[jrow] * x[16 * idx[jrow] + 10];
2036: sum12 += v[jrow] * x[16 * idx[jrow] + 11];
2037: sum13 += v[jrow] * x[16 * idx[jrow] + 12];
2038: sum14 += v[jrow] * x[16 * idx[jrow] + 13];
2039: sum15 += v[jrow] * x[16 * idx[jrow] + 14];
2040: sum16 += v[jrow] * x[16 * idx[jrow] + 15];
2041: jrow++;
2042: }
2043: y[16 * i] = sum1;
2044: y[16 * i + 1] = sum2;
2045: y[16 * i + 2] = sum3;
2046: y[16 * i + 3] = sum4;
2047: y[16 * i + 4] = sum5;
2048: y[16 * i + 5] = sum6;
2049: y[16 * i + 6] = sum7;
2050: y[16 * i + 7] = sum8;
2051: y[16 * i + 8] = sum9;
2052: y[16 * i + 9] = sum10;
2053: y[16 * i + 10] = sum11;
2054: y[16 * i + 11] = sum12;
2055: y[16 * i + 12] = sum13;
2056: y[16 * i + 13] = sum14;
2057: y[16 * i + 14] = sum15;
2058: y[16 * i + 15] = sum16;
2059: }
2061: PetscLogFlops(32.0 * a->nz - 16.0 * nonzerorow);
2062: VecRestoreArrayRead(xx, &x);
2063: VecRestoreArray(yy, &y);
2064: return 0;
2065: }
2067: PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A, Vec xx, Vec yy)
2068: {
2069: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
2070: Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
2071: const PetscScalar *x, *v;
2072: PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8;
2073: PetscScalar alpha9, alpha10, alpha11, alpha12, alpha13, alpha14, alpha15, alpha16;
2074: const PetscInt m = b->AIJ->rmap->n, *idx;
2075: PetscInt n, i;
2077: VecSet(yy, 0.0);
2078: VecGetArrayRead(xx, &x);
2079: VecGetArray(yy, &y);
2081: for (i = 0; i < m; i++) {
2082: idx = a->j + a->i[i];
2083: v = a->a + a->i[i];
2084: n = a->i[i + 1] - a->i[i];
2085: alpha1 = x[16 * i];
2086: alpha2 = x[16 * i + 1];
2087: alpha3 = x[16 * i + 2];
2088: alpha4 = x[16 * i + 3];
2089: alpha5 = x[16 * i + 4];
2090: alpha6 = x[16 * i + 5];
2091: alpha7 = x[16 * i + 6];
2092: alpha8 = x[16 * i + 7];
2093: alpha9 = x[16 * i + 8];
2094: alpha10 = x[16 * i + 9];
2095: alpha11 = x[16 * i + 10];
2096: alpha12 = x[16 * i + 11];
2097: alpha13 = x[16 * i + 12];
2098: alpha14 = x[16 * i + 13];
2099: alpha15 = x[16 * i + 14];
2100: alpha16 = x[16 * i + 15];
2101: while (n-- > 0) {
2102: y[16 * (*idx)] += alpha1 * (*v);
2103: y[16 * (*idx) + 1] += alpha2 * (*v);
2104: y[16 * (*idx) + 2] += alpha3 * (*v);
2105: y[16 * (*idx) + 3] += alpha4 * (*v);
2106: y[16 * (*idx) + 4] += alpha5 * (*v);
2107: y[16 * (*idx) + 5] += alpha6 * (*v);
2108: y[16 * (*idx) + 6] += alpha7 * (*v);
2109: y[16 * (*idx) + 7] += alpha8 * (*v);
2110: y[16 * (*idx) + 8] += alpha9 * (*v);
2111: y[16 * (*idx) + 9] += alpha10 * (*v);
2112: y[16 * (*idx) + 10] += alpha11 * (*v);
2113: y[16 * (*idx) + 11] += alpha12 * (*v);
2114: y[16 * (*idx) + 12] += alpha13 * (*v);
2115: y[16 * (*idx) + 13] += alpha14 * (*v);
2116: y[16 * (*idx) + 14] += alpha15 * (*v);
2117: y[16 * (*idx) + 15] += alpha16 * (*v);
2118: idx++;
2119: v++;
2120: }
2121: }
2122: PetscLogFlops(32.0 * a->nz);
2123: VecRestoreArrayRead(xx, &x);
2124: VecRestoreArray(yy, &y);
2125: return 0;
2126: }
2128: PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A, Vec xx, Vec yy, Vec zz)
2129: {
2130: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
2131: Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
2132: const PetscScalar *x, *v;
2133: PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2134: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2135: const PetscInt m = b->AIJ->rmap->n, *idx, *ii;
2136: PetscInt n, i, jrow, j;
2138: if (yy != zz) VecCopy(yy, zz);
2139: VecGetArrayRead(xx, &x);
2140: VecGetArray(zz, &y);
2141: idx = a->j;
2142: v = a->a;
2143: ii = a->i;
2145: for (i = 0; i < m; i++) {
2146: jrow = ii[i];
2147: n = ii[i + 1] - jrow;
2148: sum1 = 0.0;
2149: sum2 = 0.0;
2150: sum3 = 0.0;
2151: sum4 = 0.0;
2152: sum5 = 0.0;
2153: sum6 = 0.0;
2154: sum7 = 0.0;
2155: sum8 = 0.0;
2156: sum9 = 0.0;
2157: sum10 = 0.0;
2158: sum11 = 0.0;
2159: sum12 = 0.0;
2160: sum13 = 0.0;
2161: sum14 = 0.0;
2162: sum15 = 0.0;
2163: sum16 = 0.0;
2164: for (j = 0; j < n; j++) {
2165: sum1 += v[jrow] * x[16 * idx[jrow]];
2166: sum2 += v[jrow] * x[16 * idx[jrow] + 1];
2167: sum3 += v[jrow] * x[16 * idx[jrow] + 2];
2168: sum4 += v[jrow] * x[16 * idx[jrow] + 3];
2169: sum5 += v[jrow] * x[16 * idx[jrow] + 4];
2170: sum6 += v[jrow] * x[16 * idx[jrow] + 5];
2171: sum7 += v[jrow] * x[16 * idx[jrow] + 6];
2172: sum8 += v[jrow] * x[16 * idx[jrow] + 7];
2173: sum9 += v[jrow] * x[16 * idx[jrow] + 8];
2174: sum10 += v[jrow] * x[16 * idx[jrow] + 9];
2175: sum11 += v[jrow] * x[16 * idx[jrow] + 10];
2176: sum12 += v[jrow] * x[16 * idx[jrow] + 11];
2177: sum13 += v[jrow] * x[16 * idx[jrow] + 12];
2178: sum14 += v[jrow] * x[16 * idx[jrow] + 13];
2179: sum15 += v[jrow] * x[16 * idx[jrow] + 14];
2180: sum16 += v[jrow] * x[16 * idx[jrow] + 15];
2181: jrow++;
2182: }
2183: y[16 * i] += sum1;
2184: y[16 * i + 1] += sum2;
2185: y[16 * i + 2] += sum3;
2186: y[16 * i + 3] += sum4;
2187: y[16 * i + 4] += sum5;
2188: y[16 * i + 5] += sum6;
2189: y[16 * i + 6] += sum7;
2190: y[16 * i + 7] += sum8;
2191: y[16 * i + 8] += sum9;
2192: y[16 * i + 9] += sum10;
2193: y[16 * i + 10] += sum11;
2194: y[16 * i + 11] += sum12;
2195: y[16 * i + 12] += sum13;
2196: y[16 * i + 13] += sum14;
2197: y[16 * i + 14] += sum15;
2198: y[16 * i + 15] += sum16;
2199: }
2201: PetscLogFlops(32.0 * a->nz);
2202: VecRestoreArrayRead(xx, &x);
2203: VecRestoreArray(zz, &y);
2204: return 0;
2205: }
2207: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A, Vec xx, Vec yy, Vec zz)
2208: {
2209: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
2210: Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
2211: const PetscScalar *x, *v;
2212: PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8;
2213: PetscScalar alpha9, alpha10, alpha11, alpha12, alpha13, alpha14, alpha15, alpha16;
2214: const PetscInt m = b->AIJ->rmap->n, *idx;
2215: PetscInt n, i;
2217: if (yy != zz) VecCopy(yy, zz);
2218: VecGetArrayRead(xx, &x);
2219: VecGetArray(zz, &y);
2220: for (i = 0; i < m; i++) {
2221: idx = a->j + a->i[i];
2222: v = a->a + a->i[i];
2223: n = a->i[i + 1] - a->i[i];
2224: alpha1 = x[16 * i];
2225: alpha2 = x[16 * i + 1];
2226: alpha3 = x[16 * i + 2];
2227: alpha4 = x[16 * i + 3];
2228: alpha5 = x[16 * i + 4];
2229: alpha6 = x[16 * i + 5];
2230: alpha7 = x[16 * i + 6];
2231: alpha8 = x[16 * i + 7];
2232: alpha9 = x[16 * i + 8];
2233: alpha10 = x[16 * i + 9];
2234: alpha11 = x[16 * i + 10];
2235: alpha12 = x[16 * i + 11];
2236: alpha13 = x[16 * i + 12];
2237: alpha14 = x[16 * i + 13];
2238: alpha15 = x[16 * i + 14];
2239: alpha16 = x[16 * i + 15];
2240: while (n-- > 0) {
2241: y[16 * (*idx)] += alpha1 * (*v);
2242: y[16 * (*idx) + 1] += alpha2 * (*v);
2243: y[16 * (*idx) + 2] += alpha3 * (*v);
2244: y[16 * (*idx) + 3] += alpha4 * (*v);
2245: y[16 * (*idx) + 4] += alpha5 * (*v);
2246: y[16 * (*idx) + 5] += alpha6 * (*v);
2247: y[16 * (*idx) + 6] += alpha7 * (*v);
2248: y[16 * (*idx) + 7] += alpha8 * (*v);
2249: y[16 * (*idx) + 8] += alpha9 * (*v);
2250: y[16 * (*idx) + 9] += alpha10 * (*v);
2251: y[16 * (*idx) + 10] += alpha11 * (*v);
2252: y[16 * (*idx) + 11] += alpha12 * (*v);
2253: y[16 * (*idx) + 12] += alpha13 * (*v);
2254: y[16 * (*idx) + 13] += alpha14 * (*v);
2255: y[16 * (*idx) + 14] += alpha15 * (*v);
2256: y[16 * (*idx) + 15] += alpha16 * (*v);
2257: idx++;
2258: v++;
2259: }
2260: }
2261: PetscLogFlops(32.0 * a->nz);
2262: VecRestoreArrayRead(xx, &x);
2263: VecRestoreArray(zz, &y);
2264: return 0;
2265: }
2267: /*--------------------------------------------------------------------------------------------*/
2268: PetscErrorCode MatMult_SeqMAIJ_18(Mat A, Vec xx, Vec yy)
2269: {
2270: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
2271: Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
2272: const PetscScalar *x, *v;
2273: PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2274: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2275: const PetscInt m = b->AIJ->rmap->n, *idx, *ii;
2276: PetscInt nonzerorow = 0, n, i, jrow, j;
2278: VecGetArrayRead(xx, &x);
2279: VecGetArray(yy, &y);
2280: idx = a->j;
2281: v = a->a;
2282: ii = a->i;
2284: for (i = 0; i < m; i++) {
2285: jrow = ii[i];
2286: n = ii[i + 1] - jrow;
2287: sum1 = 0.0;
2288: sum2 = 0.0;
2289: sum3 = 0.0;
2290: sum4 = 0.0;
2291: sum5 = 0.0;
2292: sum6 = 0.0;
2293: sum7 = 0.0;
2294: sum8 = 0.0;
2295: sum9 = 0.0;
2296: sum10 = 0.0;
2297: sum11 = 0.0;
2298: sum12 = 0.0;
2299: sum13 = 0.0;
2300: sum14 = 0.0;
2301: sum15 = 0.0;
2302: sum16 = 0.0;
2303: sum17 = 0.0;
2304: sum18 = 0.0;
2306: nonzerorow += (n > 0);
2307: for (j = 0; j < n; j++) {
2308: sum1 += v[jrow] * x[18 * idx[jrow]];
2309: sum2 += v[jrow] * x[18 * idx[jrow] + 1];
2310: sum3 += v[jrow] * x[18 * idx[jrow] + 2];
2311: sum4 += v[jrow] * x[18 * idx[jrow] + 3];
2312: sum5 += v[jrow] * x[18 * idx[jrow] + 4];
2313: sum6 += v[jrow] * x[18 * idx[jrow] + 5];
2314: sum7 += v[jrow] * x[18 * idx[jrow] + 6];
2315: sum8 += v[jrow] * x[18 * idx[jrow] + 7];
2316: sum9 += v[jrow] * x[18 * idx[jrow] + 8];
2317: sum10 += v[jrow] * x[18 * idx[jrow] + 9];
2318: sum11 += v[jrow] * x[18 * idx[jrow] + 10];
2319: sum12 += v[jrow] * x[18 * idx[jrow] + 11];
2320: sum13 += v[jrow] * x[18 * idx[jrow] + 12];
2321: sum14 += v[jrow] * x[18 * idx[jrow] + 13];
2322: sum15 += v[jrow] * x[18 * idx[jrow] + 14];
2323: sum16 += v[jrow] * x[18 * idx[jrow] + 15];
2324: sum17 += v[jrow] * x[18 * idx[jrow] + 16];
2325: sum18 += v[jrow] * x[18 * idx[jrow] + 17];
2326: jrow++;
2327: }
2328: y[18 * i] = sum1;
2329: y[18 * i + 1] = sum2;
2330: y[18 * i + 2] = sum3;
2331: y[18 * i + 3] = sum4;
2332: y[18 * i + 4] = sum5;
2333: y[18 * i + 5] = sum6;
2334: y[18 * i + 6] = sum7;
2335: y[18 * i + 7] = sum8;
2336: y[18 * i + 8] = sum9;
2337: y[18 * i + 9] = sum10;
2338: y[18 * i + 10] = sum11;
2339: y[18 * i + 11] = sum12;
2340: y[18 * i + 12] = sum13;
2341: y[18 * i + 13] = sum14;
2342: y[18 * i + 14] = sum15;
2343: y[18 * i + 15] = sum16;
2344: y[18 * i + 16] = sum17;
2345: y[18 * i + 17] = sum18;
2346: }
2348: PetscLogFlops(36.0 * a->nz - 18.0 * nonzerorow);
2349: VecRestoreArrayRead(xx, &x);
2350: VecRestoreArray(yy, &y);
2351: return 0;
2352: }
2354: PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A, Vec xx, Vec yy)
2355: {
2356: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
2357: Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
2358: const PetscScalar *x, *v;
2359: PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8;
2360: PetscScalar alpha9, alpha10, alpha11, alpha12, alpha13, alpha14, alpha15, alpha16, alpha17, alpha18;
2361: const PetscInt m = b->AIJ->rmap->n, *idx;
2362: PetscInt n, i;
2364: VecSet(yy, 0.0);
2365: VecGetArrayRead(xx, &x);
2366: VecGetArray(yy, &y);
2368: for (i = 0; i < m; i++) {
2369: idx = a->j + a->i[i];
2370: v = a->a + a->i[i];
2371: n = a->i[i + 1] - a->i[i];
2372: alpha1 = x[18 * i];
2373: alpha2 = x[18 * i + 1];
2374: alpha3 = x[18 * i + 2];
2375: alpha4 = x[18 * i + 3];
2376: alpha5 = x[18 * i + 4];
2377: alpha6 = x[18 * i + 5];
2378: alpha7 = x[18 * i + 6];
2379: alpha8 = x[18 * i + 7];
2380: alpha9 = x[18 * i + 8];
2381: alpha10 = x[18 * i + 9];
2382: alpha11 = x[18 * i + 10];
2383: alpha12 = x[18 * i + 11];
2384: alpha13 = x[18 * i + 12];
2385: alpha14 = x[18 * i + 13];
2386: alpha15 = x[18 * i + 14];
2387: alpha16 = x[18 * i + 15];
2388: alpha17 = x[18 * i + 16];
2389: alpha18 = x[18 * i + 17];
2390: while (n-- > 0) {
2391: y[18 * (*idx)] += alpha1 * (*v);
2392: y[18 * (*idx) + 1] += alpha2 * (*v);
2393: y[18 * (*idx) + 2] += alpha3 * (*v);
2394: y[18 * (*idx) + 3] += alpha4 * (*v);
2395: y[18 * (*idx) + 4] += alpha5 * (*v);
2396: y[18 * (*idx) + 5] += alpha6 * (*v);
2397: y[18 * (*idx) + 6] += alpha7 * (*v);
2398: y[18 * (*idx) + 7] += alpha8 * (*v);
2399: y[18 * (*idx) + 8] += alpha9 * (*v);
2400: y[18 * (*idx) + 9] += alpha10 * (*v);
2401: y[18 * (*idx) + 10] += alpha11 * (*v);
2402: y[18 * (*idx) + 11] += alpha12 * (*v);
2403: y[18 * (*idx) + 12] += alpha13 * (*v);
2404: y[18 * (*idx) + 13] += alpha14 * (*v);
2405: y[18 * (*idx) + 14] += alpha15 * (*v);
2406: y[18 * (*idx) + 15] += alpha16 * (*v);
2407: y[18 * (*idx) + 16] += alpha17 * (*v);
2408: y[18 * (*idx) + 17] += alpha18 * (*v);
2409: idx++;
2410: v++;
2411: }
2412: }
2413: PetscLogFlops(36.0 * a->nz);
2414: VecRestoreArrayRead(xx, &x);
2415: VecRestoreArray(yy, &y);
2416: return 0;
2417: }
2419: PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A, Vec xx, Vec yy, Vec zz)
2420: {
2421: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
2422: Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
2423: const PetscScalar *x, *v;
2424: PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2425: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2426: const PetscInt m = b->AIJ->rmap->n, *idx, *ii;
2427: PetscInt n, i, jrow, j;
2429: if (yy != zz) VecCopy(yy, zz);
2430: VecGetArrayRead(xx, &x);
2431: VecGetArray(zz, &y);
2432: idx = a->j;
2433: v = a->a;
2434: ii = a->i;
2436: for (i = 0; i < m; i++) {
2437: jrow = ii[i];
2438: n = ii[i + 1] - jrow;
2439: sum1 = 0.0;
2440: sum2 = 0.0;
2441: sum3 = 0.0;
2442: sum4 = 0.0;
2443: sum5 = 0.0;
2444: sum6 = 0.0;
2445: sum7 = 0.0;
2446: sum8 = 0.0;
2447: sum9 = 0.0;
2448: sum10 = 0.0;
2449: sum11 = 0.0;
2450: sum12 = 0.0;
2451: sum13 = 0.0;
2452: sum14 = 0.0;
2453: sum15 = 0.0;
2454: sum16 = 0.0;
2455: sum17 = 0.0;
2456: sum18 = 0.0;
2457: for (j = 0; j < n; j++) {
2458: sum1 += v[jrow] * x[18 * idx[jrow]];
2459: sum2 += v[jrow] * x[18 * idx[jrow] + 1];
2460: sum3 += v[jrow] * x[18 * idx[jrow] + 2];
2461: sum4 += v[jrow] * x[18 * idx[jrow] + 3];
2462: sum5 += v[jrow] * x[18 * idx[jrow] + 4];
2463: sum6 += v[jrow] * x[18 * idx[jrow] + 5];
2464: sum7 += v[jrow] * x[18 * idx[jrow] + 6];
2465: sum8 += v[jrow] * x[18 * idx[jrow] + 7];
2466: sum9 += v[jrow] * x[18 * idx[jrow] + 8];
2467: sum10 += v[jrow] * x[18 * idx[jrow] + 9];
2468: sum11 += v[jrow] * x[18 * idx[jrow] + 10];
2469: sum12 += v[jrow] * x[18 * idx[jrow] + 11];
2470: sum13 += v[jrow] * x[18 * idx[jrow] + 12];
2471: sum14 += v[jrow] * x[18 * idx[jrow] + 13];
2472: sum15 += v[jrow] * x[18 * idx[jrow] + 14];
2473: sum16 += v[jrow] * x[18 * idx[jrow] + 15];
2474: sum17 += v[jrow] * x[18 * idx[jrow] + 16];
2475: sum18 += v[jrow] * x[18 * idx[jrow] + 17];
2476: jrow++;
2477: }
2478: y[18 * i] += sum1;
2479: y[18 * i + 1] += sum2;
2480: y[18 * i + 2] += sum3;
2481: y[18 * i + 3] += sum4;
2482: y[18 * i + 4] += sum5;
2483: y[18 * i + 5] += sum6;
2484: y[18 * i + 6] += sum7;
2485: y[18 * i + 7] += sum8;
2486: y[18 * i + 8] += sum9;
2487: y[18 * i + 9] += sum10;
2488: y[18 * i + 10] += sum11;
2489: y[18 * i + 11] += sum12;
2490: y[18 * i + 12] += sum13;
2491: y[18 * i + 13] += sum14;
2492: y[18 * i + 14] += sum15;
2493: y[18 * i + 15] += sum16;
2494: y[18 * i + 16] += sum17;
2495: y[18 * i + 17] += sum18;
2496: }
2498: PetscLogFlops(36.0 * a->nz);
2499: VecRestoreArrayRead(xx, &x);
2500: VecRestoreArray(zz, &y);
2501: return 0;
2502: }
2504: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A, Vec xx, Vec yy, Vec zz)
2505: {
2506: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
2507: Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
2508: const PetscScalar *x, *v;
2509: PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8;
2510: PetscScalar alpha9, alpha10, alpha11, alpha12, alpha13, alpha14, alpha15, alpha16, alpha17, alpha18;
2511: const PetscInt m = b->AIJ->rmap->n, *idx;
2512: PetscInt n, i;
2514: if (yy != zz) VecCopy(yy, zz);
2515: VecGetArrayRead(xx, &x);
2516: VecGetArray(zz, &y);
2517: for (i = 0; i < m; i++) {
2518: idx = a->j + a->i[i];
2519: v = a->a + a->i[i];
2520: n = a->i[i + 1] - a->i[i];
2521: alpha1 = x[18 * i];
2522: alpha2 = x[18 * i + 1];
2523: alpha3 = x[18 * i + 2];
2524: alpha4 = x[18 * i + 3];
2525: alpha5 = x[18 * i + 4];
2526: alpha6 = x[18 * i + 5];
2527: alpha7 = x[18 * i + 6];
2528: alpha8 = x[18 * i + 7];
2529: alpha9 = x[18 * i + 8];
2530: alpha10 = x[18 * i + 9];
2531: alpha11 = x[18 * i + 10];
2532: alpha12 = x[18 * i + 11];
2533: alpha13 = x[18 * i + 12];
2534: alpha14 = x[18 * i + 13];
2535: alpha15 = x[18 * i + 14];
2536: alpha16 = x[18 * i + 15];
2537: alpha17 = x[18 * i + 16];
2538: alpha18 = x[18 * i + 17];
2539: while (n-- > 0) {
2540: y[18 * (*idx)] += alpha1 * (*v);
2541: y[18 * (*idx) + 1] += alpha2 * (*v);
2542: y[18 * (*idx) + 2] += alpha3 * (*v);
2543: y[18 * (*idx) + 3] += alpha4 * (*v);
2544: y[18 * (*idx) + 4] += alpha5 * (*v);
2545: y[18 * (*idx) + 5] += alpha6 * (*v);
2546: y[18 * (*idx) + 6] += alpha7 * (*v);
2547: y[18 * (*idx) + 7] += alpha8 * (*v);
2548: y[18 * (*idx) + 8] += alpha9 * (*v);
2549: y[18 * (*idx) + 9] += alpha10 * (*v);
2550: y[18 * (*idx) + 10] += alpha11 * (*v);
2551: y[18 * (*idx) + 11] += alpha12 * (*v);
2552: y[18 * (*idx) + 12] += alpha13 * (*v);
2553: y[18 * (*idx) + 13] += alpha14 * (*v);
2554: y[18 * (*idx) + 14] += alpha15 * (*v);
2555: y[18 * (*idx) + 15] += alpha16 * (*v);
2556: y[18 * (*idx) + 16] += alpha17 * (*v);
2557: y[18 * (*idx) + 17] += alpha18 * (*v);
2558: idx++;
2559: v++;
2560: }
2561: }
2562: PetscLogFlops(36.0 * a->nz);
2563: VecRestoreArrayRead(xx, &x);
2564: VecRestoreArray(zz, &y);
2565: return 0;
2566: }
2568: PetscErrorCode MatMult_SeqMAIJ_N(Mat A, Vec xx, Vec yy)
2569: {
2570: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
2571: Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
2572: const PetscScalar *x, *v;
2573: PetscScalar *y, *sums;
2574: const PetscInt m = b->AIJ->rmap->n, *idx, *ii;
2575: PetscInt n, i, jrow, j, dof = b->dof, k;
2577: VecGetArrayRead(xx, &x);
2578: VecSet(yy, 0.0);
2579: VecGetArray(yy, &y);
2580: idx = a->j;
2581: v = a->a;
2582: ii = a->i;
2584: for (i = 0; i < m; i++) {
2585: jrow = ii[i];
2586: n = ii[i + 1] - jrow;
2587: sums = y + dof * i;
2588: for (j = 0; j < n; j++) {
2589: for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k];
2590: jrow++;
2591: }
2592: }
2594: PetscLogFlops(2.0 * dof * a->nz);
2595: VecRestoreArrayRead(xx, &x);
2596: VecRestoreArray(yy, &y);
2597: return 0;
2598: }
2600: PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz)
2601: {
2602: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
2603: Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
2604: const PetscScalar *x, *v;
2605: PetscScalar *y, *sums;
2606: const PetscInt m = b->AIJ->rmap->n, *idx, *ii;
2607: PetscInt n, i, jrow, j, dof = b->dof, k;
2609: if (yy != zz) VecCopy(yy, zz);
2610: VecGetArrayRead(xx, &x);
2611: VecGetArray(zz, &y);
2612: idx = a->j;
2613: v = a->a;
2614: ii = a->i;
2616: for (i = 0; i < m; i++) {
2617: jrow = ii[i];
2618: n = ii[i + 1] - jrow;
2619: sums = y + dof * i;
2620: for (j = 0; j < n; j++) {
2621: for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k];
2622: jrow++;
2623: }
2624: }
2626: PetscLogFlops(2.0 * dof * a->nz);
2627: VecRestoreArrayRead(xx, &x);
2628: VecRestoreArray(zz, &y);
2629: return 0;
2630: }
2632: PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A, Vec xx, Vec yy)
2633: {
2634: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
2635: Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
2636: const PetscScalar *x, *v, *alpha;
2637: PetscScalar *y;
2638: const PetscInt m = b->AIJ->rmap->n, *idx, dof = b->dof;
2639: PetscInt n, i, k;
2641: VecGetArrayRead(xx, &x);
2642: VecSet(yy, 0.0);
2643: VecGetArray(yy, &y);
2644: for (i = 0; i < m; i++) {
2645: idx = a->j + a->i[i];
2646: v = a->a + a->i[i];
2647: n = a->i[i + 1] - a->i[i];
2648: alpha = x + dof * i;
2649: while (n-- > 0) {
2650: for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v);
2651: idx++;
2652: v++;
2653: }
2654: }
2655: PetscLogFlops(2.0 * dof * a->nz);
2656: VecRestoreArrayRead(xx, &x);
2657: VecRestoreArray(yy, &y);
2658: return 0;
2659: }
2661: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz)
2662: {
2663: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
2664: Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
2665: const PetscScalar *x, *v, *alpha;
2666: PetscScalar *y;
2667: const PetscInt m = b->AIJ->rmap->n, *idx, dof = b->dof;
2668: PetscInt n, i, k;
2670: if (yy != zz) VecCopy(yy, zz);
2671: VecGetArrayRead(xx, &x);
2672: VecGetArray(zz, &y);
2673: for (i = 0; i < m; i++) {
2674: idx = a->j + a->i[i];
2675: v = a->a + a->i[i];
2676: n = a->i[i + 1] - a->i[i];
2677: alpha = x + dof * i;
2678: while (n-- > 0) {
2679: for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v);
2680: idx++;
2681: v++;
2682: }
2683: }
2684: PetscLogFlops(2.0 * dof * a->nz);
2685: VecRestoreArrayRead(xx, &x);
2686: VecRestoreArray(zz, &y);
2687: return 0;
2688: }
2690: /*===================================================================================*/
2691: PetscErrorCode MatMult_MPIMAIJ_dof(Mat A, Vec xx, Vec yy)
2692: {
2693: Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
2695: /* start the scatter */
2696: VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD);
2697: (*b->AIJ->ops->mult)(b->AIJ, xx, yy);
2698: VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD);
2699: (*b->OAIJ->ops->multadd)(b->OAIJ, b->w, yy, yy);
2700: return 0;
2701: }
2703: PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A, Vec xx, Vec yy)
2704: {
2705: Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
2707: (*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w);
2708: (*b->AIJ->ops->multtranspose)(b->AIJ, xx, yy);
2709: VecScatterBegin(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE);
2710: VecScatterEnd(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE);
2711: return 0;
2712: }
2714: PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz)
2715: {
2716: Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
2718: /* start the scatter */
2719: VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD);
2720: (*b->AIJ->ops->multadd)(b->AIJ, xx, yy, zz);
2721: VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD);
2722: (*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz);
2723: return 0;
2724: }
2726: PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz)
2727: {
2728: Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
2730: (*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w);
2731: (*b->AIJ->ops->multtransposeadd)(b->AIJ, xx, yy, zz);
2732: VecScatterBegin(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE);
2733: VecScatterEnd(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE);
2734: return 0;
2735: }
2737: /* ----------------------------------------------------------------*/
2738: PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C)
2739: {
2740: Mat_Product *product = C->product;
2742: if (product->type == MATPRODUCT_PtAP) {
2743: C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ;
2744: } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for SeqAIJ and SeqMAIJ matrices", MatProductTypes[product->type]);
2745: return 0;
2746: }
2748: PetscErrorCode MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C)
2749: {
2750: Mat_Product *product = C->product;
2751: PetscBool flg = PETSC_FALSE;
2752: Mat A = product->A, P = product->B;
2753: PetscInt alg = 1; /* set default algorithm */
2754: #if !defined(PETSC_HAVE_HYPRE)
2755: const char *algTypes[4] = {"scalable", "nonscalable", "allatonce", "allatonce_merged"};
2756: PetscInt nalg = 4;
2757: #else
2758: const char *algTypes[5] = {"scalable", "nonscalable", "allatonce", "allatonce_merged", "hypre"};
2759: PetscInt nalg = 5;
2760: #endif
2764: /* PtAP */
2765: /* Check matrix local sizes */
2767: A->rmap->rstart, A->rmap->rend, P->rmap->rstart, P->rmap->rend);
2769: A->cmap->rstart, A->cmap->rend, P->rmap->rstart, P->rmap->rend);
2771: /* Set the default algorithm */
2772: PetscStrcmp(C->product->alg, "default", &flg);
2773: if (flg) MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]);
2775: /* Get runtime option */
2776: PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
2777: PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatPtAP", algTypes, nalg, algTypes[alg], &alg, &flg);
2778: if (flg) MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]);
2779: PetscOptionsEnd();
2781: PetscStrcmp(C->product->alg, "allatonce", &flg);
2782: if (flg) {
2783: C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
2784: return 0;
2785: }
2787: PetscStrcmp(C->product->alg, "allatonce_merged", &flg);
2788: if (flg) {
2789: C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
2790: return 0;
2791: }
2793: /* Convert P from MAIJ to AIJ matrix since implementation not available for MAIJ */
2794: PetscInfo((PetscObject)A, "Converting from MAIJ to AIJ matrix since implementation not available for MAIJ\n");
2795: MatConvert(P, MATMPIAIJ, MAT_INPLACE_MATRIX, &P);
2796: MatProductSetFromOptions(C);
2797: return 0;
2798: }
2800: /* ----------------------------------------------------------------*/
2801: PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A, Mat PP, PetscReal fill, Mat C)
2802: {
2803: PetscFreeSpaceList free_space = NULL, current_space = NULL;
2804: Mat_SeqMAIJ *pp = (Mat_SeqMAIJ *)PP->data;
2805: Mat P = pp->AIJ;
2806: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *p = (Mat_SeqAIJ *)P->data, *c;
2807: PetscInt *pti, *ptj, *ptJ;
2808: PetscInt *ci, *cj, *ptadenserow, *ptasparserow, *denserow, *sparserow, *ptaj;
2809: const PetscInt an = A->cmap->N, am = A->rmap->N, pn = P->cmap->N, pm = P->rmap->N, ppdof = pp->dof;
2810: PetscInt i, j, k, dof, pshift, ptnzi, arow, anzj, ptanzi, prow, pnzj, cnzi, cn;
2811: MatScalar *ca;
2812: const PetscInt *pi = p->i, *pj = p->j, *pjj, *ai = a->i, *aj = a->j, *ajj;
2814: /* Get ij structure of P^T */
2815: MatGetSymbolicTranspose_SeqAIJ(P, &pti, &ptj);
2817: cn = pn * ppdof;
2818: /* Allocate ci array, arrays for fill computation and */
2819: /* free space for accumulating nonzero column info */
2820: PetscMalloc1(cn + 1, &ci);
2821: ci[0] = 0;
2823: /* Work arrays for rows of P^T*A */
2824: PetscMalloc4(an, &ptadenserow, an, &ptasparserow, cn, &denserow, cn, &sparserow);
2825: PetscArrayzero(ptadenserow, an);
2826: PetscArrayzero(denserow, cn);
2828: /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
2829: /* This should be reasonable if sparsity of PtAP is similar to that of A. */
2830: /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
2831: PetscFreeSpaceGet(PetscIntMultTruncate(ai[am] / pm, pn), &free_space);
2832: current_space = free_space;
2834: /* Determine symbolic info for each row of C: */
2835: for (i = 0; i < pn; i++) {
2836: ptnzi = pti[i + 1] - pti[i];
2837: ptJ = ptj + pti[i];
2838: for (dof = 0; dof < ppdof; dof++) {
2839: ptanzi = 0;
2840: /* Determine symbolic row of PtA: */
2841: for (j = 0; j < ptnzi; j++) {
2842: /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
2843: arow = ptJ[j] * ppdof + dof;
2844: /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
2845: anzj = ai[arow + 1] - ai[arow];
2846: ajj = aj + ai[arow];
2847: for (k = 0; k < anzj; k++) {
2848: if (!ptadenserow[ajj[k]]) {
2849: ptadenserow[ajj[k]] = -1;
2850: ptasparserow[ptanzi++] = ajj[k];
2851: }
2852: }
2853: }
2854: /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
2855: ptaj = ptasparserow;
2856: cnzi = 0;
2857: for (j = 0; j < ptanzi; j++) {
2858: /* Get offset within block of P */
2859: pshift = *ptaj % ppdof;
2860: /* Get block row of P */
2861: prow = (*ptaj++) / ppdof; /* integer division */
2862: /* P has same number of nonzeros per row as the compressed form */
2863: pnzj = pi[prow + 1] - pi[prow];
2864: pjj = pj + pi[prow];
2865: for (k = 0; k < pnzj; k++) {
2866: /* Locations in C are shifted by the offset within the block */
2867: /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
2868: if (!denserow[pjj[k] * ppdof + pshift]) {
2869: denserow[pjj[k] * ppdof + pshift] = -1;
2870: sparserow[cnzi++] = pjj[k] * ppdof + pshift;
2871: }
2872: }
2873: }
2875: /* sort sparserow */
2876: PetscSortInt(cnzi, sparserow);
2878: /* If free space is not available, make more free space */
2879: /* Double the amount of total space in the list */
2880: if (current_space->local_remaining < cnzi) PetscFreeSpaceGet(PetscIntSumTruncate(cnzi, current_space->total_array_size), ¤t_space);
2882: /* Copy data into free space, and zero out denserows */
2883: PetscArraycpy(current_space->array, sparserow, cnzi);
2885: current_space->array += cnzi;
2886: current_space->local_used += cnzi;
2887: current_space->local_remaining -= cnzi;
2889: for (j = 0; j < ptanzi; j++) ptadenserow[ptasparserow[j]] = 0;
2890: for (j = 0; j < cnzi; j++) denserow[sparserow[j]] = 0;
2892: /* Aside: Perhaps we should save the pta info for the numerical factorization. */
2893: /* For now, we will recompute what is needed. */
2894: ci[i * ppdof + 1 + dof] = ci[i * ppdof + dof] + cnzi;
2895: }
2896: }
2897: /* nnz is now stored in ci[ptm], column indices are in the list of free space */
2898: /* Allocate space for cj, initialize cj, and */
2899: /* destroy list of free space and other temporary array(s) */
2900: PetscMalloc1(ci[cn] + 1, &cj);
2901: PetscFreeSpaceContiguous(&free_space, cj);
2902: PetscFree4(ptadenserow, ptasparserow, denserow, sparserow);
2904: /* Allocate space for ca */
2905: PetscCalloc1(ci[cn] + 1, &ca);
2907: /* put together the new matrix */
2908: MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A), cn, cn, ci, cj, ca, NULL, C);
2909: MatSetBlockSize(C, pp->dof);
2911: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
2912: /* Since these are PETSc arrays, change flags to free them as necessary. */
2913: c = (Mat_SeqAIJ *)(C->data);
2914: c->free_a = PETSC_TRUE;
2915: c->free_ij = PETSC_TRUE;
2916: c->nonew = 0;
2918: C->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
2919: C->ops->productnumeric = MatProductNumeric_PtAP;
2921: /* Clean up. */
2922: MatRestoreSymbolicTranspose_SeqAIJ(P, &pti, &ptj);
2923: return 0;
2924: }
2926: PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A, Mat PP, Mat C)
2927: {
2928: /* This routine requires testing -- first draft only */
2929: Mat_SeqMAIJ *pp = (Mat_SeqMAIJ *)PP->data;
2930: Mat P = pp->AIJ;
2931: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
2932: Mat_SeqAIJ *p = (Mat_SeqAIJ *)P->data;
2933: Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data;
2934: const PetscInt *ai = a->i, *aj = a->j, *pi = p->i, *pj = p->j, *pJ, *pjj;
2935: const PetscInt *ci = c->i, *cj = c->j, *cjj;
2936: const PetscInt am = A->rmap->N, cn = C->cmap->N, cm = C->rmap->N, ppdof = pp->dof;
2937: PetscInt i, j, k, pshift, poffset, anzi, pnzi, apnzj, nextap, pnzj, prow, crow, *apj, *apjdense;
2938: const MatScalar *aa = a->a, *pa = p->a, *pA, *paj;
2939: MatScalar *ca = c->a, *caj, *apa;
2941: /* Allocate temporary array for storage of one row of A*P */
2942: PetscCalloc3(cn, &apa, cn, &apj, cn, &apjdense);
2944: /* Clear old values in C */
2945: PetscArrayzero(ca, ci[cm]);
2947: for (i = 0; i < am; i++) {
2948: /* Form sparse row of A*P */
2949: anzi = ai[i + 1] - ai[i];
2950: apnzj = 0;
2951: for (j = 0; j < anzi; j++) {
2952: /* Get offset within block of P */
2953: pshift = *aj % ppdof;
2954: /* Get block row of P */
2955: prow = *aj++ / ppdof; /* integer division */
2956: pnzj = pi[prow + 1] - pi[prow];
2957: pjj = pj + pi[prow];
2958: paj = pa + pi[prow];
2959: for (k = 0; k < pnzj; k++) {
2960: poffset = pjj[k] * ppdof + pshift;
2961: if (!apjdense[poffset]) {
2962: apjdense[poffset] = -1;
2963: apj[apnzj++] = poffset;
2964: }
2965: apa[poffset] += (*aa) * paj[k];
2966: }
2967: PetscLogFlops(2.0 * pnzj);
2968: aa++;
2969: }
2971: /* Sort the j index array for quick sparse axpy. */
2972: /* Note: a array does not need sorting as it is in dense storage locations. */
2973: PetscSortInt(apnzj, apj);
2975: /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
2976: prow = i / ppdof; /* integer division */
2977: pshift = i % ppdof;
2978: poffset = pi[prow];
2979: pnzi = pi[prow + 1] - poffset;
2980: /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
2981: pJ = pj + poffset;
2982: pA = pa + poffset;
2983: for (j = 0; j < pnzi; j++) {
2984: crow = (*pJ) * ppdof + pshift;
2985: cjj = cj + ci[crow];
2986: caj = ca + ci[crow];
2987: pJ++;
2988: /* Perform sparse axpy operation. Note cjj includes apj. */
2989: for (k = 0, nextap = 0; nextap < apnzj; k++) {
2990: if (cjj[k] == apj[nextap]) caj[k] += (*pA) * apa[apj[nextap++]];
2991: }
2992: PetscLogFlops(2.0 * apnzj);
2993: pA++;
2994: }
2996: /* Zero the current row info for A*P */
2997: for (j = 0; j < apnzj; j++) {
2998: apa[apj[j]] = 0.;
2999: apjdense[apj[j]] = 0;
3000: }
3001: }
3003: /* Assemble the final matrix and clean up */
3004: MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY);
3005: MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY);
3006: PetscFree3(apa, apj, apjdense);
3007: return 0;
3008: }
3010: PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C)
3011: {
3012: Mat_Product *product = C->product;
3013: Mat A = product->A, P = product->B;
3015: MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A, P, product->fill, C);
3016: return 0;
3017: }
3019: PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A, Mat PP, PetscReal fill, Mat *C)
3020: {
3021: SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MatPtAPSymbolic is not implemented for MPIMAIJ matrix yet");
3022: }
3024: PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A, Mat PP, Mat C)
3025: {
3026: SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
3027: }
3029: PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, Mat);
3031: PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, Mat C)
3032: {
3033: Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;
3036: MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, C);
3037: return 0;
3038: }
3040: PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, PetscReal, Mat);
3042: PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, PetscReal fill, Mat C)
3043: {
3044: Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;
3046: MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, fill, C);
3047: C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce;
3048: return 0;
3049: }
3051: PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, Mat);
3053: PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, Mat C)
3054: {
3055: Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;
3058: MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, C);
3059: return 0;
3060: }
3062: PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, PetscReal, Mat);
3064: PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, PetscReal fill, Mat C)
3065: {
3066: Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;
3069: MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, fill, C);
3070: C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged;
3071: return 0;
3072: }
3074: PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C)
3075: {
3076: Mat_Product *product = C->product;
3077: Mat A = product->A, P = product->B;
3078: PetscBool flg;
3080: PetscStrcmp(product->alg, "allatonce", &flg);
3081: if (flg) {
3082: MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A, P, product->fill, C);
3083: C->ops->productnumeric = MatProductNumeric_PtAP;
3084: return 0;
3085: }
3087: PetscStrcmp(product->alg, "allatonce_merged", &flg);
3088: if (flg) {
3089: MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A, P, product->fill, C);
3090: C->ops->productnumeric = MatProductNumeric_PtAP;
3091: return 0;
3092: }
3094: SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "Mat Product Algorithm is not supported");
3095: }
3097: PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
3098: {
3099: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
3100: Mat a = b->AIJ, B;
3101: Mat_SeqAIJ *aij = (Mat_SeqAIJ *)a->data;
3102: PetscInt m, n, i, ncols, *ilen, nmax = 0, *icols, j, k, ii, dof = b->dof;
3103: PetscInt *cols;
3104: PetscScalar *vals;
3106: MatGetSize(a, &m, &n);
3107: PetscMalloc1(dof * m, &ilen);
3108: for (i = 0; i < m; i++) {
3109: nmax = PetscMax(nmax, aij->ilen[i]);
3110: for (j = 0; j < dof; j++) ilen[dof * i + j] = aij->ilen[i];
3111: }
3112: MatCreate(PETSC_COMM_SELF, &B);
3113: MatSetSizes(B, dof * m, dof * n, dof * m, dof * n);
3114: MatSetType(B, newtype);
3115: MatSeqAIJSetPreallocation(B, 0, ilen);
3116: PetscFree(ilen);
3117: PetscMalloc1(nmax, &icols);
3118: ii = 0;
3119: for (i = 0; i < m; i++) {
3120: MatGetRow_SeqAIJ(a, i, &ncols, &cols, &vals);
3121: for (j = 0; j < dof; j++) {
3122: for (k = 0; k < ncols; k++) icols[k] = dof * cols[k] + j;
3123: MatSetValues_SeqAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES);
3124: ii++;
3125: }
3126: MatRestoreRow_SeqAIJ(a, i, &ncols, &cols, &vals);
3127: }
3128: PetscFree(icols);
3129: MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY);
3130: MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY);
3132: if (reuse == MAT_INPLACE_MATRIX) {
3133: MatHeaderReplace(A, &B);
3134: } else {
3135: *newmat = B;
3136: }
3137: return 0;
3138: }
3140: #include <../src/mat/impls/aij/mpi/mpiaij.h>
3142: PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
3143: {
3144: Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)A->data;
3145: Mat MatAIJ = ((Mat_SeqMAIJ *)maij->AIJ->data)->AIJ, B;
3146: Mat MatOAIJ = ((Mat_SeqMAIJ *)maij->OAIJ->data)->AIJ;
3147: Mat_SeqAIJ *AIJ = (Mat_SeqAIJ *)MatAIJ->data;
3148: Mat_SeqAIJ *OAIJ = (Mat_SeqAIJ *)MatOAIJ->data;
3149: Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)maij->A->data;
3150: PetscInt dof = maij->dof, i, j, *dnz = NULL, *onz = NULL, nmax = 0, onmax = 0;
3151: PetscInt *oicols = NULL, *icols = NULL, ncols, *cols = NULL, oncols, *ocols = NULL;
3152: PetscInt rstart, cstart, *garray, ii, k;
3153: PetscScalar *vals, *ovals;
3155: PetscMalloc2(A->rmap->n, &dnz, A->rmap->n, &onz);
3156: for (i = 0; i < A->rmap->n / dof; i++) {
3157: nmax = PetscMax(nmax, AIJ->ilen[i]);
3158: onmax = PetscMax(onmax, OAIJ->ilen[i]);
3159: for (j = 0; j < dof; j++) {
3160: dnz[dof * i + j] = AIJ->ilen[i];
3161: onz[dof * i + j] = OAIJ->ilen[i];
3162: }
3163: }
3164: MatCreate(PetscObjectComm((PetscObject)A), &B);
3165: MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N);
3166: MatSetType(B, newtype);
3167: MatMPIAIJSetPreallocation(B, 0, dnz, 0, onz);
3168: MatSetBlockSize(B, dof);
3169: PetscFree2(dnz, onz);
3171: PetscMalloc2(nmax, &icols, onmax, &oicols);
3172: rstart = dof * maij->A->rmap->rstart;
3173: cstart = dof * maij->A->cmap->rstart;
3174: garray = mpiaij->garray;
3176: ii = rstart;
3177: for (i = 0; i < A->rmap->n / dof; i++) {
3178: MatGetRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals);
3179: MatGetRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals);
3180: for (j = 0; j < dof; j++) {
3181: for (k = 0; k < ncols; k++) icols[k] = cstart + dof * cols[k] + j;
3182: for (k = 0; k < oncols; k++) oicols[k] = dof * garray[ocols[k]] + j;
3183: MatSetValues_MPIAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES);
3184: MatSetValues_MPIAIJ(B, 1, &ii, oncols, oicols, ovals, INSERT_VALUES);
3185: ii++;
3186: }
3187: MatRestoreRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals);
3188: MatRestoreRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals);
3189: }
3190: PetscFree2(icols, oicols);
3192: MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY);
3193: MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY);
3195: if (reuse == MAT_INPLACE_MATRIX) {
3196: PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
3197: ((PetscObject)A)->refct = 1;
3199: MatHeaderReplace(A, &B);
3201: ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
3202: } else {
3203: *newmat = B;
3204: }
3205: return 0;
3206: }
3208: PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat)
3209: {
3210: Mat A;
3212: MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A);
3213: MatCreateSubMatrix(A, isrow, iscol, cll, newmat);
3214: MatDestroy(&A);
3215: return 0;
3216: }
3218: PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *submat[])
3219: {
3220: Mat A;
3222: MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A);
3223: MatCreateSubMatrices(A, n, irow, icol, scall, submat);
3224: MatDestroy(&A);
3225: return 0;
3226: }
3228: /* ---------------------------------------------------------------------------------- */
3229: /*@
3230: MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
3231: operations for multicomponent problems. It interpolates each component the same
3232: way independently. The matrix type is based on `MATSEQAIJ` for sequential matrices,
3233: and `MATMPIAIJ` for distributed matrices.
3235: Collective
3237: Input Parameters:
3238: + A - the `MATAIJ` matrix describing the action on blocks
3239: - dof - the block size (number of components per node)
3241: Output Parameter:
3242: . maij - the new `MATMAIJ` matrix
3244: Operations provided:
3245: .vb
3246: MatMult()
3247: MatMultTranspose()
3248: MatMultAdd()
3249: MatMultTransposeAdd()
3250: MatView()
3251: .ve
3253: Level: advanced
3255: .seealso: `MATAIJ`, `MATMAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MATMAIJ`
3256: @*/
3257: PetscErrorCode MatCreateMAIJ(Mat A, PetscInt dof, Mat *maij)
3258: {
3259: PetscInt n;
3260: Mat B;
3261: PetscBool flg;
3262: #if defined(PETSC_HAVE_CUDA)
3263: /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */
3264: PetscBool convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE;
3265: #endif
3267: dof = PetscAbs(dof);
3268: PetscObjectReference((PetscObject)A);
3270: if (dof == 1) *maij = A;
3271: else {
3272: MatCreate(PetscObjectComm((PetscObject)A), &B);
3273: /* propagate vec type */
3274: MatSetVecType(B, A->defaultvectype);
3275: MatSetSizes(B, dof * A->rmap->n, dof * A->cmap->n, dof * A->rmap->N, dof * A->cmap->N);
3276: PetscLayoutSetBlockSize(B->rmap, dof);
3277: PetscLayoutSetBlockSize(B->cmap, dof);
3278: PetscLayoutSetUp(B->rmap);
3279: PetscLayoutSetUp(B->cmap);
3281: B->assembled = PETSC_TRUE;
3283: PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &flg);
3284: if (flg) {
3285: Mat_SeqMAIJ *b;
3287: MatSetType(B, MATSEQMAIJ);
3289: B->ops->setup = NULL;
3290: B->ops->destroy = MatDestroy_SeqMAIJ;
3291: B->ops->view = MatView_SeqMAIJ;
3293: b = (Mat_SeqMAIJ *)B->data;
3294: b->dof = dof;
3295: b->AIJ = A;
3297: if (dof == 2) {
3298: B->ops->mult = MatMult_SeqMAIJ_2;
3299: B->ops->multadd = MatMultAdd_SeqMAIJ_2;
3300: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2;
3301: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
3302: } else if (dof == 3) {
3303: B->ops->mult = MatMult_SeqMAIJ_3;
3304: B->ops->multadd = MatMultAdd_SeqMAIJ_3;
3305: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3;
3306: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
3307: } else if (dof == 4) {
3308: B->ops->mult = MatMult_SeqMAIJ_4;
3309: B->ops->multadd = MatMultAdd_SeqMAIJ_4;
3310: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4;
3311: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
3312: } else if (dof == 5) {
3313: B->ops->mult = MatMult_SeqMAIJ_5;
3314: B->ops->multadd = MatMultAdd_SeqMAIJ_5;
3315: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5;
3316: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
3317: } else if (dof == 6) {
3318: B->ops->mult = MatMult_SeqMAIJ_6;
3319: B->ops->multadd = MatMultAdd_SeqMAIJ_6;
3320: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6;
3321: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
3322: } else if (dof == 7) {
3323: B->ops->mult = MatMult_SeqMAIJ_7;
3324: B->ops->multadd = MatMultAdd_SeqMAIJ_7;
3325: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7;
3326: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
3327: } else if (dof == 8) {
3328: B->ops->mult = MatMult_SeqMAIJ_8;
3329: B->ops->multadd = MatMultAdd_SeqMAIJ_8;
3330: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8;
3331: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
3332: } else if (dof == 9) {
3333: B->ops->mult = MatMult_SeqMAIJ_9;
3334: B->ops->multadd = MatMultAdd_SeqMAIJ_9;
3335: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9;
3336: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
3337: } else if (dof == 10) {
3338: B->ops->mult = MatMult_SeqMAIJ_10;
3339: B->ops->multadd = MatMultAdd_SeqMAIJ_10;
3340: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10;
3341: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
3342: } else if (dof == 11) {
3343: B->ops->mult = MatMult_SeqMAIJ_11;
3344: B->ops->multadd = MatMultAdd_SeqMAIJ_11;
3345: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_11;
3346: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
3347: } else if (dof == 16) {
3348: B->ops->mult = MatMult_SeqMAIJ_16;
3349: B->ops->multadd = MatMultAdd_SeqMAIJ_16;
3350: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16;
3351: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
3352: } else if (dof == 18) {
3353: B->ops->mult = MatMult_SeqMAIJ_18;
3354: B->ops->multadd = MatMultAdd_SeqMAIJ_18;
3355: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18;
3356: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
3357: } else {
3358: B->ops->mult = MatMult_SeqMAIJ_N;
3359: B->ops->multadd = MatMultAdd_SeqMAIJ_N;
3360: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_N;
3361: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
3362: }
3363: #if defined(PETSC_HAVE_CUDA)
3364: PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaijcusparse_C", MatConvert_SeqMAIJ_SeqAIJ);
3365: #endif
3366: PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaij_C", MatConvert_SeqMAIJ_SeqAIJ);
3367: PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqaij_seqmaij_C", MatProductSetFromOptions_SeqAIJ_SeqMAIJ);
3368: } else {
3369: Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data;
3370: Mat_MPIMAIJ *b;
3371: IS from, to;
3372: Vec gvec;
3374: MatSetType(B, MATMPIMAIJ);
3376: B->ops->setup = NULL;
3377: B->ops->destroy = MatDestroy_MPIMAIJ;
3378: B->ops->view = MatView_MPIMAIJ;
3380: b = (Mat_MPIMAIJ *)B->data;
3381: b->dof = dof;
3382: b->A = A;
3384: MatCreateMAIJ(mpiaij->A, -dof, &b->AIJ);
3385: MatCreateMAIJ(mpiaij->B, -dof, &b->OAIJ);
3387: VecGetSize(mpiaij->lvec, &n);
3388: VecCreate(PETSC_COMM_SELF, &b->w);
3389: VecSetSizes(b->w, n * dof, n * dof);
3390: VecSetBlockSize(b->w, dof);
3391: VecSetType(b->w, VECSEQ);
3393: /* create two temporary Index sets for build scatter gather */
3394: ISCreateBlock(PetscObjectComm((PetscObject)A), dof, n, mpiaij->garray, PETSC_COPY_VALUES, &from);
3395: ISCreateStride(PETSC_COMM_SELF, n * dof, 0, 1, &to);
3397: /* create temporary global vector to generate scatter context */
3398: VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), dof, dof * A->cmap->n, dof * A->cmap->N, NULL, &gvec);
3400: /* generate the scatter context */
3401: VecScatterCreate(gvec, from, b->w, to, &b->ctx);
3403: ISDestroy(&from);
3404: ISDestroy(&to);
3405: VecDestroy(&gvec);
3407: B->ops->mult = MatMult_MPIMAIJ_dof;
3408: B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof;
3409: B->ops->multadd = MatMultAdd_MPIMAIJ_dof;
3410: B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
3412: #if defined(PETSC_HAVE_CUDA)
3413: PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaijcusparse_C", MatConvert_MPIMAIJ_MPIAIJ);
3414: #endif
3415: PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaij_C", MatConvert_MPIMAIJ_MPIAIJ);
3416: PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpimaij_C", MatProductSetFromOptions_MPIAIJ_MPIMAIJ);
3417: }
3418: B->ops->createsubmatrix = MatCreateSubMatrix_MAIJ;
3419: B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ;
3420: MatSetUp(B);
3421: #if defined(PETSC_HAVE_CUDA)
3422: /* temporary until we have CUDA implementation of MAIJ */
3423: {
3424: PetscBool flg;
3425: if (convert) {
3426: PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, MATAIJCUSPARSE, "");
3427: if (flg) MatConvert(B, ((PetscObject)A)->type_name, MAT_INPLACE_MATRIX, &B);
3428: }
3429: }
3430: #endif
3431: *maij = B;
3432: }
3433: return 0;
3434: }