Actual source code: baijsolvnat11.c
1: #include <../src/mat/impls/baij/seq/baij.h>
2: #include <petsc/private/kernels/blockinvert.h>
4: /* Block operations are done by accessing one column at at time */
5: /* Default MatSolve for block size 11 */
7: PetscErrorCode MatSolve_SeqBAIJ_11_NaturalOrdering(Mat A, Vec bb, Vec xx)
8: {
9: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
10: const PetscInt n = a->mbs, *ai = a->i, *aj = a->j, *adiag = a->diag, *vi, bs = A->rmap->bs, bs2 = a->bs2;
11: PetscInt i, k, nz, idx, idt, m;
12: const MatScalar *aa = a->a, *v;
13: PetscScalar s[11];
14: PetscScalar *x, xv;
15: const PetscScalar *b;
17: VecGetArrayRead(bb, &b);
18: VecGetArray(xx, &x);
20: /* forward solve the lower triangular */
21: for (i = 0; i < n; i++) {
22: v = aa + bs2 * ai[i];
23: vi = aj + ai[i];
24: nz = ai[i + 1] - ai[i];
25: idt = bs * i;
26: x[idt] = b[idt];
27: x[1 + idt] = b[1 + idt];
28: x[2 + idt] = b[2 + idt];
29: x[3 + idt] = b[3 + idt];
30: x[4 + idt] = b[4 + idt];
31: x[5 + idt] = b[5 + idt];
32: x[6 + idt] = b[6 + idt];
33: x[7 + idt] = b[7 + idt];
34: x[8 + idt] = b[8 + idt];
35: x[9 + idt] = b[9 + idt];
36: x[10 + idt] = b[10 + idt];
37: for (m = 0; m < nz; m++) {
38: idx = bs * vi[m];
39: for (k = 0; k < 11; k++) {
40: xv = x[k + idx];
41: x[idt] -= v[0] * xv;
42: x[1 + idt] -= v[1] * xv;
43: x[2 + idt] -= v[2] * xv;
44: x[3 + idt] -= v[3] * xv;
45: x[4 + idt] -= v[4] * xv;
46: x[5 + idt] -= v[5] * xv;
47: x[6 + idt] -= v[6] * xv;
48: x[7 + idt] -= v[7] * xv;
49: x[8 + idt] -= v[8] * xv;
50: x[9 + idt] -= v[9] * xv;
51: x[10 + idt] -= v[10] * xv;
52: v += 11;
53: }
54: }
55: }
56: /* backward solve the upper triangular */
57: for (i = n - 1; i >= 0; i--) {
58: v = aa + bs2 * (adiag[i + 1] + 1);
59: vi = aj + adiag[i + 1] + 1;
60: nz = adiag[i] - adiag[i + 1] - 1;
61: idt = bs * i;
62: s[0] = x[idt];
63: s[1] = x[1 + idt];
64: s[2] = x[2 + idt];
65: s[3] = x[3 + idt];
66: s[4] = x[4 + idt];
67: s[5] = x[5 + idt];
68: s[6] = x[6 + idt];
69: s[7] = x[7 + idt];
70: s[8] = x[8 + idt];
71: s[9] = x[9 + idt];
72: s[10] = x[10 + idt];
74: for (m = 0; m < nz; m++) {
75: idx = bs * vi[m];
76: for (k = 0; k < 11; k++) {
77: xv = x[k + idx];
78: s[0] -= v[0] * xv;
79: s[1] -= v[1] * xv;
80: s[2] -= v[2] * xv;
81: s[3] -= v[3] * xv;
82: s[4] -= v[4] * xv;
83: s[5] -= v[5] * xv;
84: s[6] -= v[6] * xv;
85: s[7] -= v[7] * xv;
86: s[8] -= v[8] * xv;
87: s[9] -= v[9] * xv;
88: s[10] -= v[10] * xv;
89: v += 11;
90: }
91: }
92: PetscArrayzero(x + idt, bs);
93: for (k = 0; k < 11; k++) {
94: x[idt] += v[0] * s[k];
95: x[1 + idt] += v[1] * s[k];
96: x[2 + idt] += v[2] * s[k];
97: x[3 + idt] += v[3] * s[k];
98: x[4 + idt] += v[4] * s[k];
99: x[5 + idt] += v[5] * s[k];
100: x[6 + idt] += v[6] * s[k];
101: x[7 + idt] += v[7] * s[k];
102: x[8 + idt] += v[8] * s[k];
103: x[9 + idt] += v[9] * s[k];
104: x[10 + idt] += v[10] * s[k];
105: v += 11;
106: }
107: }
108: VecRestoreArrayRead(bb, &b);
109: VecRestoreArray(xx, &x);
110: PetscLogFlops(2.0 * bs2 * (a->nz) - bs * A->cmap->n);
111: return 0;
112: }