Actual source code: baijsolvnat5.c
1: #include <../src/mat/impls/baij/seq/baij.h>
2: #include <petsc/private/kernels/blockinvert.h>
4: PetscErrorCode MatSolve_SeqBAIJ_5_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx)
5: {
6: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
7: const PetscInt *diag = a->diag, n = a->mbs, *vi, *ai = a->i, *aj = a->j;
8: PetscInt i, nz, idx, idt, jdx;
9: const MatScalar *aa = a->a, *v;
10: PetscScalar *x, s1, s2, s3, s4, s5, x1, x2, x3, x4, x5;
11: const PetscScalar *b;
13: VecGetArrayRead(bb, &b);
14: VecGetArray(xx, &x);
15: /* forward solve the lower triangular */
16: idx = 0;
17: x[0] = b[idx];
18: x[1] = b[1 + idx];
19: x[2] = b[2 + idx];
20: x[3] = b[3 + idx];
21: x[4] = b[4 + idx];
22: for (i = 1; i < n; i++) {
23: v = aa + 25 * ai[i];
24: vi = aj + ai[i];
25: nz = diag[i] - ai[i];
26: idx = 5 * i;
27: s1 = b[idx];
28: s2 = b[1 + idx];
29: s3 = b[2 + idx];
30: s4 = b[3 + idx];
31: s5 = b[4 + idx];
32: while (nz--) {
33: jdx = 5 * (*vi++);
34: x1 = x[jdx];
35: x2 = x[1 + jdx];
36: x3 = x[2 + jdx];
37: x4 = x[3 + jdx];
38: x5 = x[4 + jdx];
39: s1 -= v[0] * x1 + v[5] * x2 + v[10] * x3 + v[15] * x4 + v[20] * x5;
40: s2 -= v[1] * x1 + v[6] * x2 + v[11] * x3 + v[16] * x4 + v[21] * x5;
41: s3 -= v[2] * x1 + v[7] * x2 + v[12] * x3 + v[17] * x4 + v[22] * x5;
42: s4 -= v[3] * x1 + v[8] * x2 + v[13] * x3 + v[18] * x4 + v[23] * x5;
43: s5 -= v[4] * x1 + v[9] * x2 + v[14] * x3 + v[19] * x4 + v[24] * x5;
44: v += 25;
45: }
46: x[idx] = s1;
47: x[1 + idx] = s2;
48: x[2 + idx] = s3;
49: x[3 + idx] = s4;
50: x[4 + idx] = s5;
51: }
52: /* backward solve the upper triangular */
53: for (i = n - 1; i >= 0; i--) {
54: v = aa + 25 * diag[i] + 25;
55: vi = aj + diag[i] + 1;
56: nz = ai[i + 1] - diag[i] - 1;
57: idt = 5 * i;
58: s1 = x[idt];
59: s2 = x[1 + idt];
60: s3 = x[2 + idt];
61: s4 = x[3 + idt];
62: s5 = x[4 + idt];
63: while (nz--) {
64: idx = 5 * (*vi++);
65: x1 = x[idx];
66: x2 = x[1 + idx];
67: x3 = x[2 + idx];
68: x4 = x[3 + idx];
69: x5 = x[4 + idx];
70: s1 -= v[0] * x1 + v[5] * x2 + v[10] * x3 + v[15] * x4 + v[20] * x5;
71: s2 -= v[1] * x1 + v[6] * x2 + v[11] * x3 + v[16] * x4 + v[21] * x5;
72: s3 -= v[2] * x1 + v[7] * x2 + v[12] * x3 + v[17] * x4 + v[22] * x5;
73: s4 -= v[3] * x1 + v[8] * x2 + v[13] * x3 + v[18] * x4 + v[23] * x5;
74: s5 -= v[4] * x1 + v[9] * x2 + v[14] * x3 + v[19] * x4 + v[24] * x5;
75: v += 25;
76: }
77: v = aa + 25 * diag[i];
78: x[idt] = v[0] * s1 + v[5] * s2 + v[10] * s3 + v[15] * s4 + v[20] * s5;
79: x[1 + idt] = v[1] * s1 + v[6] * s2 + v[11] * s3 + v[16] * s4 + v[21] * s5;
80: x[2 + idt] = v[2] * s1 + v[7] * s2 + v[12] * s3 + v[17] * s4 + v[22] * s5;
81: x[3 + idt] = v[3] * s1 + v[8] * s2 + v[13] * s3 + v[18] * s4 + v[23] * s5;
82: x[4 + idt] = v[4] * s1 + v[9] * s2 + v[14] * s3 + v[19] * s4 + v[24] * s5;
83: }
85: VecRestoreArrayRead(bb, &b);
86: VecRestoreArray(xx, &x);
87: PetscLogFlops(2.0 * 25 * (a->nz) - 5.0 * A->cmap->n);
88: return 0;
89: }
91: PetscErrorCode MatSolve_SeqBAIJ_5_NaturalOrdering(Mat A, Vec bb, Vec xx)
92: {
93: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
94: const PetscInt n = a->mbs, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
95: PetscInt i, k, nz, idx, idt, jdx;
96: const MatScalar *aa = a->a, *v;
97: PetscScalar *x, s1, s2, s3, s4, s5, x1, x2, x3, x4, x5;
98: const PetscScalar *b;
100: VecGetArrayRead(bb, &b);
101: VecGetArray(xx, &x);
102: /* forward solve the lower triangular */
103: idx = 0;
104: x[0] = b[idx];
105: x[1] = b[1 + idx];
106: x[2] = b[2 + idx];
107: x[3] = b[3 + idx];
108: x[4] = b[4 + idx];
109: for (i = 1; i < n; i++) {
110: v = aa + 25 * ai[i];
111: vi = aj + ai[i];
112: nz = ai[i + 1] - ai[i];
113: idx = 5 * i;
114: s1 = b[idx];
115: s2 = b[1 + idx];
116: s3 = b[2 + idx];
117: s4 = b[3 + idx];
118: s5 = b[4 + idx];
119: for (k = 0; k < nz; k++) {
120: jdx = 5 * vi[k];
121: x1 = x[jdx];
122: x2 = x[1 + jdx];
123: x3 = x[2 + jdx];
124: x4 = x[3 + jdx];
125: x5 = x[4 + jdx];
126: s1 -= v[0] * x1 + v[5] * x2 + v[10] * x3 + v[15] * x4 + v[20] * x5;
127: s2 -= v[1] * x1 + v[6] * x2 + v[11] * x3 + v[16] * x4 + v[21] * x5;
128: s3 -= v[2] * x1 + v[7] * x2 + v[12] * x3 + v[17] * x4 + v[22] * x5;
129: s4 -= v[3] * x1 + v[8] * x2 + v[13] * x3 + v[18] * x4 + v[23] * x5;
130: s5 -= v[4] * x1 + v[9] * x2 + v[14] * x3 + v[19] * x4 + v[24] * x5;
131: v += 25;
132: }
133: x[idx] = s1;
134: x[1 + idx] = s2;
135: x[2 + idx] = s3;
136: x[3 + idx] = s4;
137: x[4 + idx] = s5;
138: }
140: /* backward solve the upper triangular */
141: for (i = n - 1; i >= 0; i--) {
142: v = aa + 25 * (adiag[i + 1] + 1);
143: vi = aj + adiag[i + 1] + 1;
144: nz = adiag[i] - adiag[i + 1] - 1;
145: idt = 5 * i;
146: s1 = x[idt];
147: s2 = x[1 + idt];
148: s3 = x[2 + idt];
149: s4 = x[3 + idt];
150: s5 = x[4 + idt];
151: for (k = 0; k < nz; k++) {
152: idx = 5 * vi[k];
153: x1 = x[idx];
154: x2 = x[1 + idx];
155: x3 = x[2 + idx];
156: x4 = x[3 + idx];
157: x5 = x[4 + idx];
158: s1 -= v[0] * x1 + v[5] * x2 + v[10] * x3 + v[15] * x4 + v[20] * x5;
159: s2 -= v[1] * x1 + v[6] * x2 + v[11] * x3 + v[16] * x4 + v[21] * x5;
160: s3 -= v[2] * x1 + v[7] * x2 + v[12] * x3 + v[17] * x4 + v[22] * x5;
161: s4 -= v[3] * x1 + v[8] * x2 + v[13] * x3 + v[18] * x4 + v[23] * x5;
162: s5 -= v[4] * x1 + v[9] * x2 + v[14] * x3 + v[19] * x4 + v[24] * x5;
163: v += 25;
164: }
165: /* x = inv_diagonal*x */
166: x[idt] = v[0] * s1 + v[5] * s2 + v[10] * s3 + v[15] * s4 + v[20] * s5;
167: x[1 + idt] = v[1] * s1 + v[6] * s2 + v[11] * s3 + v[16] * s4 + v[21] * s5;
168: x[2 + idt] = v[2] * s1 + v[7] * s2 + v[12] * s3 + v[17] * s4 + v[22] * s5;
169: x[3 + idt] = v[3] * s1 + v[8] * s2 + v[13] * s3 + v[18] * s4 + v[23] * s5;
170: x[4 + idt] = v[4] * s1 + v[9] * s2 + v[14] * s3 + v[19] * s4 + v[24] * s5;
171: }
173: VecRestoreArrayRead(bb, &b);
174: VecRestoreArray(xx, &x);
175: PetscLogFlops(2.0 * 25 * (a->nz) - 5.0 * A->cmap->n);
176: return 0;
177: }