Actual source code: baijsolvnat3.c
1: #include <../src/mat/impls/baij/seq/baij.h>
2: #include <petsc/private/kernels/blockinvert.h>
4: /*
5: Special case where the matrix was ILU(0) factored in the natural
6: ordering. This eliminates the need for the column and row permutation.
7: */
8: PetscErrorCode MatSolve_SeqBAIJ_3_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx)
9: {
10: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
11: const PetscInt n = a->mbs, *ai = a->i, *aj = a->j;
12: const PetscInt *diag = a->diag, *vi;
13: const MatScalar *aa = a->a, *v;
14: PetscScalar *x, s1, s2, s3, x1, x2, x3;
15: const PetscScalar *b;
16: PetscInt jdx, idt, idx, nz, i;
18: VecGetArrayRead(bb, &b);
19: VecGetArray(xx, &x);
21: /* forward solve the lower triangular */
22: idx = 0;
23: x[0] = b[0];
24: x[1] = b[1];
25: x[2] = b[2];
26: for (i = 1; i < n; i++) {
27: v = aa + 9 * ai[i];
28: vi = aj + ai[i];
29: nz = diag[i] - ai[i];
30: idx += 3;
31: s1 = b[idx];
32: s2 = b[1 + idx];
33: s3 = b[2 + idx];
34: while (nz--) {
35: jdx = 3 * (*vi++);
36: x1 = x[jdx];
37: x2 = x[1 + jdx];
38: x3 = x[2 + jdx];
39: s1 -= v[0] * x1 + v[3] * x2 + v[6] * x3;
40: s2 -= v[1] * x1 + v[4] * x2 + v[7] * x3;
41: s3 -= v[2] * x1 + v[5] * x2 + v[8] * x3;
42: v += 9;
43: }
44: x[idx] = s1;
45: x[1 + idx] = s2;
46: x[2 + idx] = s3;
47: }
48: /* backward solve the upper triangular */
49: for (i = n - 1; i >= 0; i--) {
50: v = aa + 9 * diag[i] + 9;
51: vi = aj + diag[i] + 1;
52: nz = ai[i + 1] - diag[i] - 1;
53: idt = 3 * i;
54: s1 = x[idt];
55: s2 = x[1 + idt];
56: s3 = x[2 + idt];
57: while (nz--) {
58: idx = 3 * (*vi++);
59: x1 = x[idx];
60: x2 = x[1 + idx];
61: x3 = x[2 + idx];
62: s1 -= v[0] * x1 + v[3] * x2 + v[6] * x3;
63: s2 -= v[1] * x1 + v[4] * x2 + v[7] * x3;
64: s3 -= v[2] * x1 + v[5] * x2 + v[8] * x3;
65: v += 9;
66: }
67: v = aa + 9 * diag[i];
68: x[idt] = v[0] * s1 + v[3] * s2 + v[6] * s3;
69: x[1 + idt] = v[1] * s1 + v[4] * s2 + v[7] * s3;
70: x[2 + idt] = v[2] * s1 + v[5] * s2 + v[8] * s3;
71: }
73: VecRestoreArrayRead(bb, &b);
74: VecRestoreArray(xx, &x);
75: PetscLogFlops(2.0 * 9 * (a->nz) - 3.0 * A->cmap->n);
76: return 0;
77: }
79: PetscErrorCode MatSolve_SeqBAIJ_3_NaturalOrdering(Mat A, Vec bb, Vec xx)
80: {
81: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
82: const PetscInt n = a->mbs, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
83: PetscInt i, k, nz, idx, jdx, idt;
84: const PetscInt bs = A->rmap->bs, bs2 = a->bs2;
85: const MatScalar *aa = a->a, *v;
86: PetscScalar *x;
87: const PetscScalar *b;
88: PetscScalar s1, s2, s3, x1, x2, x3;
90: VecGetArrayRead(bb, &b);
91: VecGetArray(xx, &x);
92: /* forward solve the lower triangular */
93: idx = 0;
94: x[0] = b[idx];
95: x[1] = b[1 + idx];
96: x[2] = b[2 + idx];
97: for (i = 1; i < n; i++) {
98: v = aa + bs2 * ai[i];
99: vi = aj + ai[i];
100: nz = ai[i + 1] - ai[i];
101: idx = bs * i;
102: s1 = b[idx];
103: s2 = b[1 + idx];
104: s3 = b[2 + idx];
105: for (k = 0; k < nz; k++) {
106: jdx = bs * vi[k];
107: x1 = x[jdx];
108: x2 = x[1 + jdx];
109: x3 = x[2 + jdx];
110: s1 -= v[0] * x1 + v[3] * x2 + v[6] * x3;
111: s2 -= v[1] * x1 + v[4] * x2 + v[7] * x3;
112: s3 -= v[2] * x1 + v[5] * x2 + v[8] * x3;
114: v += bs2;
115: }
117: x[idx] = s1;
118: x[1 + idx] = s2;
119: x[2 + idx] = s3;
120: }
122: /* backward solve the upper triangular */
123: for (i = n - 1; i >= 0; i--) {
124: v = aa + bs2 * (adiag[i + 1] + 1);
125: vi = aj + adiag[i + 1] + 1;
126: nz = adiag[i] - adiag[i + 1] - 1;
127: idt = bs * i;
128: s1 = x[idt];
129: s2 = x[1 + idt];
130: s3 = x[2 + idt];
132: for (k = 0; k < nz; k++) {
133: idx = bs * vi[k];
134: x1 = x[idx];
135: x2 = x[1 + idx];
136: x3 = x[2 + idx];
137: s1 -= v[0] * x1 + v[3] * x2 + v[6] * x3;
138: s2 -= v[1] * x1 + v[4] * x2 + v[7] * x3;
139: s3 -= v[2] * x1 + v[5] * x2 + v[8] * x3;
141: v += bs2;
142: }
143: /* x = inv_diagonal*x */
144: x[idt] = v[0] * s1 + v[3] * s2 + v[6] * s3;
145: x[1 + idt] = v[1] * s1 + v[4] * s2 + v[7] * s3;
146: x[2 + idt] = v[2] * s1 + v[5] * s2 + v[8] * s3;
147: }
149: VecRestoreArrayRead(bb, &b);
150: VecRestoreArray(xx, &x);
151: PetscLogFlops(2.0 * bs2 * (a->nz) - bs * A->cmap->n);
152: return 0;
153: }
155: PetscErrorCode MatForwardSolve_SeqBAIJ_3_NaturalOrdering(Mat A, Vec bb, Vec xx)
156: {
157: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
158: const PetscInt n = a->mbs, *vi, *ai = a->i, *aj = a->j;
159: PetscInt i, k, nz, idx, jdx;
160: const PetscInt bs = A->rmap->bs, bs2 = a->bs2;
161: const MatScalar *aa = a->a, *v;
162: PetscScalar *x;
163: const PetscScalar *b;
164: PetscScalar s1, s2, s3, x1, x2, x3;
166: VecGetArrayRead(bb, &b);
167: VecGetArray(xx, &x);
168: /* forward solve the lower triangular */
169: idx = 0;
170: x[0] = b[idx];
171: x[1] = b[1 + idx];
172: x[2] = b[2 + idx];
173: for (i = 1; i < n; i++) {
174: v = aa + bs2 * ai[i];
175: vi = aj + ai[i];
176: nz = ai[i + 1] - ai[i];
177: idx = bs * i;
178: s1 = b[idx];
179: s2 = b[1 + idx];
180: s3 = b[2 + idx];
181: for (k = 0; k < nz; k++) {
182: jdx = bs * vi[k];
183: x1 = x[jdx];
184: x2 = x[1 + jdx];
185: x3 = x[2 + jdx];
186: s1 -= v[0] * x1 + v[3] * x2 + v[6] * x3;
187: s2 -= v[1] * x1 + v[4] * x2 + v[7] * x3;
188: s3 -= v[2] * x1 + v[5] * x2 + v[8] * x3;
190: v += bs2;
191: }
193: x[idx] = s1;
194: x[1 + idx] = s2;
195: x[2 + idx] = s3;
196: }
198: VecRestoreArrayRead(bb, &b);
199: VecRestoreArray(xx, &x);
200: PetscLogFlops(1.0 * bs2 * (a->nz) - bs * A->cmap->n);
201: return 0;
202: }
204: PetscErrorCode MatBackwardSolve_SeqBAIJ_3_NaturalOrdering(Mat A, Vec bb, Vec xx)
205: {
206: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
207: const PetscInt n = a->mbs, *vi, *aj = a->j, *adiag = a->diag;
208: PetscInt i, k, nz, idx, idt;
209: const PetscInt bs = A->rmap->bs, bs2 = a->bs2;
210: const MatScalar *aa = a->a, *v;
211: PetscScalar *x;
212: const PetscScalar *b;
213: PetscScalar s1, s2, s3, x1, x2, x3;
215: VecGetArrayRead(bb, &b);
216: VecGetArray(xx, &x);
218: /* backward solve the upper triangular */
219: for (i = n - 1; i >= 0; i--) {
220: v = aa + bs2 * (adiag[i + 1] + 1);
221: vi = aj + adiag[i + 1] + 1;
222: nz = adiag[i] - adiag[i + 1] - 1;
223: idt = bs * i;
224: s1 = b[idt];
225: s2 = b[1 + idt];
226: s3 = b[2 + idt];
228: for (k = 0; k < nz; k++) {
229: idx = bs * vi[k];
230: x1 = x[idx];
231: x2 = x[1 + idx];
232: x3 = x[2 + idx];
233: s1 -= v[0] * x1 + v[3] * x2 + v[6] * x3;
234: s2 -= v[1] * x1 + v[4] * x2 + v[7] * x3;
235: s3 -= v[2] * x1 + v[5] * x2 + v[8] * x3;
237: v += bs2;
238: }
239: /* x = inv_diagonal*x */
240: x[idt] = v[0] * s1 + v[3] * s2 + v[6] * s3;
241: x[1 + idt] = v[1] * s1 + v[4] * s2 + v[7] * s3;
242: x[2 + idt] = v[2] * s1 + v[5] * s2 + v[8] * s3;
243: }
245: VecRestoreArrayRead(bb, &b);
246: VecRestoreArray(xx, &x);
247: PetscLogFlops(2.0 * bs2 * (a->nz) - bs * A->cmap->n);
248: return 0;
249: }