Actual source code: baijsolvnat2.c
1: #include <../src/mat/impls/baij/seq/baij.h>
2: #include <petsc/private/kernels/blockinvert.h>
4: /*
5: Special case where the matrix was ILU(0) factored in the natural
6: ordering. This eliminates the need for the column and row permutation.
7: */
8: PetscErrorCode MatSolve_SeqBAIJ_2_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx)
9: {
10: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
11: const PetscInt n = a->mbs, *vi, *ai = a->i, *aj = a->j, *diag = a->diag;
12: const MatScalar *aa = a->a, *v;
13: PetscScalar *x, s1, s2, x1, x2;
14: const PetscScalar *b;
15: PetscInt jdx, idt, idx, nz, i;
17: VecGetArrayRead(bb, &b);
18: VecGetArray(xx, &x);
20: /* forward solve the lower triangular */
21: idx = 0;
22: x[0] = b[0];
23: x[1] = b[1];
24: for (i = 1; i < n; i++) {
25: v = aa + 4 * ai[i];
26: vi = aj + ai[i];
27: nz = diag[i] - ai[i];
28: idx += 2;
29: s1 = b[idx];
30: s2 = b[1 + idx];
31: while (nz--) {
32: jdx = 2 * (*vi++);
33: x1 = x[jdx];
34: x2 = x[1 + jdx];
35: s1 -= v[0] * x1 + v[2] * x2;
36: s2 -= v[1] * x1 + v[3] * x2;
37: v += 4;
38: }
39: x[idx] = s1;
40: x[1 + idx] = s2;
41: }
42: /* backward solve the upper triangular */
43: for (i = n - 1; i >= 0; i--) {
44: v = aa + 4 * diag[i] + 4;
45: vi = aj + diag[i] + 1;
46: nz = ai[i + 1] - diag[i] - 1;
47: idt = 2 * i;
48: s1 = x[idt];
49: s2 = x[1 + idt];
50: while (nz--) {
51: idx = 2 * (*vi++);
52: x1 = x[idx];
53: x2 = x[1 + idx];
54: s1 -= v[0] * x1 + v[2] * x2;
55: s2 -= v[1] * x1 + v[3] * x2;
56: v += 4;
57: }
58: v = aa + 4 * diag[i];
59: x[idt] = v[0] * s1 + v[2] * s2;
60: x[1 + idt] = v[1] * s1 + v[3] * s2;
61: }
63: VecRestoreArrayRead(bb, &b);
64: VecRestoreArray(xx, &x);
65: PetscLogFlops(2.0 * 4 * (a->nz) - 2.0 * A->cmap->n);
66: return 0;
67: }
69: PetscErrorCode MatSolve_SeqBAIJ_2_NaturalOrdering(Mat A, Vec bb, Vec xx)
70: {
71: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
72: const PetscInt n = a->mbs, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
73: PetscInt i, k, nz, idx, idt, jdx;
74: const MatScalar *aa = a->a, *v;
75: PetscScalar *x, s1, s2, x1, x2;
76: const PetscScalar *b;
78: VecGetArrayRead(bb, &b);
79: VecGetArray(xx, &x);
80: /* forward solve the lower triangular */
81: idx = 0;
82: x[0] = b[idx];
83: x[1] = b[1 + idx];
84: for (i = 1; i < n; i++) {
85: v = aa + 4 * ai[i];
86: vi = aj + ai[i];
87: nz = ai[i + 1] - ai[i];
88: idx = 2 * i;
89: s1 = b[idx];
90: s2 = b[1 + idx];
91: PetscPrefetchBlock(vi + nz, nz, 0, PETSC_PREFETCH_HINT_NTA);
92: PetscPrefetchBlock(v + 4 * nz, 4 * nz, 0, PETSC_PREFETCH_HINT_NTA);
93: for (k = 0; k < nz; k++) {
94: jdx = 2 * vi[k];
95: x1 = x[jdx];
96: x2 = x[1 + jdx];
97: s1 -= v[0] * x1 + v[2] * x2;
98: s2 -= v[1] * x1 + v[3] * x2;
99: v += 4;
100: }
101: x[idx] = s1;
102: x[1 + idx] = s2;
103: }
105: /* backward solve the upper triangular */
106: for (i = n - 1; i >= 0; i--) {
107: v = aa + 4 * (adiag[i + 1] + 1);
108: vi = aj + adiag[i + 1] + 1;
109: nz = adiag[i] - adiag[i + 1] - 1;
110: idt = 2 * i;
111: s1 = x[idt];
112: s2 = x[1 + idt];
113: PetscPrefetchBlock(vi + nz, nz, 0, PETSC_PREFETCH_HINT_NTA);
114: PetscPrefetchBlock(v + 4 * nz, 4 * nz, 0, PETSC_PREFETCH_HINT_NTA);
115: for (k = 0; k < nz; k++) {
116: idx = 2 * vi[k];
117: x1 = x[idx];
118: x2 = x[1 + idx];
119: s1 -= v[0] * x1 + v[2] * x2;
120: s2 -= v[1] * x1 + v[3] * x2;
121: v += 4;
122: }
123: /* x = inv_diagonal*x */
124: x[idt] = v[0] * s1 + v[2] * s2;
125: x[1 + idt] = v[1] * s1 + v[3] * s2;
126: }
128: VecRestoreArrayRead(bb, &b);
129: VecRestoreArray(xx, &x);
130: PetscLogFlops(2.0 * 4 * (a->nz) - 2.0 * A->cmap->n);
131: return 0;
132: }
134: PetscErrorCode MatForwardSolve_SeqBAIJ_2_NaturalOrdering(Mat A, Vec bb, Vec xx)
135: {
136: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
137: const PetscInt n = a->mbs, *vi, *ai = a->i, *aj = a->j;
138: PetscInt i, k, nz, idx, jdx;
139: const MatScalar *aa = a->a, *v;
140: PetscScalar *x, s1, s2, x1, x2;
141: const PetscScalar *b;
143: VecGetArrayRead(bb, &b);
144: VecGetArray(xx, &x);
145: /* forward solve the lower triangular */
146: idx = 0;
147: x[0] = b[idx];
148: x[1] = b[1 + idx];
149: for (i = 1; i < n; i++) {
150: v = aa + 4 * ai[i];
151: vi = aj + ai[i];
152: nz = ai[i + 1] - ai[i];
153: idx = 2 * i;
154: s1 = b[idx];
155: s2 = b[1 + idx];
156: PetscPrefetchBlock(vi + nz, nz, 0, PETSC_PREFETCH_HINT_NTA);
157: PetscPrefetchBlock(v + 4 * nz, 4 * nz, 0, PETSC_PREFETCH_HINT_NTA);
158: for (k = 0; k < nz; k++) {
159: jdx = 2 * vi[k];
160: x1 = x[jdx];
161: x2 = x[1 + jdx];
162: s1 -= v[0] * x1 + v[2] * x2;
163: s2 -= v[1] * x1 + v[3] * x2;
164: v += 4;
165: }
166: x[idx] = s1;
167: x[1 + idx] = s2;
168: }
170: VecRestoreArrayRead(bb, &b);
171: VecRestoreArray(xx, &x);
172: PetscLogFlops(4.0 * (a->nz) - A->cmap->n);
173: return 0;
174: }
176: PetscErrorCode MatBackwardSolve_SeqBAIJ_2_NaturalOrdering(Mat A, Vec bb, Vec xx)
177: {
178: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
179: const PetscInt n = a->mbs, *vi, *aj = a->j, *adiag = a->diag;
180: PetscInt i, k, nz, idx, idt;
181: const MatScalar *aa = a->a, *v;
182: PetscScalar *x, s1, s2, x1, x2;
183: const PetscScalar *b;
185: VecGetArrayRead(bb, &b);
186: VecGetArray(xx, &x);
188: /* backward solve the upper triangular */
189: for (i = n - 1; i >= 0; i--) {
190: v = aa + 4 * (adiag[i + 1] + 1);
191: vi = aj + adiag[i + 1] + 1;
192: nz = adiag[i] - adiag[i + 1] - 1;
193: idt = 2 * i;
194: s1 = b[idt];
195: s2 = b[1 + idt];
196: PetscPrefetchBlock(vi + nz, nz, 0, PETSC_PREFETCH_HINT_NTA);
197: PetscPrefetchBlock(v + 4 * nz, 4 * nz, 0, PETSC_PREFETCH_HINT_NTA);
198: for (k = 0; k < nz; k++) {
199: idx = 2 * vi[k];
200: x1 = x[idx];
201: x2 = x[1 + idx];
202: s1 -= v[0] * x1 + v[2] * x2;
203: s2 -= v[1] * x1 + v[3] * x2;
204: v += 4;
205: }
206: /* x = inv_diagonal*x */
207: x[idt] = v[0] * s1 + v[2] * s2;
208: x[1 + idt] = v[1] * s1 + v[3] * s2;
209: }
211: VecRestoreArrayRead(bb, &b);
212: VecRestoreArray(xx, &x);
213: PetscLogFlops(4.0 * a->nz - A->cmap->n);
214: return 0;
215: }