Actual source code: baijsolvnat2.c

  1: #include <../src/mat/impls/baij/seq/baij.h>
  2: #include <petsc/private/kernels/blockinvert.h>

  4: /*
  5:       Special case where the matrix was ILU(0) factored in the natural
  6:    ordering. This eliminates the need for the column and row permutation.
  7: */
  8: PetscErrorCode MatSolve_SeqBAIJ_2_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx)
  9: {
 10:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
 11:   const PetscInt     n = a->mbs, *vi, *ai = a->i, *aj = a->j, *diag = a->diag;
 12:   const MatScalar   *aa = a->a, *v;
 13:   PetscScalar       *x, s1, s2, x1, x2;
 14:   const PetscScalar *b;
 15:   PetscInt           jdx, idt, idx, nz, i;

 17:   VecGetArrayRead(bb, &b);
 18:   VecGetArray(xx, &x);

 20:   /* forward solve the lower triangular */
 21:   idx  = 0;
 22:   x[0] = b[0];
 23:   x[1] = b[1];
 24:   for (i = 1; i < n; i++) {
 25:     v  = aa + 4 * ai[i];
 26:     vi = aj + ai[i];
 27:     nz = diag[i] - ai[i];
 28:     idx += 2;
 29:     s1 = b[idx];
 30:     s2 = b[1 + idx];
 31:     while (nz--) {
 32:       jdx = 2 * (*vi++);
 33:       x1  = x[jdx];
 34:       x2  = x[1 + jdx];
 35:       s1 -= v[0] * x1 + v[2] * x2;
 36:       s2 -= v[1] * x1 + v[3] * x2;
 37:       v += 4;
 38:     }
 39:     x[idx]     = s1;
 40:     x[1 + idx] = s2;
 41:   }
 42:   /* backward solve the upper triangular */
 43:   for (i = n - 1; i >= 0; i--) {
 44:     v   = aa + 4 * diag[i] + 4;
 45:     vi  = aj + diag[i] + 1;
 46:     nz  = ai[i + 1] - diag[i] - 1;
 47:     idt = 2 * i;
 48:     s1  = x[idt];
 49:     s2  = x[1 + idt];
 50:     while (nz--) {
 51:       idx = 2 * (*vi++);
 52:       x1  = x[idx];
 53:       x2  = x[1 + idx];
 54:       s1 -= v[0] * x1 + v[2] * x2;
 55:       s2 -= v[1] * x1 + v[3] * x2;
 56:       v += 4;
 57:     }
 58:     v          = aa + 4 * diag[i];
 59:     x[idt]     = v[0] * s1 + v[2] * s2;
 60:     x[1 + idt] = v[1] * s1 + v[3] * s2;
 61:   }

 63:   VecRestoreArrayRead(bb, &b);
 64:   VecRestoreArray(xx, &x);
 65:   PetscLogFlops(2.0 * 4 * (a->nz) - 2.0 * A->cmap->n);
 66:   return 0;
 67: }

 69: PetscErrorCode MatSolve_SeqBAIJ_2_NaturalOrdering(Mat A, Vec bb, Vec xx)
 70: {
 71:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
 72:   const PetscInt     n = a->mbs, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
 73:   PetscInt           i, k, nz, idx, idt, jdx;
 74:   const MatScalar   *aa = a->a, *v;
 75:   PetscScalar       *x, s1, s2, x1, x2;
 76:   const PetscScalar *b;

 78:   VecGetArrayRead(bb, &b);
 79:   VecGetArray(xx, &x);
 80:   /* forward solve the lower triangular */
 81:   idx  = 0;
 82:   x[0] = b[idx];
 83:   x[1] = b[1 + idx];
 84:   for (i = 1; i < n; i++) {
 85:     v   = aa + 4 * ai[i];
 86:     vi  = aj + ai[i];
 87:     nz  = ai[i + 1] - ai[i];
 88:     idx = 2 * i;
 89:     s1  = b[idx];
 90:     s2  = b[1 + idx];
 91:     PetscPrefetchBlock(vi + nz, nz, 0, PETSC_PREFETCH_HINT_NTA);
 92:     PetscPrefetchBlock(v + 4 * nz, 4 * nz, 0, PETSC_PREFETCH_HINT_NTA);
 93:     for (k = 0; k < nz; k++) {
 94:       jdx = 2 * vi[k];
 95:       x1  = x[jdx];
 96:       x2  = x[1 + jdx];
 97:       s1 -= v[0] * x1 + v[2] * x2;
 98:       s2 -= v[1] * x1 + v[3] * x2;
 99:       v += 4;
100:     }
101:     x[idx]     = s1;
102:     x[1 + idx] = s2;
103:   }

105:   /* backward solve the upper triangular */
106:   for (i = n - 1; i >= 0; i--) {
107:     v   = aa + 4 * (adiag[i + 1] + 1);
108:     vi  = aj + adiag[i + 1] + 1;
109:     nz  = adiag[i] - adiag[i + 1] - 1;
110:     idt = 2 * i;
111:     s1  = x[idt];
112:     s2  = x[1 + idt];
113:     PetscPrefetchBlock(vi + nz, nz, 0, PETSC_PREFETCH_HINT_NTA);
114:     PetscPrefetchBlock(v + 4 * nz, 4 * nz, 0, PETSC_PREFETCH_HINT_NTA);
115:     for (k = 0; k < nz; k++) {
116:       idx = 2 * vi[k];
117:       x1  = x[idx];
118:       x2  = x[1 + idx];
119:       s1 -= v[0] * x1 + v[2] * x2;
120:       s2 -= v[1] * x1 + v[3] * x2;
121:       v += 4;
122:     }
123:     /* x = inv_diagonal*x */
124:     x[idt]     = v[0] * s1 + v[2] * s2;
125:     x[1 + idt] = v[1] * s1 + v[3] * s2;
126:   }

128:   VecRestoreArrayRead(bb, &b);
129:   VecRestoreArray(xx, &x);
130:   PetscLogFlops(2.0 * 4 * (a->nz) - 2.0 * A->cmap->n);
131:   return 0;
132: }

134: PetscErrorCode MatForwardSolve_SeqBAIJ_2_NaturalOrdering(Mat A, Vec bb, Vec xx)
135: {
136:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
137:   const PetscInt     n = a->mbs, *vi, *ai = a->i, *aj = a->j;
138:   PetscInt           i, k, nz, idx, jdx;
139:   const MatScalar   *aa = a->a, *v;
140:   PetscScalar       *x, s1, s2, x1, x2;
141:   const PetscScalar *b;

143:   VecGetArrayRead(bb, &b);
144:   VecGetArray(xx, &x);
145:   /* forward solve the lower triangular */
146:   idx  = 0;
147:   x[0] = b[idx];
148:   x[1] = b[1 + idx];
149:   for (i = 1; i < n; i++) {
150:     v   = aa + 4 * ai[i];
151:     vi  = aj + ai[i];
152:     nz  = ai[i + 1] - ai[i];
153:     idx = 2 * i;
154:     s1  = b[idx];
155:     s2  = b[1 + idx];
156:     PetscPrefetchBlock(vi + nz, nz, 0, PETSC_PREFETCH_HINT_NTA);
157:     PetscPrefetchBlock(v + 4 * nz, 4 * nz, 0, PETSC_PREFETCH_HINT_NTA);
158:     for (k = 0; k < nz; k++) {
159:       jdx = 2 * vi[k];
160:       x1  = x[jdx];
161:       x2  = x[1 + jdx];
162:       s1 -= v[0] * x1 + v[2] * x2;
163:       s2 -= v[1] * x1 + v[3] * x2;
164:       v += 4;
165:     }
166:     x[idx]     = s1;
167:     x[1 + idx] = s2;
168:   }

170:   VecRestoreArrayRead(bb, &b);
171:   VecRestoreArray(xx, &x);
172:   PetscLogFlops(4.0 * (a->nz) - A->cmap->n);
173:   return 0;
174: }

176: PetscErrorCode MatBackwardSolve_SeqBAIJ_2_NaturalOrdering(Mat A, Vec bb, Vec xx)
177: {
178:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
179:   const PetscInt     n = a->mbs, *vi, *aj = a->j, *adiag = a->diag;
180:   PetscInt           i, k, nz, idx, idt;
181:   const MatScalar   *aa = a->a, *v;
182:   PetscScalar       *x, s1, s2, x1, x2;
183:   const PetscScalar *b;

185:   VecGetArrayRead(bb, &b);
186:   VecGetArray(xx, &x);

188:   /* backward solve the upper triangular */
189:   for (i = n - 1; i >= 0; i--) {
190:     v   = aa + 4 * (adiag[i + 1] + 1);
191:     vi  = aj + adiag[i + 1] + 1;
192:     nz  = adiag[i] - adiag[i + 1] - 1;
193:     idt = 2 * i;
194:     s1  = b[idt];
195:     s2  = b[1 + idt];
196:     PetscPrefetchBlock(vi + nz, nz, 0, PETSC_PREFETCH_HINT_NTA);
197:     PetscPrefetchBlock(v + 4 * nz, 4 * nz, 0, PETSC_PREFETCH_HINT_NTA);
198:     for (k = 0; k < nz; k++) {
199:       idx = 2 * vi[k];
200:       x1  = x[idx];
201:       x2  = x[1 + idx];
202:       s1 -= v[0] * x1 + v[2] * x2;
203:       s2 -= v[1] * x1 + v[3] * x2;
204:       v += 4;
205:     }
206:     /* x = inv_diagonal*x */
207:     x[idt]     = v[0] * s1 + v[2] * s2;
208:     x[1 + idt] = v[1] * s1 + v[3] * s2;
209:   }

211:   VecRestoreArrayRead(bb, &b);
212:   VecRestoreArray(xx, &x);
213:   PetscLogFlops(4.0 * a->nz - A->cmap->n);
214:   return 0;
215: }