Actual source code: baijsolvnat14.c

  1: #include <../src/mat/impls/baij/seq/baij.h>
  2: #include <petsc/private/kernels/blockinvert.h>

  4: /* Block operations are done by accessing one column at at time */
  5: /* Default MatSolve for block size 14 */

  7: PetscErrorCode MatSolve_SeqBAIJ_14_NaturalOrdering(Mat A, Vec bb, Vec xx)
  8: {
  9:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
 10:   const PetscInt     n = a->mbs, *ai = a->i, *aj = a->j, *adiag = a->diag, *vi, bs = A->rmap->bs, bs2 = a->bs2;
 11:   PetscInt           i, k, nz, idx, idt, m;
 12:   const MatScalar   *aa = a->a, *v;
 13:   PetscScalar        s[14];
 14:   PetscScalar       *x, xv;
 15:   const PetscScalar *b;

 17:   VecGetArrayRead(bb, &b);
 18:   VecGetArray(xx, &x);

 20:   /* forward solve the lower triangular */
 21:   for (i = 0; i < n; i++) {
 22:     v           = aa + bs2 * ai[i];
 23:     vi          = aj + ai[i];
 24:     nz          = ai[i + 1] - ai[i];
 25:     idt         = bs * i;
 26:     x[idt]      = b[idt];
 27:     x[1 + idt]  = b[1 + idt];
 28:     x[2 + idt]  = b[2 + idt];
 29:     x[3 + idt]  = b[3 + idt];
 30:     x[4 + idt]  = b[4 + idt];
 31:     x[5 + idt]  = b[5 + idt];
 32:     x[6 + idt]  = b[6 + idt];
 33:     x[7 + idt]  = b[7 + idt];
 34:     x[8 + idt]  = b[8 + idt];
 35:     x[9 + idt]  = b[9 + idt];
 36:     x[10 + idt] = b[10 + idt];
 37:     x[11 + idt] = b[11 + idt];
 38:     x[12 + idt] = b[12 + idt];
 39:     x[13 + idt] = b[13 + idt];
 40:     for (m = 0; m < nz; m++) {
 41:       idx = bs * vi[m];
 42:       for (k = 0; k < bs; k++) {
 43:         xv = x[k + idx];
 44:         x[idt] -= v[0] * xv;
 45:         x[1 + idt] -= v[1] * xv;
 46:         x[2 + idt] -= v[2] * xv;
 47:         x[3 + idt] -= v[3] * xv;
 48:         x[4 + idt] -= v[4] * xv;
 49:         x[5 + idt] -= v[5] * xv;
 50:         x[6 + idt] -= v[6] * xv;
 51:         x[7 + idt] -= v[7] * xv;
 52:         x[8 + idt] -= v[8] * xv;
 53:         x[9 + idt] -= v[9] * xv;
 54:         x[10 + idt] -= v[10] * xv;
 55:         x[11 + idt] -= v[11] * xv;
 56:         x[12 + idt] -= v[12] * xv;
 57:         x[13 + idt] -= v[13] * xv;
 58:         v += 14;
 59:       }
 60:     }
 61:   }
 62:   /* backward solve the upper triangular */
 63:   for (i = n - 1; i >= 0; i--) {
 64:     v     = aa + bs2 * (adiag[i + 1] + 1);
 65:     vi    = aj + adiag[i + 1] + 1;
 66:     nz    = adiag[i] - adiag[i + 1] - 1;
 67:     idt   = bs * i;
 68:     s[0]  = x[idt];
 69:     s[1]  = x[1 + idt];
 70:     s[2]  = x[2 + idt];
 71:     s[3]  = x[3 + idt];
 72:     s[4]  = x[4 + idt];
 73:     s[5]  = x[5 + idt];
 74:     s[6]  = x[6 + idt];
 75:     s[7]  = x[7 + idt];
 76:     s[8]  = x[8 + idt];
 77:     s[9]  = x[9 + idt];
 78:     s[10] = x[10 + idt];
 79:     s[11] = x[11 + idt];
 80:     s[12] = x[12 + idt];
 81:     s[13] = x[13 + idt];

 83:     for (m = 0; m < nz; m++) {
 84:       idx = bs * vi[m];
 85:       for (k = 0; k < bs; k++) {
 86:         xv = x[k + idx];
 87:         s[0] -= v[0] * xv;
 88:         s[1] -= v[1] * xv;
 89:         s[2] -= v[2] * xv;
 90:         s[3] -= v[3] * xv;
 91:         s[4] -= v[4] * xv;
 92:         s[5] -= v[5] * xv;
 93:         s[6] -= v[6] * xv;
 94:         s[7] -= v[7] * xv;
 95:         s[8] -= v[8] * xv;
 96:         s[9] -= v[9] * xv;
 97:         s[10] -= v[10] * xv;
 98:         s[11] -= v[11] * xv;
 99:         s[12] -= v[12] * xv;
100:         s[13] -= v[13] * xv;
101:         v += 14;
102:       }
103:     }
104:     PetscArrayzero(x + idt, bs);
105:     for (k = 0; k < bs; k++) {
106:       x[idt] += v[0] * s[k];
107:       x[1 + idt] += v[1] * s[k];
108:       x[2 + idt] += v[2] * s[k];
109:       x[3 + idt] += v[3] * s[k];
110:       x[4 + idt] += v[4] * s[k];
111:       x[5 + idt] += v[5] * s[k];
112:       x[6 + idt] += v[6] * s[k];
113:       x[7 + idt] += v[7] * s[k];
114:       x[8 + idt] += v[8] * s[k];
115:       x[9 + idt] += v[9] * s[k];
116:       x[10 + idt] += v[10] * s[k];
117:       x[11 + idt] += v[11] * s[k];
118:       x[12 + idt] += v[12] * s[k];
119:       x[13 + idt] += v[13] * s[k];
120:       v += 14;
121:     }
122:   }
123:   VecRestoreArrayRead(bb, &b);
124:   VecRestoreArray(xx, &x);
125:   PetscLogFlops(2.0 * bs2 * (a->nz) - bs * A->cmap->n);
126:   return 0;
127: }

129: /* Block operations are done by accessing one column at at time */
130: /* Default MatSolve for block size 13 */

132: PetscErrorCode MatSolve_SeqBAIJ_13_NaturalOrdering(Mat A, Vec bb, Vec xx)
133: {
134:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
135:   const PetscInt     n = a->mbs, *ai = a->i, *aj = a->j, *adiag = a->diag, *vi, bs = A->rmap->bs, bs2 = a->bs2;
136:   PetscInt           i, k, nz, idx, idt, m;
137:   const MatScalar   *aa = a->a, *v;
138:   PetscScalar        s[13];
139:   PetscScalar       *x, xv;
140:   const PetscScalar *b;

142:   VecGetArrayRead(bb, &b);
143:   VecGetArray(xx, &x);

145:   /* forward solve the lower triangular */
146:   for (i = 0; i < n; i++) {
147:     v           = aa + bs2 * ai[i];
148:     vi          = aj + ai[i];
149:     nz          = ai[i + 1] - ai[i];
150:     idt         = bs * i;
151:     x[idt]      = b[idt];
152:     x[1 + idt]  = b[1 + idt];
153:     x[2 + idt]  = b[2 + idt];
154:     x[3 + idt]  = b[3 + idt];
155:     x[4 + idt]  = b[4 + idt];
156:     x[5 + idt]  = b[5 + idt];
157:     x[6 + idt]  = b[6 + idt];
158:     x[7 + idt]  = b[7 + idt];
159:     x[8 + idt]  = b[8 + idt];
160:     x[9 + idt]  = b[9 + idt];
161:     x[10 + idt] = b[10 + idt];
162:     x[11 + idt] = b[11 + idt];
163:     x[12 + idt] = b[12 + idt];
164:     for (m = 0; m < nz; m++) {
165:       idx = bs * vi[m];
166:       for (k = 0; k < bs; k++) {
167:         xv = x[k + idx];
168:         x[idt] -= v[0] * xv;
169:         x[1 + idt] -= v[1] * xv;
170:         x[2 + idt] -= v[2] * xv;
171:         x[3 + idt] -= v[3] * xv;
172:         x[4 + idt] -= v[4] * xv;
173:         x[5 + idt] -= v[5] * xv;
174:         x[6 + idt] -= v[6] * xv;
175:         x[7 + idt] -= v[7] * xv;
176:         x[8 + idt] -= v[8] * xv;
177:         x[9 + idt] -= v[9] * xv;
178:         x[10 + idt] -= v[10] * xv;
179:         x[11 + idt] -= v[11] * xv;
180:         x[12 + idt] -= v[12] * xv;
181:         v += 13;
182:       }
183:     }
184:   }
185:   /* backward solve the upper triangular */
186:   for (i = n - 1; i >= 0; i--) {
187:     v     = aa + bs2 * (adiag[i + 1] + 1);
188:     vi    = aj + adiag[i + 1] + 1;
189:     nz    = adiag[i] - adiag[i + 1] - 1;
190:     idt   = bs * i;
191:     s[0]  = x[idt];
192:     s[1]  = x[1 + idt];
193:     s[2]  = x[2 + idt];
194:     s[3]  = x[3 + idt];
195:     s[4]  = x[4 + idt];
196:     s[5]  = x[5 + idt];
197:     s[6]  = x[6 + idt];
198:     s[7]  = x[7 + idt];
199:     s[8]  = x[8 + idt];
200:     s[9]  = x[9 + idt];
201:     s[10] = x[10 + idt];
202:     s[11] = x[11 + idt];
203:     s[12] = x[12 + idt];

205:     for (m = 0; m < nz; m++) {
206:       idx = bs * vi[m];
207:       for (k = 0; k < bs; k++) {
208:         xv = x[k + idx];
209:         s[0] -= v[0] * xv;
210:         s[1] -= v[1] * xv;
211:         s[2] -= v[2] * xv;
212:         s[3] -= v[3] * xv;
213:         s[4] -= v[4] * xv;
214:         s[5] -= v[5] * xv;
215:         s[6] -= v[6] * xv;
216:         s[7] -= v[7] * xv;
217:         s[8] -= v[8] * xv;
218:         s[9] -= v[9] * xv;
219:         s[10] -= v[10] * xv;
220:         s[11] -= v[11] * xv;
221:         s[12] -= v[12] * xv;
222:         v += 13;
223:       }
224:     }
225:     PetscArrayzero(x + idt, bs);
226:     for (k = 0; k < bs; k++) {
227:       x[idt] += v[0] * s[k];
228:       x[1 + idt] += v[1] * s[k];
229:       x[2 + idt] += v[2] * s[k];
230:       x[3 + idt] += v[3] * s[k];
231:       x[4 + idt] += v[4] * s[k];
232:       x[5 + idt] += v[5] * s[k];
233:       x[6 + idt] += v[6] * s[k];
234:       x[7 + idt] += v[7] * s[k];
235:       x[8 + idt] += v[8] * s[k];
236:       x[9 + idt] += v[9] * s[k];
237:       x[10 + idt] += v[10] * s[k];
238:       x[11 + idt] += v[11] * s[k];
239:       x[12 + idt] += v[12] * s[k];
240:       v += 13;
241:     }
242:   }
243:   VecRestoreArrayRead(bb, &b);
244:   VecRestoreArray(xx, &x);
245:   PetscLogFlops(2.0 * bs2 * (a->nz) - bs * A->cmap->n);
246:   return 0;
247: }

249: /* Block operations are done by accessing one column at at time */
250: /* Default MatSolve for block size 12 */

252: PetscErrorCode MatSolve_SeqBAIJ_12_NaturalOrdering(Mat A, Vec bb, Vec xx)
253: {
254:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
255:   const PetscInt     n = a->mbs, *ai = a->i, *aj = a->j, *adiag = a->diag, *vi, bs = A->rmap->bs, bs2 = a->bs2;
256:   PetscInt           i, k, nz, idx, idt, m;
257:   const MatScalar   *aa = a->a, *v;
258:   PetscScalar        s[12];
259:   PetscScalar       *x, xv;
260:   const PetscScalar *b;

262:   VecGetArrayRead(bb, &b);
263:   VecGetArray(xx, &x);

265:   /* forward solve the lower triangular */
266:   for (i = 0; i < n; i++) {
267:     v           = aa + bs2 * ai[i];
268:     vi          = aj + ai[i];
269:     nz          = ai[i + 1] - ai[i];
270:     idt         = bs * i;
271:     x[idt]      = b[idt];
272:     x[1 + idt]  = b[1 + idt];
273:     x[2 + idt]  = b[2 + idt];
274:     x[3 + idt]  = b[3 + idt];
275:     x[4 + idt]  = b[4 + idt];
276:     x[5 + idt]  = b[5 + idt];
277:     x[6 + idt]  = b[6 + idt];
278:     x[7 + idt]  = b[7 + idt];
279:     x[8 + idt]  = b[8 + idt];
280:     x[9 + idt]  = b[9 + idt];
281:     x[10 + idt] = b[10 + idt];
282:     x[11 + idt] = b[11 + idt];
283:     for (m = 0; m < nz; m++) {
284:       idx = bs * vi[m];
285:       for (k = 0; k < bs; k++) {
286:         xv = x[k + idx];
287:         x[idt] -= v[0] * xv;
288:         x[1 + idt] -= v[1] * xv;
289:         x[2 + idt] -= v[2] * xv;
290:         x[3 + idt] -= v[3] * xv;
291:         x[4 + idt] -= v[4] * xv;
292:         x[5 + idt] -= v[5] * xv;
293:         x[6 + idt] -= v[6] * xv;
294:         x[7 + idt] -= v[7] * xv;
295:         x[8 + idt] -= v[8] * xv;
296:         x[9 + idt] -= v[9] * xv;
297:         x[10 + idt] -= v[10] * xv;
298:         x[11 + idt] -= v[11] * xv;
299:         v += 12;
300:       }
301:     }
302:   }
303:   /* backward solve the upper triangular */
304:   for (i = n - 1; i >= 0; i--) {
305:     v     = aa + bs2 * (adiag[i + 1] + 1);
306:     vi    = aj + adiag[i + 1] + 1;
307:     nz    = adiag[i] - adiag[i + 1] - 1;
308:     idt   = bs * i;
309:     s[0]  = x[idt];
310:     s[1]  = x[1 + idt];
311:     s[2]  = x[2 + idt];
312:     s[3]  = x[3 + idt];
313:     s[4]  = x[4 + idt];
314:     s[5]  = x[5 + idt];
315:     s[6]  = x[6 + idt];
316:     s[7]  = x[7 + idt];
317:     s[8]  = x[8 + idt];
318:     s[9]  = x[9 + idt];
319:     s[10] = x[10 + idt];
320:     s[11] = x[11 + idt];

322:     for (m = 0; m < nz; m++) {
323:       idx = bs * vi[m];
324:       for (k = 0; k < bs; k++) {
325:         xv = x[k + idx];
326:         s[0] -= v[0] * xv;
327:         s[1] -= v[1] * xv;
328:         s[2] -= v[2] * xv;
329:         s[3] -= v[3] * xv;
330:         s[4] -= v[4] * xv;
331:         s[5] -= v[5] * xv;
332:         s[6] -= v[6] * xv;
333:         s[7] -= v[7] * xv;
334:         s[8] -= v[8] * xv;
335:         s[9] -= v[9] * xv;
336:         s[10] -= v[10] * xv;
337:         s[11] -= v[11] * xv;
338:         v += 12;
339:       }
340:     }
341:     PetscArrayzero(x + idt, bs);
342:     for (k = 0; k < bs; k++) {
343:       x[idt] += v[0] * s[k];
344:       x[1 + idt] += v[1] * s[k];
345:       x[2 + idt] += v[2] * s[k];
346:       x[3 + idt] += v[3] * s[k];
347:       x[4 + idt] += v[4] * s[k];
348:       x[5 + idt] += v[5] * s[k];
349:       x[6 + idt] += v[6] * s[k];
350:       x[7 + idt] += v[7] * s[k];
351:       x[8 + idt] += v[8] * s[k];
352:       x[9 + idt] += v[9] * s[k];
353:       x[10 + idt] += v[10] * s[k];
354:       x[11 + idt] += v[11] * s[k];
355:       v += 12;
356:     }
357:   }
358:   VecRestoreArrayRead(bb, &b);
359:   VecRestoreArray(xx, &x);
360:   PetscLogFlops(2.0 * bs2 * (a->nz) - bs * A->cmap->n);
361:   return 0;
362: }