Actual source code: baijfact3.c


  2: /*
  3:     Factorization code for BAIJ format.
  4: */
  5: #include <../src/mat/impls/baij/seq/baij.h>
  6: #include <petsc/private/kernels/blockinvert.h>

  8: /*
  9:    This is used to set the numeric factorization for both LU and ILU symbolic factorization
 10: */
 11: PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat fact, PetscBool natural)
 12: {
 13:   if (natural) {
 14:     switch (fact->rmap->bs) {
 15:     case 1:
 16:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
 17:       break;
 18:     case 2:
 19:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering;
 20:       break;
 21:     case 3:
 22:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering;
 23:       break;
 24:     case 4:
 25:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
 26:       break;
 27:     case 5:
 28:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering;
 29:       break;
 30:     case 6:
 31:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering;
 32:       break;
 33:     case 7:
 34:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering;
 35:       break;
 36:     case 9:
 37: #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
 38:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_9_NaturalOrdering;
 39: #else
 40:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
 41: #endif
 42:       break;
 43:     case 15:
 44:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_15_NaturalOrdering;
 45:       break;
 46:     default:
 47:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
 48:       break;
 49:     }
 50:   } else {
 51:     switch (fact->rmap->bs) {
 52:     case 1:
 53:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
 54:       break;
 55:     case 2:
 56:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
 57:       break;
 58:     case 3:
 59:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
 60:       break;
 61:     case 4:
 62:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
 63:       break;
 64:     case 5:
 65:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
 66:       break;
 67:     case 6:
 68:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
 69:       break;
 70:     case 7:
 71:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
 72:       break;
 73:     default:
 74:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
 75:       break;
 76:     }
 77:   }
 78:   return 0;
 79: }

 81: PetscErrorCode MatSeqBAIJSetNumericFactorization_inplace(Mat inA, PetscBool natural)
 82: {
 83:   if (natural) {
 84:     switch (inA->rmap->bs) {
 85:     case 1:
 86:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace;
 87:       break;
 88:     case 2:
 89:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_inplace;
 90:       break;
 91:     case 3:
 92:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering_inplace;
 93:       break;
 94:     case 4:
 95: #if defined(PETSC_USE_REAL_MAT_SINGLE)
 96:     {
 97:       PetscBool sse_enabled_local;
 98:       PetscSSEIsEnabled(inA->comm, &sse_enabled_local, NULL);
 99:       if (sse_enabled_local) {
100:   #if defined(PETSC_HAVE_SSE)
101:         int i, *AJ = a->j, nz = a->nz, n = a->mbs;
102:         if (n == (unsigned short)n) {
103:           unsigned short *aj = (unsigned short *)AJ;
104:           for (i = 0; i < nz; i++) aj[i] = (unsigned short)AJ[i];

106:           inA->ops->setunfactored   = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE_usj;
107:           inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE_usj;

109:           PetscInfo(inA, "Using special SSE, in-place natural ordering, ushort j index factor BS=4\n");
110:         } else {
111:           /* Scale the column indices for easier indexing in MatSolve. */
112:           /*            for (i=0;i<nz;i++) { */
113:           /*              AJ[i] = AJ[i]*4; */
114:           /*            } */
115:           inA->ops->setunfactored   = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE;
116:           inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE;

118:           PetscInfo(inA, "Using special SSE, in-place natural ordering, int j index factor BS=4\n");
119:         }
120:   #else
121:         /* This should never be reached.  If so, problem in PetscSSEIsEnabled. */
122:         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "SSE Hardware unavailable");
123:   #endif
124:       } else {
125:         inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace;
126:       }
127:     }
128: #else
129:       inA->ops->lufactornumeric  = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace;
130: #endif
131:     break;
132:     case 5:
133:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering_inplace;
134:       break;
135:     case 6:
136:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering_inplace;
137:       break;
138:     case 7:
139:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering_inplace;
140:       break;
141:     default:
142:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace;
143:       break;
144:     }
145:   } else {
146:     switch (inA->rmap->bs) {
147:     case 1:
148:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace;
149:       break;
150:     case 2:
151:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_inplace;
152:       break;
153:     case 3:
154:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_inplace;
155:       break;
156:     case 4:
157:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_inplace;
158:       break;
159:     case 5:
160:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_inplace;
161:       break;
162:     case 6:
163:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_inplace;
164:       break;
165:     case 7:
166:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_inplace;
167:       break;
168:     default:
169:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace;
170:       break;
171:     }
172:   }
173:   return 0;
174: }

176: /*
177:     The symbolic factorization code is identical to that for AIJ format,
178:   except for very small changes since this is now a SeqBAIJ datastructure.
179:   NOT good code reuse.
180: */
181: #include <petscbt.h>
182: #include <../src/mat/utils/freespace.h>

184: PetscErrorCode MatLUFactorSymbolic_SeqBAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
185: {
186:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data, *b;
187:   PetscInt           n = a->mbs, bs = A->rmap->bs, bs2 = a->bs2;
188:   PetscBool          row_identity, col_identity, both_identity;
189:   IS                 isicol;
190:   const PetscInt    *r, *ic;
191:   PetscInt           i, *ai = a->i, *aj = a->j;
192:   PetscInt          *bi, *bj, *ajtmp;
193:   PetscInt          *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
194:   PetscReal          f;
195:   PetscInt           nlnk, *lnk, k, **bi_ptr;
196:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
197:   PetscBT            lnkbt;
198:   PetscBool          missing;

201:   MatMissingDiagonal(A, &missing, &i);

204:   if (bs > 1) { /* check shifttype */
206:   }

208:   ISInvertPermutation(iscol, PETSC_DECIDE, &isicol);
209:   ISGetIndices(isrow, &r);
210:   ISGetIndices(isicol, &ic);

212:   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
213:   PetscMalloc1(n + 1, &bi);
214:   PetscMalloc1(n + 1, &bdiag);
215:   bi[0] = bdiag[0] = 0;

217:   /* linked list for storing column indices of the active row */
218:   nlnk = n + 1;
219:   PetscLLCreate(n, n, nlnk, lnk, lnkbt);

221:   PetscMalloc2(n + 1, &bi_ptr, n + 1, &im);

223:   /* initial FreeSpace size is f*(ai[n]+1) */
224:   f = info->fill;
225:   PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space);

227:   current_space = free_space;

229:   for (i = 0; i < n; i++) {
230:     /* copy previous fill into linked list */
231:     nzi   = 0;
232:     nnz   = ai[r[i] + 1] - ai[r[i]];
233:     ajtmp = aj + ai[r[i]];
234:     PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt);
235:     nzi += nlnk;

237:     /* add pivot rows into linked list */
238:     row = lnk[n];
239:     while (row < i) {
240:       nzbd  = bdiag[row] + 1;     /* num of entries in the row with column index <= row */
241:       ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
242:       PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im);
243:       nzi += nlnk;
244:       row = lnk[row];
245:     }
246:     bi[i + 1] = bi[i] + nzi;
247:     im[i]     = nzi;

249:     /* mark bdiag */
250:     nzbd = 0;
251:     nnz  = nzi;
252:     k    = lnk[n];
253:     while (nnz-- && k < i) {
254:       nzbd++;
255:       k = lnk[k];
256:     }
257:     bdiag[i] = nzbd; /* note : bdaig[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */

259:     /* if free space is not available, make more free space */
260:     if (current_space->local_remaining < nzi) {
261:       nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - i, nzi)); /* estimated and max additional space needed */
262:       PetscFreeSpaceGet(nnz, &current_space);
263:       reallocs++;
264:     }

266:     /* copy data into free space, then initialize lnk */
267:     PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt);

269:     bi_ptr[i] = current_space->array;
270:     current_space->array += nzi;
271:     current_space->local_used += nzi;
272:     current_space->local_remaining -= nzi;
273:   }

275:   ISRestoreIndices(isrow, &r);
276:   ISRestoreIndices(isicol, &ic);

278:   /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
279:   PetscMalloc1(bi[n] + 1, &bj);
280:   PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag);
281:   PetscLLDestroy(lnk, lnkbt);
282:   PetscFree2(bi_ptr, im);

284:   /* put together the new matrix */
285:   MatSeqBAIJSetPreallocation(B, bs, MAT_SKIP_ALLOCATION, NULL);
286:   b = (Mat_SeqBAIJ *)(B)->data;

288:   b->free_a       = PETSC_TRUE;
289:   b->free_ij      = PETSC_TRUE;
290:   b->singlemalloc = PETSC_FALSE;

292:   PetscMalloc1((bdiag[0] + 1) * bs2, &b->a);
293:   b->j             = bj;
294:   b->i             = bi;
295:   b->diag          = bdiag;
296:   b->free_diag     = PETSC_TRUE;
297:   b->ilen          = NULL;
298:   b->imax          = NULL;
299:   b->row           = isrow;
300:   b->col           = iscol;
301:   b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;

303:   PetscObjectReference((PetscObject)isrow);
304:   PetscObjectReference((PetscObject)iscol);
305:   b->icol = isicol;
306:   PetscMalloc1(bs * n + bs, &b->solve_work);

308:   b->maxnz = b->nz = bdiag[0] + 1;

310:   B->factortype            = MAT_FACTOR_LU;
311:   B->info.factor_mallocs   = reallocs;
312:   B->info.fill_ratio_given = f;

314:   if (ai[n] != 0) {
315:     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
316:   } else {
317:     B->info.fill_ratio_needed = 0.0;
318:   }
319: #if defined(PETSC_USE_INFO)
320:   if (ai[n] != 0) {
321:     PetscReal af = B->info.fill_ratio_needed;
322:     PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af);
323:     PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af);
324:     PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af);
325:     PetscInfo(A, "for best performance.\n");
326:   } else {
327:     PetscInfo(A, "Empty matrix\n");
328:   }
329: #endif

331:   ISIdentity(isrow, &row_identity);
332:   ISIdentity(iscol, &col_identity);

334:   both_identity = (PetscBool)(row_identity && col_identity);

336:   MatSeqBAIJSetNumericFactorization(B, both_identity);
337:   return 0;
338: }

340: PetscErrorCode MatLUFactorSymbolic_SeqBAIJ_inplace(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
341: {
342:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data, *b;
343:   PetscInt           n = a->mbs, bs = A->rmap->bs, bs2 = a->bs2;
344:   PetscBool          row_identity, col_identity, both_identity;
345:   IS                 isicol;
346:   const PetscInt    *r, *ic;
347:   PetscInt           i, *ai = a->i, *aj = a->j;
348:   PetscInt          *bi, *bj, *ajtmp;
349:   PetscInt          *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
350:   PetscReal          f;
351:   PetscInt           nlnk, *lnk, k, **bi_ptr;
352:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
353:   PetscBT            lnkbt;
354:   PetscBool          missing;

357:   MatMissingDiagonal(A, &missing, &i);

360:   ISInvertPermutation(iscol, PETSC_DECIDE, &isicol);
361:   ISGetIndices(isrow, &r);
362:   ISGetIndices(isicol, &ic);

364:   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
365:   PetscMalloc1(n + 1, &bi);
366:   PetscMalloc1(n + 1, &bdiag);

368:   bi[0] = bdiag[0] = 0;

370:   /* linked list for storing column indices of the active row */
371:   nlnk = n + 1;
372:   PetscLLCreate(n, n, nlnk, lnk, lnkbt);

374:   PetscMalloc2(n + 1, &bi_ptr, n + 1, &im);

376:   /* initial FreeSpace size is f*(ai[n]+1) */
377:   f = info->fill;
378:   PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space);
379:   current_space = free_space;

381:   for (i = 0; i < n; i++) {
382:     /* copy previous fill into linked list */
383:     nzi   = 0;
384:     nnz   = ai[r[i] + 1] - ai[r[i]];
385:     ajtmp = aj + ai[r[i]];
386:     PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt);
387:     nzi += nlnk;

389:     /* add pivot rows into linked list */
390:     row = lnk[n];
391:     while (row < i) {
392:       nzbd  = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
393:       ajtmp = bi_ptr[row] + nzbd;       /* points to the entry next to the diagonal */
394:       PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im);
395:       nzi += nlnk;
396:       row = lnk[row];
397:     }
398:     bi[i + 1] = bi[i] + nzi;
399:     im[i]     = nzi;

401:     /* mark bdiag */
402:     nzbd = 0;
403:     nnz  = nzi;
404:     k    = lnk[n];
405:     while (nnz-- && k < i) {
406:       nzbd++;
407:       k = lnk[k];
408:     }
409:     bdiag[i] = bi[i] + nzbd;

411:     /* if free space is not available, make more free space */
412:     if (current_space->local_remaining < nzi) {
413:       nnz = PetscIntMultTruncate(n - i, nzi); /* estimated and max additional space needed */
414:       PetscFreeSpaceGet(nnz, &current_space);
415:       reallocs++;
416:     }

418:     /* copy data into free space, then initialize lnk */
419:     PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt);

421:     bi_ptr[i] = current_space->array;
422:     current_space->array += nzi;
423:     current_space->local_used += nzi;
424:     current_space->local_remaining -= nzi;
425:   }
426: #if defined(PETSC_USE_INFO)
427:   if (ai[n] != 0) {
428:     PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
429:     PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af);
430:     PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af);
431:     PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af);
432:     PetscInfo(A, "for best performance.\n");
433:   } else {
434:     PetscInfo(A, "Empty matrix\n");
435:   }
436: #endif

438:   ISRestoreIndices(isrow, &r);
439:   ISRestoreIndices(isicol, &ic);

441:   /* destroy list of free space and other temporary array(s) */
442:   PetscMalloc1(bi[n] + 1, &bj);
443:   PetscFreeSpaceContiguous(&free_space, bj);
444:   PetscLLDestroy(lnk, lnkbt);
445:   PetscFree2(bi_ptr, im);

447:   /* put together the new matrix */
448:   MatSeqBAIJSetPreallocation(B, bs, MAT_SKIP_ALLOCATION, NULL);
449:   b = (Mat_SeqBAIJ *)(B)->data;

451:   b->free_a       = PETSC_TRUE;
452:   b->free_ij      = PETSC_TRUE;
453:   b->singlemalloc = PETSC_FALSE;

455:   PetscMalloc1((bi[n] + 1) * bs2, &b->a);
456:   b->j             = bj;
457:   b->i             = bi;
458:   b->diag          = bdiag;
459:   b->free_diag     = PETSC_TRUE;
460:   b->ilen          = NULL;
461:   b->imax          = NULL;
462:   b->row           = isrow;
463:   b->col           = iscol;
464:   b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;

466:   PetscObjectReference((PetscObject)isrow);
467:   PetscObjectReference((PetscObject)iscol);
468:   b->icol = isicol;

470:   PetscMalloc1(bs * n + bs, &b->solve_work);

472:   b->maxnz = b->nz = bi[n];

474:   (B)->factortype            = MAT_FACTOR_LU;
475:   (B)->info.factor_mallocs   = reallocs;
476:   (B)->info.fill_ratio_given = f;

478:   if (ai[n] != 0) {
479:     (B)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
480:   } else {
481:     (B)->info.fill_ratio_needed = 0.0;
482:   }

484:   ISIdentity(isrow, &row_identity);
485:   ISIdentity(iscol, &col_identity);

487:   both_identity = (PetscBool)(row_identity && col_identity);

489:   MatSeqBAIJSetNumericFactorization_inplace(B, both_identity);
490:   return 0;
491: }