Actual source code: sbaijfact.c


  2: #include <../src/mat/impls/baij/seq/baij.h>
  3: #include <../src/mat/impls/sbaij/seq/sbaij.h>
  4: #include <petsc/private/kernels/blockinvert.h>
  5: #include <petscis.h>

  7: PetscErrorCode MatGetInertia_SeqSBAIJ(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos)
  8: {
  9:   Mat_SeqSBAIJ *fact = (Mat_SeqSBAIJ *)F->data;
 10:   MatScalar    *dd   = fact->a;
 11:   PetscInt      mbs = fact->mbs, bs = F->rmap->bs, i, nneg_tmp, npos_tmp, *fi = fact->diag;


 15:   nneg_tmp = 0;
 16:   npos_tmp = 0;
 17:   for (i = 0; i < mbs; i++) {
 18:     if (PetscRealPart(dd[*fi]) > 0.0) npos_tmp++;
 19:     else if (PetscRealPart(dd[*fi]) < 0.0) nneg_tmp++;
 20:     fi++;
 21:   }
 22:   if (nneg) *nneg = nneg_tmp;
 23:   if (npos) *npos = npos_tmp;
 24:   if (nzero) *nzero = mbs - nneg_tmp - npos_tmp;
 25:   return 0;
 26: }

 28: /*
 29:   Symbolic U^T*D*U factorization for SBAIJ format. Modified from SSF of YSMP.
 30:   Use Modified Sparse Row (MSR) storage for u and ju. See page 85, "Iterative Methods ..." by Saad.
 31: */
 32: PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(Mat F, Mat A, IS perm, const MatFactorInfo *info)
 33: {
 34:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data, *b;
 35:   const PetscInt *rip, *ai, *aj;
 36:   PetscInt        i, mbs = a->mbs, *jutmp, bs = A->rmap->bs, bs2 = a->bs2;
 37:   PetscInt        m, reallocs = 0, prow;
 38:   PetscInt       *jl, *q, jmin, jmax, juidx, nzk, qm, *iu, *ju, k, j, vj, umax, maxadd;
 39:   PetscReal       f = info->fill;
 40:   PetscBool       perm_identity;

 42:   /* check whether perm is the identity mapping */
 43:   ISIdentity(perm, &perm_identity);
 44:   ISGetIndices(perm, &rip);

 46:   if (perm_identity) { /* without permutation */
 47:     a->permute = PETSC_FALSE;

 49:     ai = a->i;
 50:     aj = a->j;
 51:   } else { /* non-trivial permutation */
 52:     a->permute = PETSC_TRUE;

 54:     MatReorderingSeqSBAIJ(A, perm);

 56:     ai = a->inew;
 57:     aj = a->jnew;
 58:   }

 60:   /* initialization */
 61:   PetscMalloc1(mbs + 1, &iu);
 62:   umax = (PetscInt)(f * ai[mbs] + 1);
 63:   umax += mbs + 1;
 64:   PetscMalloc1(umax, &ju);
 65:   iu[0] = mbs + 1;
 66:   juidx = mbs + 1; /* index for ju */
 67:   /* jl linked list for pivot row -- linked list for col index */
 68:   PetscMalloc2(mbs, &jl, mbs, &q);
 69:   for (i = 0; i < mbs; i++) {
 70:     jl[i] = mbs;
 71:     q[i]  = 0;
 72:   }

 74:   /* for each row k */
 75:   for (k = 0; k < mbs; k++) {
 76:     for (i = 0; i < mbs; i++) q[i] = 0; /* to be removed! */
 77:     nzk  = 0;                           /* num. of nz blocks in k-th block row with diagonal block excluded */
 78:     q[k] = mbs;
 79:     /* initialize nonzero structure of k-th row to row rip[k] of A */
 80:     jmin = ai[rip[k]] + 1; /* exclude diag[k] */
 81:     jmax = ai[rip[k] + 1];
 82:     for (j = jmin; j < jmax; j++) {
 83:       vj = rip[aj[j]]; /* col. value */
 84:       if (vj > k) {
 85:         qm = k;
 86:         do {
 87:           m  = qm;
 88:           qm = q[m];
 89:         } while (qm < vj);
 91:         nzk++;
 92:         q[m]  = vj;
 93:         q[vj] = qm;
 94:       } /* if (vj > k) */
 95:     }   /* for (j=jmin; j<jmax; j++) */

 97:     /* modify nonzero structure of k-th row by computing fill-in
 98:        for each row i to be merged in */
 99:     prow = k;
100:     prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */

102:     while (prow < k) {
103:       /* merge row prow into k-th row */
104:       jmin = iu[prow] + 1;
105:       jmax = iu[prow + 1];
106:       qm   = k;
107:       for (j = jmin; j < jmax; j++) {
108:         vj = ju[j];
109:         do {
110:           m  = qm;
111:           qm = q[m];
112:         } while (qm < vj);
113:         if (qm != vj) {
114:           nzk++;
115:           q[m]  = vj;
116:           q[vj] = qm;
117:           qm    = vj;
118:         }
119:       }
120:       prow = jl[prow]; /* next pivot row */
121:     }

123:     /* add k to row list for first nonzero element in k-th row */
124:     if (nzk > 0) {
125:       i     = q[k]; /* col value of first nonzero element in U(k, k+1:mbs-1) */
126:       jl[k] = jl[i];
127:       jl[i] = k;
128:     }
129:     iu[k + 1] = iu[k] + nzk;

131:     /* allocate more space to ju if needed */
132:     if (iu[k + 1] > umax) {
133:       /* estimate how much additional space we will need */
134:       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
135:       /* just double the memory each time */
136:       maxadd = umax;
137:       if (maxadd < nzk) maxadd = (mbs - k) * (nzk + 1) / 2;
138:       umax += maxadd;

140:       /* allocate a longer ju */
141:       PetscMalloc1(umax, &jutmp);
142:       PetscArraycpy(jutmp, ju, iu[k]);
143:       PetscFree(ju);
144:       ju = jutmp;
145:       reallocs++; /* count how many times we realloc */
146:     }

148:     /* save nonzero structure of k-th row in ju */
149:     i = k;
150:     while (nzk--) {
151:       i           = q[i];
152:       ju[juidx++] = i;
153:     }
154:   }

156: #if defined(PETSC_USE_INFO)
157:   if (ai[mbs] != 0) {
158:     PetscReal af = ((PetscReal)iu[mbs]) / ((PetscReal)ai[mbs]);
159:     PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af);
160:     PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af);
161:     PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af);
162:     PetscInfo(A, "for best performance.\n");
163:   } else {
164:     PetscInfo(A, "Empty matrix\n");
165:   }
166: #endif

168:   ISRestoreIndices(perm, &rip);
169:   PetscFree2(jl, q);

171:   /* put together the new matrix */
172:   MatSeqSBAIJSetPreallocation(F, bs, MAT_SKIP_ALLOCATION, NULL);

174:   b               = (Mat_SeqSBAIJ *)(F)->data;
175:   b->singlemalloc = PETSC_FALSE;
176:   b->free_a       = PETSC_TRUE;
177:   b->free_ij      = PETSC_TRUE;

179:   PetscMalloc1((iu[mbs] + 1) * bs2, &b->a);
180:   b->j    = ju;
181:   b->i    = iu;
182:   b->diag = NULL;
183:   b->ilen = NULL;
184:   b->imax = NULL;
185:   b->row  = perm;

187:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */

189:   PetscObjectReference((PetscObject)perm);

191:   b->icol = perm;
192:   PetscObjectReference((PetscObject)perm);
193:   PetscMalloc1(bs * mbs + bs, &b->solve_work);
194:   /* In b structure:  Free imax, ilen, old a, old j.
195:      Allocate idnew, solve_work, new a, new j */
196:   b->maxnz = b->nz = iu[mbs];

198:   (F)->info.factor_mallocs   = reallocs;
199:   (F)->info.fill_ratio_given = f;
200:   if (ai[mbs] != 0) {
201:     (F)->info.fill_ratio_needed = ((PetscReal)iu[mbs]) / ((PetscReal)ai[mbs]);
202:   } else {
203:     (F)->info.fill_ratio_needed = 0.0;
204:   }
205:   MatSeqSBAIJSetNumericFactorization_inplace(F, perm_identity);
206:   return 0;
207: }
208: /*
209:     Symbolic U^T*D*U factorization for SBAIJ format.
210:     See MatICCFactorSymbolic_SeqAIJ() for description of its data structure.
211: */
212: #include <petscbt.h>
213: #include <../src/mat/utils/freespace.h>
214: PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
215: {
216:   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ *)A->data;
217:   Mat_SeqSBAIJ      *b;
218:   PetscBool          perm_identity, missing;
219:   PetscReal          fill = info->fill;
220:   const PetscInt    *rip, *ai = a->i, *aj = a->j;
221:   PetscInt           i, mbs = a->mbs, bs = A->rmap->bs, reallocs = 0, prow;
222:   PetscInt          *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
223:   PetscInt           nlnk, *lnk, ncols, *cols, *uj, **ui_ptr, *uj_ptr, *udiag;
224:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
225:   PetscBT            lnkbt;

228:   MatMissingDiagonal(A, &missing, &i);
230:   if (bs > 1) {
231:     MatCholeskyFactorSymbolic_SeqSBAIJ_inplace(fact, A, perm, info);
232:     return 0;
233:   }

235:   /* check whether perm is the identity mapping */
236:   ISIdentity(perm, &perm_identity);
238:   a->permute = PETSC_FALSE;
239:   ISGetIndices(perm, &rip);

241:   /* initialization */
242:   PetscMalloc1(mbs + 1, &ui);
243:   PetscMalloc1(mbs + 1, &udiag);
244:   ui[0] = 0;

246:   /* jl: linked list for storing indices of the pivot rows
247:      il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */
248:   PetscMalloc4(mbs, &ui_ptr, mbs, &il, mbs, &jl, mbs, &cols);
249:   for (i = 0; i < mbs; i++) {
250:     jl[i] = mbs;
251:     il[i] = 0;
252:   }

254:   /* create and initialize a linked list for storing column indices of the active row k */
255:   nlnk = mbs + 1;
256:   PetscLLCreate(mbs, mbs, nlnk, lnk, lnkbt);

258:   /* initial FreeSpace size is fill*(ai[mbs]+1) */
259:   PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[mbs] + 1), &free_space);
260:   current_space = free_space;

262:   for (k = 0; k < mbs; k++) { /* for each active row k */
263:     /* initialize lnk by the column indices of row rip[k] of A */
264:     nzk   = 0;
265:     ncols = ai[k + 1] - ai[k];
267:     for (j = 0; j < ncols; j++) {
268:       i       = *(aj + ai[k] + j);
269:       cols[j] = i;
270:     }
271:     PetscLLAdd(ncols, cols, mbs, &nlnk, lnk, lnkbt);
272:     nzk += nlnk;

274:     /* update lnk by computing fill-in for each pivot row to be merged in */
275:     prow = jl[k]; /* 1st pivot row */

277:     while (prow < k) {
278:       nextprow = jl[prow];
279:       /* merge prow into k-th row */
280:       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */
281:       jmax   = ui[prow + 1];
282:       ncols  = jmax - jmin;
283:       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */
284:       PetscLLAddSorted(ncols, uj_ptr, mbs, &nlnk, lnk, lnkbt);
285:       nzk += nlnk;

287:       /* update il and jl for prow */
288:       if (jmin < jmax) {
289:         il[prow] = jmin;
290:         j        = *uj_ptr;
291:         jl[prow] = jl[j];
292:         jl[j]    = prow;
293:       }
294:       prow = nextprow;
295:     }

297:     /* if free space is not available, make more free space */
298:     if (current_space->local_remaining < nzk) {
299:       i = mbs - k + 1;                                   /* num of unfactored rows */
300:       i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
301:       PetscFreeSpaceGet(i, &current_space);
302:       reallocs++;
303:     }

305:     /* copy data into free space, then initialize lnk */
306:     PetscLLClean(mbs, mbs, nzk, lnk, current_space->array, lnkbt);

308:     /* add the k-th row into il and jl */
309:     if (nzk > 1) {
310:       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
311:       jl[k] = jl[i];
312:       jl[i] = k;
313:       il[k] = ui[k] + 1;
314:     }
315:     ui_ptr[k] = current_space->array;

317:     current_space->array += nzk;
318:     current_space->local_used += nzk;
319:     current_space->local_remaining -= nzk;

321:     ui[k + 1] = ui[k] + nzk;
322:   }

324:   ISRestoreIndices(perm, &rip);
325:   PetscFree4(ui_ptr, il, jl, cols);

327:   /* destroy list of free space and other temporary array(s) */
328:   PetscMalloc1(ui[mbs] + 1, &uj);
329:   PetscFreeSpaceContiguous_Cholesky(&free_space, uj, mbs, ui, udiag); /* store matrix factor */
330:   PetscLLDestroy(lnk, lnkbt);

332:   /* put together the new matrix in MATSEQSBAIJ format */
333:   MatSeqSBAIJSetPreallocation(fact, bs, MAT_SKIP_ALLOCATION, NULL);

335:   b               = (Mat_SeqSBAIJ *)fact->data;
336:   b->singlemalloc = PETSC_FALSE;
337:   b->free_a       = PETSC_TRUE;
338:   b->free_ij      = PETSC_TRUE;

340:   PetscMalloc1(ui[mbs] + 1, &b->a);

342:   b->j         = uj;
343:   b->i         = ui;
344:   b->diag      = udiag;
345:   b->free_diag = PETSC_TRUE;
346:   b->ilen      = NULL;
347:   b->imax      = NULL;
348:   b->row       = perm;
349:   b->icol      = perm;

351:   PetscObjectReference((PetscObject)perm);
352:   PetscObjectReference((PetscObject)perm);

354:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */

356:   PetscMalloc1(mbs + 1, &b->solve_work);

358:   b->maxnz = b->nz = ui[mbs];

360:   fact->info.factor_mallocs   = reallocs;
361:   fact->info.fill_ratio_given = fill;
362:   if (ai[mbs] != 0) {
363:     fact->info.fill_ratio_needed = ((PetscReal)ui[mbs]) / ai[mbs];
364:   } else {
365:     fact->info.fill_ratio_needed = 0.0;
366:   }
367: #if defined(PETSC_USE_INFO)
368:   if (ai[mbs] != 0) {
369:     PetscReal af = fact->info.fill_ratio_needed;
370:     PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af);
371:     PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af);
372:     PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af);
373:   } else {
374:     PetscInfo(A, "Empty matrix\n");
375:   }
376: #endif
377:   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
378:   return 0;
379: }

381: PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
382: {
383:   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ *)A->data;
384:   Mat_SeqSBAIJ      *b;
385:   PetscBool          perm_identity, missing;
386:   PetscReal          fill = info->fill;
387:   const PetscInt    *rip, *ai, *aj;
388:   PetscInt           i, mbs = a->mbs, bs = A->rmap->bs, reallocs = 0, prow, d;
389:   PetscInt          *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
390:   PetscInt           nlnk, *lnk, ncols, *cols, *uj, **ui_ptr, *uj_ptr;
391:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
392:   PetscBT            lnkbt;

394:   MatMissingDiagonal(A, &missing, &d);

397:   /*
398:    This code originally uses Modified Sparse Row (MSR) storage
399:    (see page 85, "Iterative Methods ..." by Saad) for the output matrix B - bad choice!
400:    Then it is rewritten so the factor B takes seqsbaij format. However the associated
401:    MatCholeskyFactorNumeric_() have not been modified for the cases of bs>1 or !perm_identity,
402:    thus the original code in MSR format is still used for these cases.
403:    The code below should replace MatCholeskyFactorSymbolic_SeqSBAIJ_MSR() whenever
404:    MatCholeskyFactorNumeric_() is modified for using sbaij symbolic factor.
405:   */
406:   if (bs > 1) {
407:     MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(fact, A, perm, info);
408:     return 0;
409:   }

411:   /* check whether perm is the identity mapping */
412:   ISIdentity(perm, &perm_identity);
414:   a->permute = PETSC_FALSE;
415:   ai         = a->i;
416:   aj         = a->j;
417:   ISGetIndices(perm, &rip);

419:   /* initialization */
420:   PetscMalloc1(mbs + 1, &ui);
421:   ui[0] = 0;

423:   /* jl: linked list for storing indices of the pivot rows
424:      il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */
425:   PetscMalloc4(mbs, &ui_ptr, mbs, &il, mbs, &jl, mbs, &cols);
426:   for (i = 0; i < mbs; i++) {
427:     jl[i] = mbs;
428:     il[i] = 0;
429:   }

431:   /* create and initialize a linked list for storing column indices of the active row k */
432:   nlnk = mbs + 1;
433:   PetscLLCreate(mbs, mbs, nlnk, lnk, lnkbt);

435:   /* initial FreeSpace size is fill*(ai[mbs]+1) */
436:   PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[mbs] + 1), &free_space);
437:   current_space = free_space;

439:   for (k = 0; k < mbs; k++) { /* for each active row k */
440:     /* initialize lnk by the column indices of row rip[k] of A */
441:     nzk   = 0;
442:     ncols = ai[rip[k] + 1] - ai[rip[k]];
443:     for (j = 0; j < ncols; j++) {
444:       i       = *(aj + ai[rip[k]] + j);
445:       cols[j] = rip[i];
446:     }
447:     PetscLLAdd(ncols, cols, mbs, &nlnk, lnk, lnkbt);
448:     nzk += nlnk;

450:     /* update lnk by computing fill-in for each pivot row to be merged in */
451:     prow = jl[k]; /* 1st pivot row */

453:     while (prow < k) {
454:       nextprow = jl[prow];
455:       /* merge prow into k-th row */
456:       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */
457:       jmax   = ui[prow + 1];
458:       ncols  = jmax - jmin;
459:       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */
460:       PetscLLAddSorted(ncols, uj_ptr, mbs, &nlnk, lnk, lnkbt);
461:       nzk += nlnk;

463:       /* update il and jl for prow */
464:       if (jmin < jmax) {
465:         il[prow] = jmin;

467:         j        = *uj_ptr;
468:         jl[prow] = jl[j];
469:         jl[j]    = prow;
470:       }
471:       prow = nextprow;
472:     }

474:     /* if free space is not available, make more free space */
475:     if (current_space->local_remaining < nzk) {
476:       i = mbs - k + 1;                                                            /* num of unfactored rows */
477:       i = PetscMin(PetscIntMultTruncate(i, nzk), PetscIntMultTruncate(i, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
478:       PetscFreeSpaceGet(i, &current_space);
479:       reallocs++;
480:     }

482:     /* copy data into free space, then initialize lnk */
483:     PetscLLClean(mbs, mbs, nzk, lnk, current_space->array, lnkbt);

485:     /* add the k-th row into il and jl */
486:     if (nzk - 1 > 0) {
487:       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
488:       jl[k] = jl[i];
489:       jl[i] = k;
490:       il[k] = ui[k] + 1;
491:     }
492:     ui_ptr[k] = current_space->array;

494:     current_space->array += nzk;
495:     current_space->local_used += nzk;
496:     current_space->local_remaining -= nzk;

498:     ui[k + 1] = ui[k] + nzk;
499:   }

501:   ISRestoreIndices(perm, &rip);
502:   PetscFree4(ui_ptr, il, jl, cols);

504:   /* destroy list of free space and other temporary array(s) */
505:   PetscMalloc1(ui[mbs] + 1, &uj);
506:   PetscFreeSpaceContiguous(&free_space, uj);
507:   PetscLLDestroy(lnk, lnkbt);

509:   /* put together the new matrix in MATSEQSBAIJ format */
510:   MatSeqSBAIJSetPreallocation(fact, bs, MAT_SKIP_ALLOCATION, NULL);

512:   b               = (Mat_SeqSBAIJ *)fact->data;
513:   b->singlemalloc = PETSC_FALSE;
514:   b->free_a       = PETSC_TRUE;
515:   b->free_ij      = PETSC_TRUE;

517:   PetscMalloc1(ui[mbs] + 1, &b->a);

519:   b->j    = uj;
520:   b->i    = ui;
521:   b->diag = NULL;
522:   b->ilen = NULL;
523:   b->imax = NULL;
524:   b->row  = perm;

526:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */

528:   PetscObjectReference((PetscObject)perm);
529:   b->icol = perm;
530:   PetscObjectReference((PetscObject)perm);
531:   PetscMalloc1(mbs + 1, &b->solve_work);
532:   b->maxnz = b->nz = ui[mbs];

534:   fact->info.factor_mallocs   = reallocs;
535:   fact->info.fill_ratio_given = fill;
536:   if (ai[mbs] != 0) {
537:     fact->info.fill_ratio_needed = ((PetscReal)ui[mbs]) / ai[mbs];
538:   } else {
539:     fact->info.fill_ratio_needed = 0.0;
540:   }
541: #if defined(PETSC_USE_INFO)
542:   if (ai[mbs] != 0) {
543:     PetscReal af = fact->info.fill_ratio_needed;
544:     PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af);
545:     PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af);
546:     PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af);
547:   } else {
548:     PetscInfo(A, "Empty matrix\n");
549:   }
550: #endif
551:   MatSeqSBAIJSetNumericFactorization_inplace(fact, perm_identity);
552:   return 0;
553: }

555: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat C, Mat A, const MatFactorInfo *info)
556: {
557:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
558:   IS              perm = b->row;
559:   const PetscInt *ai, *aj, *perm_ptr, mbs = a->mbs, *bi = b->i, *bj = b->j;
560:   PetscInt        i, j;
561:   PetscInt       *a2anew, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili;
562:   PetscInt        bs = A->rmap->bs, bs2 = a->bs2;
563:   MatScalar      *ba = b->a, *aa, *ap, *dk, *uik;
564:   MatScalar      *u, *diag, *rtmp, *rtmp_ptr;
565:   MatScalar      *work;
566:   PetscInt       *pivots;
567:   PetscBool       allowzeropivot, zeropivotdetected;

569:   /* initialization */
570:   PetscCalloc1(bs2 * mbs, &rtmp);
571:   PetscMalloc2(mbs, &il, mbs, &jl);
572:   allowzeropivot = PetscNot(A->erroriffailure);

574:   il[0] = 0;
575:   for (i = 0; i < mbs; i++) jl[i] = mbs;

577:   PetscMalloc3(bs2, &dk, bs2, &uik, bs, &work);
578:   PetscMalloc1(bs, &pivots);

580:   ISGetIndices(perm, &perm_ptr);

582:   /* check permutation */
583:   if (!a->permute) {
584:     ai = a->i;
585:     aj = a->j;
586:     aa = a->a;
587:   } else {
588:     ai = a->inew;
589:     aj = a->jnew;
590:     PetscMalloc1(bs2 * ai[mbs], &aa);
591:     PetscArraycpy(aa, a->a, bs2 * ai[mbs]);
592:     PetscMalloc1(ai[mbs], &a2anew);
593:     PetscArraycpy(a2anew, a->a2anew, ai[mbs]);

595:     for (i = 0; i < mbs; i++) {
596:       jmin = ai[i];
597:       jmax = ai[i + 1];
598:       for (j = jmin; j < jmax; j++) {
599:         while (a2anew[j] != j) {
600:           k         = a2anew[j];
601:           a2anew[j] = a2anew[k];
602:           a2anew[k] = k;
603:           for (k1 = 0; k1 < bs2; k1++) {
604:             dk[k1]           = aa[k * bs2 + k1];
605:             aa[k * bs2 + k1] = aa[j * bs2 + k1];
606:             aa[j * bs2 + k1] = dk[k1];
607:           }
608:         }
609:         /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
610:         if (i > aj[j]) {
611:           ap = aa + j * bs2;                       /* ptr to the beginning of j-th block of aa */
612:           for (k = 0; k < bs2; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */
613:           for (k = 0; k < bs; k++) {               /* j-th block of aa <- dk^T */
614:             for (k1 = 0; k1 < bs; k1++) *ap++ = dk[k + bs * k1];
615:           }
616:         }
617:       }
618:     }
619:     PetscFree(a2anew);
620:   }

622:   /* for each row k */
623:   for (k = 0; k < mbs; k++) {
624:     /*initialize k-th row with elements nonzero in row perm(k) of A */
625:     jmin = ai[perm_ptr[k]];
626:     jmax = ai[perm_ptr[k] + 1];

628:     ap = aa + jmin * bs2;
629:     for (j = jmin; j < jmax; j++) {
630:       vj       = perm_ptr[aj[j]]; /* block col. index */
631:       rtmp_ptr = rtmp + vj * bs2;
632:       for (i = 0; i < bs2; i++) *rtmp_ptr++ = *ap++;
633:     }

635:     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
636:     PetscArraycpy(dk, rtmp + k * bs2, bs2);
637:     i = jl[k]; /* first row to be added to k_th row  */

639:     while (i < k) {
640:       nexti = jl[i]; /* next row to be added to k_th row */

642:       /* compute multiplier */
643:       ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */

645:       /* uik = -inv(Di)*U_bar(i,k) */
646:       diag = ba + i * bs2;
647:       u    = ba + ili * bs2;
648:       PetscArrayzero(uik, bs2);
649:       PetscKernel_A_gets_A_minus_B_times_C(bs, uik, diag, u);

651:       /* update D(k) += -U(i,k)^T * U_bar(i,k) */
652:       PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, dk, uik, u);
653:       PetscLogFlops(4.0 * bs * bs2);

655:       /* update -U(i,k) */
656:       PetscArraycpy(ba + ili * bs2, uik, bs2);

658:       /* add multiple of row i to k-th row ... */
659:       jmin = ili + 1;
660:       jmax = bi[i + 1];
661:       if (jmin < jmax) {
662:         for (j = jmin; j < jmax; j++) {
663:           /* rtmp += -U(i,k)^T * U_bar(i,j) */
664:           rtmp_ptr = rtmp + bj[j] * bs2;
665:           u        = ba + j * bs2;
666:           PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, rtmp_ptr, uik, u);
667:         }
668:         PetscLogFlops(2.0 * bs * bs2 * (jmax - jmin));

670:         /* ... add i to row list for next nonzero entry */
671:         il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
672:         j     = bj[jmin];
673:         jl[i] = jl[j];
674:         jl[j] = i; /* update jl */
675:       }
676:       i = nexti;
677:     }

679:     /* save nonzero entries in k-th row of U ... */

681:     /* invert diagonal block */
682:     diag = ba + k * bs2;
683:     PetscArraycpy(diag, dk, bs2);

685:     PetscKernel_A_gets_inverse_A(bs, diag, pivots, work, allowzeropivot, &zeropivotdetected);
686:     if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;

688:     jmin = bi[k];
689:     jmax = bi[k + 1];
690:     if (jmin < jmax) {
691:       for (j = jmin; j < jmax; j++) {
692:         vj       = bj[j]; /* block col. index of U */
693:         u        = ba + j * bs2;
694:         rtmp_ptr = rtmp + vj * bs2;
695:         for (k1 = 0; k1 < bs2; k1++) {
696:           *u++        = *rtmp_ptr;
697:           *rtmp_ptr++ = 0.0;
698:         }
699:       }

701:       /* ... add k to row list for first nonzero entry in k-th row */
702:       il[k] = jmin;
703:       i     = bj[jmin];
704:       jl[k] = jl[i];
705:       jl[i] = k;
706:     }
707:   }

709:   PetscFree(rtmp);
710:   PetscFree2(il, jl);
711:   PetscFree3(dk, uik, work);
712:   PetscFree(pivots);
713:   if (a->permute) PetscFree(aa);

715:   ISRestoreIndices(perm, &perm_ptr);

717:   C->ops->solve          = MatSolve_SeqSBAIJ_N_inplace;
718:   C->ops->solvetranspose = MatSolve_SeqSBAIJ_N_inplace;
719:   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_N_inplace;
720:   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_N_inplace;

722:   C->assembled    = PETSC_TRUE;
723:   C->preallocated = PETSC_TRUE;

725:   PetscLogFlops(1.3333 * bs * bs2 * b->mbs); /* from inverting diagonal blocks */
726:   return 0;
727: }

729: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering(Mat C, Mat A, const MatFactorInfo *info)
730: {
731:   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
732:   PetscInt      i, j, mbs = a->mbs, *bi = b->i, *bj = b->j;
733:   PetscInt     *ai, *aj, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili;
734:   PetscInt      bs = A->rmap->bs, bs2 = a->bs2;
735:   MatScalar    *ba = b->a, *aa, *ap, *dk, *uik;
736:   MatScalar    *u, *diag, *rtmp, *rtmp_ptr;
737:   MatScalar    *work;
738:   PetscInt     *pivots;
739:   PetscBool     allowzeropivot, zeropivotdetected;

741:   PetscCalloc1(bs2 * mbs, &rtmp);
742:   PetscMalloc2(mbs, &il, mbs, &jl);
743:   il[0] = 0;
744:   for (i = 0; i < mbs; i++) jl[i] = mbs;

746:   PetscMalloc3(bs2, &dk, bs2, &uik, bs, &work);
747:   PetscMalloc1(bs, &pivots);
748:   allowzeropivot = PetscNot(A->erroriffailure);

750:   ai = a->i;
751:   aj = a->j;
752:   aa = a->a;

754:   /* for each row k */
755:   for (k = 0; k < mbs; k++) {
756:     /*initialize k-th row with elements nonzero in row k of A */
757:     jmin = ai[k];
758:     jmax = ai[k + 1];
759:     ap   = aa + jmin * bs2;
760:     for (j = jmin; j < jmax; j++) {
761:       vj       = aj[j]; /* block col. index */
762:       rtmp_ptr = rtmp + vj * bs2;
763:       for (i = 0; i < bs2; i++) *rtmp_ptr++ = *ap++;
764:     }

766:     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
767:     PetscArraycpy(dk, rtmp + k * bs2, bs2);
768:     i = jl[k]; /* first row to be added to k_th row  */

770:     while (i < k) {
771:       nexti = jl[i]; /* next row to be added to k_th row */

773:       /* compute multiplier */
774:       ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */

776:       /* uik = -inv(Di)*U_bar(i,k) */
777:       diag = ba + i * bs2;
778:       u    = ba + ili * bs2;
779:       PetscArrayzero(uik, bs2);
780:       PetscKernel_A_gets_A_minus_B_times_C(bs, uik, diag, u);

782:       /* update D(k) += -U(i,k)^T * U_bar(i,k) */
783:       PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, dk, uik, u);
784:       PetscLogFlops(2.0 * bs * bs2);

786:       /* update -U(i,k) */
787:       PetscArraycpy(ba + ili * bs2, uik, bs2);

789:       /* add multiple of row i to k-th row ... */
790:       jmin = ili + 1;
791:       jmax = bi[i + 1];
792:       if (jmin < jmax) {
793:         for (j = jmin; j < jmax; j++) {
794:           /* rtmp += -U(i,k)^T * U_bar(i,j) */
795:           rtmp_ptr = rtmp + bj[j] * bs2;
796:           u        = ba + j * bs2;
797:           PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, rtmp_ptr, uik, u);
798:         }
799:         PetscLogFlops(2.0 * bs * bs2 * (jmax - jmin));

801:         /* ... add i to row list for next nonzero entry */
802:         il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
803:         j     = bj[jmin];
804:         jl[i] = jl[j];
805:         jl[j] = i; /* update jl */
806:       }
807:       i = nexti;
808:     }

810:     /* save nonzero entries in k-th row of U ... */

812:     /* invert diagonal block */
813:     diag = ba + k * bs2;
814:     PetscArraycpy(diag, dk, bs2);

816:     PetscKernel_A_gets_inverse_A(bs, diag, pivots, work, allowzeropivot, &zeropivotdetected);
817:     if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;

819:     jmin = bi[k];
820:     jmax = bi[k + 1];
821:     if (jmin < jmax) {
822:       for (j = jmin; j < jmax; j++) {
823:         vj       = bj[j]; /* block col. index of U */
824:         u        = ba + j * bs2;
825:         rtmp_ptr = rtmp + vj * bs2;
826:         for (k1 = 0; k1 < bs2; k1++) {
827:           *u++        = *rtmp_ptr;
828:           *rtmp_ptr++ = 0.0;
829:         }
830:       }

832:       /* ... add k to row list for first nonzero entry in k-th row */
833:       il[k] = jmin;
834:       i     = bj[jmin];
835:       jl[k] = jl[i];
836:       jl[i] = k;
837:     }
838:   }

840:   PetscFree(rtmp);
841:   PetscFree2(il, jl);
842:   PetscFree3(dk, uik, work);
843:   PetscFree(pivots);

845:   C->ops->solve          = MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
846:   C->ops->solvetranspose = MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
847:   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
848:   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
849:   C->assembled           = PETSC_TRUE;
850:   C->preallocated        = PETSC_TRUE;

852:   PetscLogFlops(1.3333 * bs * bs2 * b->mbs); /* from inverting diagonal blocks */
853:   return 0;
854: }

856: /*
857:     Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP.
858:     Version for blocks 2 by 2.
859: */
860: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat C, Mat A, const MatFactorInfo *info)
861: {
862:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
863:   IS              perm = b->row;
864:   const PetscInt *ai, *aj, *perm_ptr;
865:   PetscInt        i, j, mbs = a->mbs, *bi = b->i, *bj = b->j;
866:   PetscInt       *a2anew, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili;
867:   MatScalar      *ba = b->a, *aa, *ap;
868:   MatScalar      *u, *diag, *rtmp, *rtmp_ptr, dk[4], uik[4];
869:   PetscReal       shift = info->shiftamount;
870:   PetscBool       allowzeropivot, zeropivotdetected;

872:   allowzeropivot = PetscNot(A->erroriffailure);

874:   /* initialization */
875:   /* il and jl record the first nonzero element in each row of the accessing
876:      window U(0:k, k:mbs-1).
877:      jl:    list of rows to be added to uneliminated rows
878:             i>= k: jl(i) is the first row to be added to row i
879:             i<  k: jl(i) is the row following row i in some list of rows
880:             jl(i) = mbs indicates the end of a list
881:      il(i): points to the first nonzero element in columns k,...,mbs-1 of
882:             row i of U */
883:   PetscCalloc1(4 * mbs, &rtmp);
884:   PetscMalloc2(mbs, &il, mbs, &jl);
885:   il[0] = 0;
886:   for (i = 0; i < mbs; i++) jl[i] = mbs;

888:   ISGetIndices(perm, &perm_ptr);

890:   /* check permutation */
891:   if (!a->permute) {
892:     ai = a->i;
893:     aj = a->j;
894:     aa = a->a;
895:   } else {
896:     ai = a->inew;
897:     aj = a->jnew;
898:     PetscMalloc1(4 * ai[mbs], &aa);
899:     PetscArraycpy(aa, a->a, 4 * ai[mbs]);
900:     PetscMalloc1(ai[mbs], &a2anew);
901:     PetscArraycpy(a2anew, a->a2anew, ai[mbs]);

903:     for (i = 0; i < mbs; i++) {
904:       jmin = ai[i];
905:       jmax = ai[i + 1];
906:       for (j = jmin; j < jmax; j++) {
907:         while (a2anew[j] != j) {
908:           k         = a2anew[j];
909:           a2anew[j] = a2anew[k];
910:           a2anew[k] = k;
911:           for (k1 = 0; k1 < 4; k1++) {
912:             dk[k1]         = aa[k * 4 + k1];
913:             aa[k * 4 + k1] = aa[j * 4 + k1];
914:             aa[j * 4 + k1] = dk[k1];
915:           }
916:         }
917:         /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
918:         if (i > aj[j]) {
919:           ap    = aa + j * 4; /* ptr to the beginning of the block */
920:           dk[1] = ap[1];      /* swap ap[1] and ap[2] */
921:           ap[1] = ap[2];
922:           ap[2] = dk[1];
923:         }
924:       }
925:     }
926:     PetscFree(a2anew);
927:   }

929:   /* for each row k */
930:   for (k = 0; k < mbs; k++) {
931:     /*initialize k-th row with elements nonzero in row perm(k) of A */
932:     jmin = ai[perm_ptr[k]];
933:     jmax = ai[perm_ptr[k] + 1];
934:     ap   = aa + jmin * 4;
935:     for (j = jmin; j < jmax; j++) {
936:       vj       = perm_ptr[aj[j]]; /* block col. index */
937:       rtmp_ptr = rtmp + vj * 4;
938:       for (i = 0; i < 4; i++) *rtmp_ptr++ = *ap++;
939:     }

941:     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
942:     PetscArraycpy(dk, rtmp + k * 4, 4);
943:     i = jl[k]; /* first row to be added to k_th row  */

945:     while (i < k) {
946:       nexti = jl[i]; /* next row to be added to k_th row */

948:       /* compute multiplier */
949:       ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */

951:       /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
952:       diag   = ba + i * 4;
953:       u      = ba + ili * 4;
954:       uik[0] = -(diag[0] * u[0] + diag[2] * u[1]);
955:       uik[1] = -(diag[1] * u[0] + diag[3] * u[1]);
956:       uik[2] = -(diag[0] * u[2] + diag[2] * u[3]);
957:       uik[3] = -(diag[1] * u[2] + diag[3] * u[3]);

959:       /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
960:       dk[0] += uik[0] * u[0] + uik[1] * u[1];
961:       dk[1] += uik[2] * u[0] + uik[3] * u[1];
962:       dk[2] += uik[0] * u[2] + uik[1] * u[3];
963:       dk[3] += uik[2] * u[2] + uik[3] * u[3];

965:       PetscLogFlops(16.0 * 2.0);

967:       /* update -U(i,k): ba[ili] = uik */
968:       PetscArraycpy(ba + ili * 4, uik, 4);

970:       /* add multiple of row i to k-th row ... */
971:       jmin = ili + 1;
972:       jmax = bi[i + 1];
973:       if (jmin < jmax) {
974:         for (j = jmin; j < jmax; j++) {
975:           /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
976:           rtmp_ptr = rtmp + bj[j] * 4;
977:           u        = ba + j * 4;
978:           rtmp_ptr[0] += uik[0] * u[0] + uik[1] * u[1];
979:           rtmp_ptr[1] += uik[2] * u[0] + uik[3] * u[1];
980:           rtmp_ptr[2] += uik[0] * u[2] + uik[1] * u[3];
981:           rtmp_ptr[3] += uik[2] * u[2] + uik[3] * u[3];
982:         }
983:         PetscLogFlops(16.0 * (jmax - jmin));

985:         /* ... add i to row list for next nonzero entry */
986:         il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
987:         j     = bj[jmin];
988:         jl[i] = jl[j];
989:         jl[j] = i; /* update jl */
990:       }
991:       i = nexti;
992:     }

994:     /* save nonzero entries in k-th row of U ... */

996:     /* invert diagonal block */
997:     diag = ba + k * 4;
998:     PetscArraycpy(diag, dk, 4);
999:     PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected);
1000:     if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;

1002:     jmin = bi[k];
1003:     jmax = bi[k + 1];
1004:     if (jmin < jmax) {
1005:       for (j = jmin; j < jmax; j++) {
1006:         vj       = bj[j]; /* block col. index of U */
1007:         u        = ba + j * 4;
1008:         rtmp_ptr = rtmp + vj * 4;
1009:         for (k1 = 0; k1 < 4; k1++) {
1010:           *u++        = *rtmp_ptr;
1011:           *rtmp_ptr++ = 0.0;
1012:         }
1013:       }

1015:       /* ... add k to row list for first nonzero entry in k-th row */
1016:       il[k] = jmin;
1017:       i     = bj[jmin];
1018:       jl[k] = jl[i];
1019:       jl[i] = k;
1020:     }
1021:   }

1023:   PetscFree(rtmp);
1024:   PetscFree2(il, jl);
1025:   if (a->permute) PetscFree(aa);
1026:   ISRestoreIndices(perm, &perm_ptr);

1028:   C->ops->solve          = MatSolve_SeqSBAIJ_2_inplace;
1029:   C->ops->solvetranspose = MatSolve_SeqSBAIJ_2_inplace;
1030:   C->assembled           = PETSC_TRUE;
1031:   C->preallocated        = PETSC_TRUE;

1033:   PetscLogFlops(1.3333 * 8 * b->mbs); /* from inverting diagonal blocks */
1034:   return 0;
1035: }

1037: /*
1038:       Version for when blocks are 2 by 2 Using natural ordering
1039: */
1040: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering(Mat C, Mat A, const MatFactorInfo *info)
1041: {
1042:   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
1043:   PetscInt      i, j, mbs = a->mbs, *bi = b->i, *bj = b->j;
1044:   PetscInt     *ai, *aj, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili;
1045:   MatScalar    *ba = b->a, *aa, *ap, dk[8], uik[8];
1046:   MatScalar    *u, *diag, *rtmp, *rtmp_ptr;
1047:   PetscReal     shift = info->shiftamount;
1048:   PetscBool     allowzeropivot, zeropivotdetected;

1050:   allowzeropivot = PetscNot(A->erroriffailure);

1052:   /* initialization */
1053:   /* il and jl record the first nonzero element in each row of the accessing
1054:      window U(0:k, k:mbs-1).
1055:      jl:    list of rows to be added to uneliminated rows
1056:             i>= k: jl(i) is the first row to be added to row i
1057:             i<  k: jl(i) is the row following row i in some list of rows
1058:             jl(i) = mbs indicates the end of a list
1059:      il(i): points to the first nonzero element in columns k,...,mbs-1 of
1060:             row i of U */
1061:   PetscCalloc1(4 * mbs, &rtmp);
1062:   PetscMalloc2(mbs, &il, mbs, &jl);
1063:   il[0] = 0;
1064:   for (i = 0; i < mbs; i++) jl[i] = mbs;

1066:   ai = a->i;
1067:   aj = a->j;
1068:   aa = a->a;

1070:   /* for each row k */
1071:   for (k = 0; k < mbs; k++) {
1072:     /*initialize k-th row with elements nonzero in row k of A */
1073:     jmin = ai[k];
1074:     jmax = ai[k + 1];
1075:     ap   = aa + jmin * 4;
1076:     for (j = jmin; j < jmax; j++) {
1077:       vj       = aj[j]; /* block col. index */
1078:       rtmp_ptr = rtmp + vj * 4;
1079:       for (i = 0; i < 4; i++) *rtmp_ptr++ = *ap++;
1080:     }

1082:     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
1083:     PetscArraycpy(dk, rtmp + k * 4, 4);
1084:     i = jl[k]; /* first row to be added to k_th row  */

1086:     while (i < k) {
1087:       nexti = jl[i]; /* next row to be added to k_th row */

1089:       /* compute multiplier */
1090:       ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */

1092:       /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
1093:       diag   = ba + i * 4;
1094:       u      = ba + ili * 4;
1095:       uik[0] = -(diag[0] * u[0] + diag[2] * u[1]);
1096:       uik[1] = -(diag[1] * u[0] + diag[3] * u[1]);
1097:       uik[2] = -(diag[0] * u[2] + diag[2] * u[3]);
1098:       uik[3] = -(diag[1] * u[2] + diag[3] * u[3]);

1100:       /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
1101:       dk[0] += uik[0] * u[0] + uik[1] * u[1];
1102:       dk[1] += uik[2] * u[0] + uik[3] * u[1];
1103:       dk[2] += uik[0] * u[2] + uik[1] * u[3];
1104:       dk[3] += uik[2] * u[2] + uik[3] * u[3];

1106:       PetscLogFlops(16.0 * 2.0);

1108:       /* update -U(i,k): ba[ili] = uik */
1109:       PetscArraycpy(ba + ili * 4, uik, 4);

1111:       /* add multiple of row i to k-th row ... */
1112:       jmin = ili + 1;
1113:       jmax = bi[i + 1];
1114:       if (jmin < jmax) {
1115:         for (j = jmin; j < jmax; j++) {
1116:           /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
1117:           rtmp_ptr = rtmp + bj[j] * 4;
1118:           u        = ba + j * 4;
1119:           rtmp_ptr[0] += uik[0] * u[0] + uik[1] * u[1];
1120:           rtmp_ptr[1] += uik[2] * u[0] + uik[3] * u[1];
1121:           rtmp_ptr[2] += uik[0] * u[2] + uik[1] * u[3];
1122:           rtmp_ptr[3] += uik[2] * u[2] + uik[3] * u[3];
1123:         }
1124:         PetscLogFlops(16.0 * (jmax - jmin));

1126:         /* ... add i to row list for next nonzero entry */
1127:         il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
1128:         j     = bj[jmin];
1129:         jl[i] = jl[j];
1130:         jl[j] = i; /* update jl */
1131:       }
1132:       i = nexti;
1133:     }

1135:     /* save nonzero entries in k-th row of U ... */

1137:     /* invert diagonal block */
1138:     diag = ba + k * 4;
1139:     PetscArraycpy(diag, dk, 4);
1140:     PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected);
1141:     if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;

1143:     jmin = bi[k];
1144:     jmax = bi[k + 1];
1145:     if (jmin < jmax) {
1146:       for (j = jmin; j < jmax; j++) {
1147:         vj       = bj[j]; /* block col. index of U */
1148:         u        = ba + j * 4;
1149:         rtmp_ptr = rtmp + vj * 4;
1150:         for (k1 = 0; k1 < 4; k1++) {
1151:           *u++        = *rtmp_ptr;
1152:           *rtmp_ptr++ = 0.0;
1153:         }
1154:       }

1156:       /* ... add k to row list for first nonzero entry in k-th row */
1157:       il[k] = jmin;
1158:       i     = bj[jmin];
1159:       jl[k] = jl[i];
1160:       jl[i] = k;
1161:     }
1162:   }

1164:   PetscFree(rtmp);
1165:   PetscFree2(il, jl);

1167:   C->ops->solve          = MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1168:   C->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1169:   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1170:   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1171:   C->assembled           = PETSC_TRUE;
1172:   C->preallocated        = PETSC_TRUE;

1174:   PetscLogFlops(1.3333 * 8 * b->mbs); /* from inverting diagonal blocks */
1175:   return 0;
1176: }

1178: /*
1179:     Numeric U^T*D*U factorization for SBAIJ format.
1180:     Version for blocks are 1 by 1.
1181: */
1182: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_inplace(Mat C, Mat A, const MatFactorInfo *info)
1183: {
1184:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
1185:   IS              ip = b->row;
1186:   const PetscInt *ai, *aj, *rip;
1187:   PetscInt       *a2anew, i, j, mbs = a->mbs, *bi = b->i, *bj = b->j, *bcol;
1188:   PetscInt        k, jmin, jmax, *jl, *il, col, nexti, ili, nz;
1189:   MatScalar      *rtmp, *ba = b->a, *bval, *aa, dk, uikdi;
1190:   PetscReal       rs;
1191:   FactorShiftCtx  sctx;

1193:   /* MatPivotSetUp(): initialize shift context sctx */
1194:   PetscMemzero(&sctx, sizeof(FactorShiftCtx));

1196:   ISGetIndices(ip, &rip);
1197:   if (!a->permute) {
1198:     ai = a->i;
1199:     aj = a->j;
1200:     aa = a->a;
1201:   } else {
1202:     ai = a->inew;
1203:     aj = a->jnew;
1204:     nz = ai[mbs];
1205:     PetscMalloc1(nz, &aa);
1206:     a2anew = a->a2anew;
1207:     bval   = a->a;
1208:     for (j = 0; j < nz; j++) aa[a2anew[j]] = *(bval++);
1209:   }

1211:   /* initialization */
1212:   /* il and jl record the first nonzero element in each row of the accessing
1213:      window U(0:k, k:mbs-1).
1214:      jl:    list of rows to be added to uneliminated rows
1215:             i>= k: jl(i) is the first row to be added to row i
1216:             i<  k: jl(i) is the row following row i in some list of rows
1217:             jl(i) = mbs indicates the end of a list
1218:      il(i): points to the first nonzero element in columns k,...,mbs-1 of
1219:             row i of U */
1220:   PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl);

1222:   do {
1223:     sctx.newshift = PETSC_FALSE;
1224:     il[0]         = 0;
1225:     for (i = 0; i < mbs; i++) {
1226:       rtmp[i] = 0.0;
1227:       jl[i]   = mbs;
1228:     }

1230:     for (k = 0; k < mbs; k++) {
1231:       /*initialize k-th row by the perm[k]-th row of A */
1232:       jmin = ai[rip[k]];
1233:       jmax = ai[rip[k] + 1];
1234:       bval = ba + bi[k];
1235:       for (j = jmin; j < jmax; j++) {
1236:         col       = rip[aj[j]];
1237:         rtmp[col] = aa[j];
1238:         *bval++   = 0.0; /* for in-place factorization */
1239:       }

1241:       /* shift the diagonal of the matrix */
1242:       if (sctx.nshift) rtmp[k] += sctx.shift_amount;

1244:       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1245:       dk = rtmp[k];
1246:       i  = jl[k]; /* first row to be added to k_th row  */

1248:       while (i < k) {
1249:         nexti = jl[i]; /* next row to be added to k_th row */

1251:         /* compute multiplier, update diag(k) and U(i,k) */
1252:         ili   = il[i];                /* index of first nonzero element in U(i,k:bms-1) */
1253:         uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */
1254:         dk += uikdi * ba[ili];
1255:         ba[ili] = uikdi; /* -U(i,k) */

1257:         /* add multiple of row i to k-th row */
1258:         jmin = ili + 1;
1259:         jmax = bi[i + 1];
1260:         if (jmin < jmax) {
1261:           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
1262:           PetscLogFlops(2.0 * (jmax - jmin));

1264:           /* update il and jl for row i */
1265:           il[i] = jmin;
1266:           j     = bj[jmin];
1267:           jl[i] = jl[j];
1268:           jl[j] = i;
1269:         }
1270:         i = nexti;
1271:       }

1273:       /* shift the diagonals when zero pivot is detected */
1274:       /* compute rs=sum of abs(off-diagonal) */
1275:       rs   = 0.0;
1276:       jmin = bi[k] + 1;
1277:       nz   = bi[k + 1] - jmin;
1278:       if (nz) {
1279:         bcol = bj + jmin;
1280:         while (nz--) {
1281:           rs += PetscAbsScalar(rtmp[*bcol]);
1282:           bcol++;
1283:         }
1284:       }

1286:       sctx.rs = rs;
1287:       sctx.pv = dk;
1288:       MatPivotCheck(C, A, info, &sctx, k);
1289:       if (sctx.newshift) break; /* sctx.shift_amount is updated */
1290:       dk = sctx.pv;

1292:       /* copy data into U(k,:) */
1293:       ba[bi[k]] = 1.0 / dk; /* U(k,k) */
1294:       jmin      = bi[k] + 1;
1295:       jmax      = bi[k + 1];
1296:       if (jmin < jmax) {
1297:         for (j = jmin; j < jmax; j++) {
1298:           col       = bj[j];
1299:           ba[j]     = rtmp[col];
1300:           rtmp[col] = 0.0;
1301:         }
1302:         /* add the k-th row into il and jl */
1303:         il[k] = jmin;
1304:         i     = bj[jmin];
1305:         jl[k] = jl[i];
1306:         jl[i] = k;
1307:       }
1308:     }
1309:   } while (sctx.newshift);
1310:   PetscFree3(rtmp, il, jl);
1311:   if (a->permute) PetscFree(aa);

1313:   ISRestoreIndices(ip, &rip);

1315:   C->ops->solve          = MatSolve_SeqSBAIJ_1_inplace;
1316:   C->ops->solves         = MatSolves_SeqSBAIJ_1_inplace;
1317:   C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
1318:   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_inplace;
1319:   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_inplace;
1320:   C->assembled           = PETSC_TRUE;
1321:   C->preallocated        = PETSC_TRUE;

1323:   PetscLogFlops(C->rmap->N);
1324:   if (sctx.nshift) {
1325:     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1326:       PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount);
1327:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1328:       PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount);
1329:     }
1330:   }
1331:   return 0;
1332: }

1334: /*
1335:   Version for when blocks are 1 by 1 Using natural ordering under new datastructure
1336:   Modified from MatCholeskyFactorNumeric_SeqAIJ()
1337: */
1338: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(Mat B, Mat A, const MatFactorInfo *info)
1339: {
1340:   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ *)A->data;
1341:   Mat_SeqSBAIJ  *b = (Mat_SeqSBAIJ *)B->data;
1342:   PetscInt       i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp;
1343:   PetscInt      *ai = a->i, *aj = a->j, *ajtmp;
1344:   PetscInt       k, jmin, jmax, *c2r, *il, col, nexti, ili, nz;
1345:   MatScalar     *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi;
1346:   FactorShiftCtx sctx;
1347:   PetscReal      rs;
1348:   MatScalar      d, *v;

1350:   PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r);

1352:   /* MatPivotSetUp(): initialize shift context sctx */
1353:   PetscMemzero(&sctx, sizeof(FactorShiftCtx));

1355:   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
1356:     sctx.shift_top = info->zeropivot;

1358:     PetscArrayzero(rtmp, mbs);

1360:     for (i = 0; i < mbs; i++) {
1361:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
1362:       d = (aa)[a->diag[i]];
1363:       rtmp[i] += -PetscRealPart(d); /* diagonal entry */
1364:       ajtmp = aj + ai[i] + 1;       /* exclude diagonal */
1365:       v     = aa + ai[i] + 1;
1366:       nz    = ai[i + 1] - ai[i] - 1;
1367:       for (j = 0; j < nz; j++) {
1368:         rtmp[i] += PetscAbsScalar(v[j]);
1369:         rtmp[ajtmp[j]] += PetscAbsScalar(v[j]);
1370:       }
1371:       if (PetscRealPart(rtmp[i]) > sctx.shift_top) sctx.shift_top = PetscRealPart(rtmp[i]);
1372:     }
1373:     sctx.shift_top *= 1.1;
1374:     sctx.nshift_max = 5;
1375:     sctx.shift_lo   = 0.;
1376:     sctx.shift_hi   = 1.;
1377:   }

1379:   /* allocate working arrays
1380:      c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
1381:      il:  for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
1382:   */
1383:   do {
1384:     sctx.newshift = PETSC_FALSE;

1386:     for (i = 0; i < mbs; i++) c2r[i] = mbs;
1387:     if (mbs) il[0] = 0;

1389:     for (k = 0; k < mbs; k++) {
1390:       /* zero rtmp */
1391:       nz    = bi[k + 1] - bi[k];
1392:       bjtmp = bj + bi[k];
1393:       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;

1395:       /* load in initial unfactored row */
1396:       bval = ba + bi[k];
1397:       jmin = ai[k];
1398:       jmax = ai[k + 1];
1399:       for (j = jmin; j < jmax; j++) {
1400:         col       = aj[j];
1401:         rtmp[col] = aa[j];
1402:         *bval++   = 0.0; /* for in-place factorization */
1403:       }
1404:       /* shift the diagonal of the matrix: ZeropivotApply() */
1405:       rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */

1407:       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1408:       dk = rtmp[k];
1409:       i  = c2r[k]; /* first row to be added to k_th row  */

1411:       while (i < k) {
1412:         nexti = c2r[i]; /* next row to be added to k_th row */

1414:         /* compute multiplier, update diag(k) and U(i,k) */
1415:         ili   = il[i];                   /* index of first nonzero element in U(i,k:bms-1) */
1416:         uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */
1417:         dk += uikdi * ba[ili];           /* update diag[k] */
1418:         ba[ili] = uikdi;                 /* -U(i,k) */

1420:         /* add multiple of row i to k-th row */
1421:         jmin = ili + 1;
1422:         jmax = bi[i + 1];
1423:         if (jmin < jmax) {
1424:           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
1425:           /* update il and c2r for row i */
1426:           il[i]  = jmin;
1427:           j      = bj[jmin];
1428:           c2r[i] = c2r[j];
1429:           c2r[j] = i;
1430:         }
1431:         i = nexti;
1432:       }

1434:       /* copy data into U(k,:) */
1435:       rs   = 0.0;
1436:       jmin = bi[k];
1437:       jmax = bi[k + 1] - 1;
1438:       if (jmin < jmax) {
1439:         for (j = jmin; j < jmax; j++) {
1440:           col   = bj[j];
1441:           ba[j] = rtmp[col];
1442:           rs += PetscAbsScalar(ba[j]);
1443:         }
1444:         /* add the k-th row into il and c2r */
1445:         il[k]  = jmin;
1446:         i      = bj[jmin];
1447:         c2r[k] = c2r[i];
1448:         c2r[i] = k;
1449:       }

1451:       sctx.rs = rs;
1452:       sctx.pv = dk;
1453:       MatPivotCheck(B, A, info, &sctx, k);
1454:       if (sctx.newshift) break;
1455:       dk = sctx.pv;

1457:       ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */
1458:     }
1459:   } while (sctx.newshift);

1461:   PetscFree3(rtmp, il, c2r);

1463:   B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1464:   B->ops->solves         = MatSolves_SeqSBAIJ_1;
1465:   B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1466:   B->ops->matsolve       = MatMatSolve_SeqSBAIJ_1_NaturalOrdering;
1467:   B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
1468:   B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;

1470:   B->assembled    = PETSC_TRUE;
1471:   B->preallocated = PETSC_TRUE;

1473:   PetscLogFlops(B->rmap->n);

1475:   /* MatPivotView() */
1476:   if (sctx.nshift) {
1477:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1478:       PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top);
1479:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1480:       PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount);
1481:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
1482:       PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount);
1483:     }
1484:   }
1485:   return 0;
1486: }

1488: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering_inplace(Mat C, Mat A, const MatFactorInfo *info)
1489: {
1490:   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
1491:   PetscInt       i, j, mbs = a->mbs;
1492:   PetscInt      *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
1493:   PetscInt       k, jmin, *jl, *il, nexti, ili, *acol, *bcol, nz;
1494:   MatScalar     *rtmp, *ba = b->a, *aa = a->a, dk, uikdi, *aval, *bval;
1495:   PetscReal      rs;
1496:   FactorShiftCtx sctx;

1498:   /* MatPivotSetUp(): initialize shift context sctx */
1499:   PetscMemzero(&sctx, sizeof(FactorShiftCtx));

1501:   /* initialization */
1502:   /* il and jl record the first nonzero element in each row of the accessing
1503:      window U(0:k, k:mbs-1).
1504:      jl:    list of rows to be added to uneliminated rows
1505:             i>= k: jl(i) is the first row to be added to row i
1506:             i<  k: jl(i) is the row following row i in some list of rows
1507:             jl(i) = mbs indicates the end of a list
1508:      il(i): points to the first nonzero element in U(i,k:mbs-1)
1509:   */
1510:   PetscMalloc1(mbs, &rtmp);
1511:   PetscMalloc2(mbs, &il, mbs, &jl);

1513:   do {
1514:     sctx.newshift = PETSC_FALSE;
1515:     il[0]         = 0;
1516:     for (i = 0; i < mbs; i++) {
1517:       rtmp[i] = 0.0;
1518:       jl[i]   = mbs;
1519:     }

1521:     for (k = 0; k < mbs; k++) {
1522:       /*initialize k-th row with elements nonzero in row perm(k) of A */
1523:       nz   = ai[k + 1] - ai[k];
1524:       acol = aj + ai[k];
1525:       aval = aa + ai[k];
1526:       bval = ba + bi[k];
1527:       while (nz--) {
1528:         rtmp[*acol++] = *aval++;
1529:         *bval++       = 0.0; /* for in-place factorization */
1530:       }

1532:       /* shift the diagonal of the matrix */
1533:       if (sctx.nshift) rtmp[k] += sctx.shift_amount;

1535:       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1536:       dk = rtmp[k];
1537:       i  = jl[k]; /* first row to be added to k_th row  */

1539:       while (i < k) {
1540:         nexti = jl[i]; /* next row to be added to k_th row */
1541:         /* compute multiplier, update D(k) and U(i,k) */
1542:         ili   = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1543:         uikdi = -ba[ili] * ba[bi[i]];
1544:         dk += uikdi * ba[ili];
1545:         ba[ili] = uikdi; /* -U(i,k) */

1547:         /* add multiple of row i to k-th row ... */
1548:         jmin = ili + 1;
1549:         nz   = bi[i + 1] - jmin;
1550:         if (nz > 0) {
1551:           bcol = bj + jmin;
1552:           bval = ba + jmin;
1553:           PetscLogFlops(2.0 * nz);
1554:           while (nz--) rtmp[*bcol++] += uikdi * (*bval++);

1556:           /* update il and jl for i-th row */
1557:           il[i] = jmin;
1558:           j     = bj[jmin];
1559:           jl[i] = jl[j];
1560:           jl[j] = i;
1561:         }
1562:         i = nexti;
1563:       }

1565:       /* shift the diagonals when zero pivot is detected */
1566:       /* compute rs=sum of abs(off-diagonal) */
1567:       rs   = 0.0;
1568:       jmin = bi[k] + 1;
1569:       nz   = bi[k + 1] - jmin;
1570:       if (nz) {
1571:         bcol = bj + jmin;
1572:         while (nz--) {
1573:           rs += PetscAbsScalar(rtmp[*bcol]);
1574:           bcol++;
1575:         }
1576:       }

1578:       sctx.rs = rs;
1579:       sctx.pv = dk;
1580:       MatPivotCheck(C, A, info, &sctx, k);
1581:       if (sctx.newshift) break; /* sctx.shift_amount is updated */
1582:       dk = sctx.pv;

1584:       /* copy data into U(k,:) */
1585:       ba[bi[k]] = 1.0 / dk;
1586:       jmin      = bi[k] + 1;
1587:       nz        = bi[k + 1] - jmin;
1588:       if (nz) {
1589:         bcol = bj + jmin;
1590:         bval = ba + jmin;
1591:         while (nz--) {
1592:           *bval++       = rtmp[*bcol];
1593:           rtmp[*bcol++] = 0.0;
1594:         }
1595:         /* add k-th row into il and jl */
1596:         il[k] = jmin;
1597:         i     = bj[jmin];
1598:         jl[k] = jl[i];
1599:         jl[i] = k;
1600:       }
1601:     } /* end of for (k = 0; k<mbs; k++) */
1602:   } while (sctx.newshift);
1603:   PetscFree(rtmp);
1604:   PetscFree2(il, jl);

1606:   C->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1607:   C->ops->solves         = MatSolves_SeqSBAIJ_1_inplace;
1608:   C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1609:   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1610:   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;

1612:   C->assembled    = PETSC_TRUE;
1613:   C->preallocated = PETSC_TRUE;

1615:   PetscLogFlops(C->rmap->N);
1616:   if (sctx.nshift) {
1617:     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1618:       PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount);
1619:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1620:       PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount);
1621:     }
1622:   }
1623:   return 0;
1624: }

1626: PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat A, IS perm, const MatFactorInfo *info)
1627: {
1628:   Mat C;

1630:   MatGetFactor(A, "petsc", MAT_FACTOR_CHOLESKY, &C);
1631:   MatCholeskyFactorSymbolic(C, A, perm, info);
1632:   MatCholeskyFactorNumeric(C, A, info);

1634:   A->ops->solve          = C->ops->solve;
1635:   A->ops->solvetranspose = C->ops->solvetranspose;

1637:   MatHeaderMerge(A, &C);
1638:   return 0;
1639: }