Actual source code: aijfact.c


  2: #include <../src/mat/impls/aij/seq/aij.h>
  3: #include <../src/mat/impls/sbaij/seq/sbaij.h>
  4: #include <petscbt.h>
  5: #include <../src/mat/utils/freespace.h>

  7: /*
  8:       Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix

 10:       This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll()
 11: */
 12: PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat, MatOrderingType type, IS *irow, IS *icol)
 13: {
 14:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)mat->data;
 15:   PetscInt           i, j, jj, k, kk, n = mat->rmap->n, current = 0, newcurrent = 0, *order;
 16:   const PetscInt    *ai = a->i, *aj = a->j;
 17:   const PetscScalar *aa = a->a;
 18:   PetscBool         *done;
 19:   PetscReal          best, past = 0, future;

 21:   /* pick initial row */
 22:   best = -1;
 23:   for (i = 0; i < n; i++) {
 24:     future = 0.0;
 25:     for (j = ai[i]; j < ai[i + 1]; j++) {
 26:       if (aj[j] != i) future += PetscAbsScalar(aa[j]);
 27:       else past = PetscAbsScalar(aa[j]);
 28:     }
 29:     if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
 30:     if (past / future > best) {
 31:       best    = past / future;
 32:       current = i;
 33:     }
 34:   }

 36:   PetscMalloc1(n, &done);
 37:   PetscArrayzero(done, n);
 38:   PetscMalloc1(n, &order);
 39:   order[0] = current;
 40:   for (i = 0; i < n - 1; i++) {
 41:     done[current] = PETSC_TRUE;
 42:     best          = -1;
 43:     /* loop over all neighbors of current pivot */
 44:     for (j = ai[current]; j < ai[current + 1]; j++) {
 45:       jj = aj[j];
 46:       if (done[jj]) continue;
 47:       /* loop over columns of potential next row computing weights for below and above diagonal */
 48:       past = future = 0.0;
 49:       for (k = ai[jj]; k < ai[jj + 1]; k++) {
 50:         kk = aj[k];
 51:         if (done[kk]) past += PetscAbsScalar(aa[k]);
 52:         else if (kk != jj) future += PetscAbsScalar(aa[k]);
 53:       }
 54:       if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
 55:       if (past / future > best) {
 56:         best       = past / future;
 57:         newcurrent = jj;
 58:       }
 59:     }
 60:     if (best == -1) { /* no neighbors to select from so select best of all that remain */
 61:       best = -1;
 62:       for (k = 0; k < n; k++) {
 63:         if (done[k]) continue;
 64:         future = 0.0;
 65:         past   = 0.0;
 66:         for (j = ai[k]; j < ai[k + 1]; j++) {
 67:           kk = aj[j];
 68:           if (done[kk]) past += PetscAbsScalar(aa[j]);
 69:           else if (kk != k) future += PetscAbsScalar(aa[j]);
 70:         }
 71:         if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
 72:         if (past / future > best) {
 73:           best       = past / future;
 74:           newcurrent = k;
 75:         }
 76:       }
 77:     }
 79:     current      = newcurrent;
 80:     order[i + 1] = current;
 81:   }
 82:   ISCreateGeneral(PETSC_COMM_SELF, n, order, PETSC_COPY_VALUES, irow);
 83:   *icol = *irow;
 84:   PetscObjectReference((PetscObject)*irow);
 85:   PetscFree(done);
 86:   PetscFree(order);
 87:   return 0;
 88: }

 90: static PetscErrorCode MatFactorGetSolverType_petsc(Mat A, MatSolverType *type)
 91: {
 92:   *type = MATSOLVERPETSC;
 93:   return 0;
 94: }

 96: PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A, MatFactorType ftype, Mat *B)
 97: {
 98:   PetscInt n = A->rmap->n;

100: #if defined(PETSC_USE_COMPLEX)
102: #endif
103:   MatCreate(PetscObjectComm((PetscObject)A), B);
104:   MatSetSizes(*B, n, n, n, n);
105:   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) {
106:     MatSetType(*B, MATSEQAIJ);

108:     (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
109:     (*B)->ops->lufactorsymbolic  = MatLUFactorSymbolic_SeqAIJ;

111:     MatSetBlockSizesFromMats(*B, A, A);
112:     PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_LU]);
113:     PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILU]);
114:     PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILUDT]);
115:   } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
116:     MatSetType(*B, MATSEQSBAIJ);
117:     MatSeqSBAIJSetPreallocation(*B, 1, MAT_SKIP_ALLOCATION, NULL);

119:     (*B)->ops->iccfactorsymbolic      = MatICCFactorSymbolic_SeqAIJ;
120:     (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
121:     PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_CHOLESKY]);
122:     PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ICC]);
123:   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported");
124:   (*B)->factortype = ftype;

126:   PetscFree((*B)->solvertype);
127:   PetscStrallocpy(MATSOLVERPETSC, &(*B)->solvertype);
128:   (*B)->canuseordering = PETSC_TRUE;
129:   PetscObjectComposeFunction((PetscObject)*B, "MatFactorGetSolverType_C", MatFactorGetSolverType_petsc);
130:   return 0;
131: }

133: PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
134: {
135:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
136:   IS                 isicol;
137:   const PetscInt    *r, *ic;
138:   PetscInt           i, n = A->rmap->n, *ai = a->i, *aj = a->j;
139:   PetscInt          *bi, *bj, *ajtmp;
140:   PetscInt          *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
141:   PetscReal          f;
142:   PetscInt           nlnk, *lnk, k, **bi_ptr;
143:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
144:   PetscBT            lnkbt;
145:   PetscBool          missing;

148:   MatMissingDiagonal(A, &missing, &i);

151:   ISInvertPermutation(iscol, PETSC_DECIDE, &isicol);
152:   ISGetIndices(isrow, &r);
153:   ISGetIndices(isicol, &ic);

155:   /* get new row pointers */
156:   PetscMalloc1(n + 1, &bi);
157:   bi[0] = 0;

159:   /* bdiag is location of diagonal in factor */
160:   PetscMalloc1(n + 1, &bdiag);
161:   bdiag[0] = 0;

163:   /* linked list for storing column indices of the active row */
164:   nlnk = n + 1;
165:   PetscLLCreate(n, n, nlnk, lnk, lnkbt);

167:   PetscMalloc2(n + 1, &bi_ptr, n + 1, &im);

169:   /* initial FreeSpace size is f*(ai[n]+1) */
170:   f = info->fill;
171:   if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
172:   PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space);
173:   current_space = free_space;

175:   for (i = 0; i < n; i++) {
176:     /* copy previous fill into linked list */
177:     nzi   = 0;
178:     nnz   = ai[r[i] + 1] - ai[r[i]];
179:     ajtmp = aj + ai[r[i]];
180:     PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt);
181:     nzi += nlnk;

183:     /* add pivot rows into linked list */
184:     row = lnk[n];
185:     while (row < i) {
186:       nzbd  = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
187:       ajtmp = bi_ptr[row] + nzbd;       /* points to the entry next to the diagonal */
188:       PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im);
189:       nzi += nlnk;
190:       row = lnk[row];
191:     }
192:     bi[i + 1] = bi[i] + nzi;
193:     im[i]     = nzi;

195:     /* mark bdiag */
196:     nzbd = 0;
197:     nnz  = nzi;
198:     k    = lnk[n];
199:     while (nnz-- && k < i) {
200:       nzbd++;
201:       k = lnk[k];
202:     }
203:     bdiag[i] = bi[i] + nzbd;

205:     /* if free space is not available, make more free space */
206:     if (current_space->local_remaining < nzi) {
207:       nnz = PetscIntMultTruncate(n - i, nzi); /* estimated and max additional space needed */
208:       PetscFreeSpaceGet(nnz, &current_space);
209:       reallocs++;
210:     }

212:     /* copy data into free space, then initialize lnk */
213:     PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt);

215:     bi_ptr[i] = current_space->array;
216:     current_space->array += nzi;
217:     current_space->local_used += nzi;
218:     current_space->local_remaining -= nzi;
219:   }
220: #if defined(PETSC_USE_INFO)
221:   if (ai[n] != 0) {
222:     PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
223:     PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af);
224:     PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af);
225:     PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af);
226:     PetscInfo(A, "for best performance.\n");
227:   } else {
228:     PetscInfo(A, "Empty matrix\n");
229:   }
230: #endif

232:   ISRestoreIndices(isrow, &r);
233:   ISRestoreIndices(isicol, &ic);

235:   /* destroy list of free space and other temporary array(s) */
236:   PetscMalloc1(bi[n] + 1, &bj);
237:   PetscFreeSpaceContiguous(&free_space, bj);
238:   PetscLLDestroy(lnk, lnkbt);
239:   PetscFree2(bi_ptr, im);

241:   /* put together the new matrix */
242:   MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL);
243:   b = (Mat_SeqAIJ *)(B)->data;

245:   b->free_a       = PETSC_TRUE;
246:   b->free_ij      = PETSC_TRUE;
247:   b->singlemalloc = PETSC_FALSE;

249:   PetscMalloc1(bi[n] + 1, &b->a);
250:   b->j    = bj;
251:   b->i    = bi;
252:   b->diag = bdiag;
253:   b->ilen = NULL;
254:   b->imax = NULL;
255:   b->row  = isrow;
256:   b->col  = iscol;
257:   PetscObjectReference((PetscObject)isrow);
258:   PetscObjectReference((PetscObject)iscol);
259:   b->icol = isicol;
260:   PetscMalloc1(n + 1, &b->solve_work);

262:   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
263:   b->maxnz = b->nz = bi[n];

265:   (B)->factortype            = MAT_FACTOR_LU;
266:   (B)->info.factor_mallocs   = reallocs;
267:   (B)->info.fill_ratio_given = f;

269:   if (ai[n]) {
270:     (B)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
271:   } else {
272:     (B)->info.fill_ratio_needed = 0.0;
273:   }
274:   (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
275:   if (a->inode.size) (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
276:   return 0;
277: }

279: PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
280: {
281:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
282:   IS                 isicol;
283:   const PetscInt    *r, *ic, *ai = a->i, *aj = a->j, *ajtmp;
284:   PetscInt           i, n = A->rmap->n;
285:   PetscInt          *bi, *bj;
286:   PetscInt          *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
287:   PetscReal          f;
288:   PetscInt           nlnk, *lnk, k, **bi_ptr;
289:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
290:   PetscBT            lnkbt;
291:   PetscBool          missing;

294:   MatMissingDiagonal(A, &missing, &i);

297:   ISInvertPermutation(iscol, PETSC_DECIDE, &isicol);
298:   ISGetIndices(isrow, &r);
299:   ISGetIndices(isicol, &ic);

301:   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
302:   PetscMalloc1(n + 1, &bi);
303:   PetscMalloc1(n + 1, &bdiag);
304:   bi[0] = bdiag[0] = 0;

306:   /* linked list for storing column indices of the active row */
307:   nlnk = n + 1;
308:   PetscLLCreate(n, n, nlnk, lnk, lnkbt);

310:   PetscMalloc2(n + 1, &bi_ptr, n + 1, &im);

312:   /* initial FreeSpace size is f*(ai[n]+1) */
313:   f = info->fill;
314:   if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
315:   PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space);
316:   current_space = free_space;

318:   for (i = 0; i < n; i++) {
319:     /* copy previous fill into linked list */
320:     nzi   = 0;
321:     nnz   = ai[r[i] + 1] - ai[r[i]];
322:     ajtmp = aj + ai[r[i]];
323:     PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt);
324:     nzi += nlnk;

326:     /* add pivot rows into linked list */
327:     row = lnk[n];
328:     while (row < i) {
329:       nzbd  = bdiag[row] + 1;     /* num of entries in the row with column index <= row */
330:       ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
331:       PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im);
332:       nzi += nlnk;
333:       row = lnk[row];
334:     }
335:     bi[i + 1] = bi[i] + nzi;
336:     im[i]     = nzi;

338:     /* mark bdiag */
339:     nzbd = 0;
340:     nnz  = nzi;
341:     k    = lnk[n];
342:     while (nnz-- && k < i) {
343:       nzbd++;
344:       k = lnk[k];
345:     }
346:     bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */

348:     /* if free space is not available, make more free space */
349:     if (current_space->local_remaining < nzi) {
350:       /* estimated additional space needed */
351:       nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - 1, nzi));
352:       PetscFreeSpaceGet(nnz, &current_space);
353:       reallocs++;
354:     }

356:     /* copy data into free space, then initialize lnk */
357:     PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt);

359:     bi_ptr[i] = current_space->array;
360:     current_space->array += nzi;
361:     current_space->local_used += nzi;
362:     current_space->local_remaining -= nzi;
363:   }

365:   ISRestoreIndices(isrow, &r);
366:   ISRestoreIndices(isicol, &ic);

368:   /*   copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
369:   PetscMalloc1(bi[n] + 1, &bj);
370:   PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag);
371:   PetscLLDestroy(lnk, lnkbt);
372:   PetscFree2(bi_ptr, im);

374:   /* put together the new matrix */
375:   MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL);
376:   b = (Mat_SeqAIJ *)(B)->data;

378:   b->free_a       = PETSC_TRUE;
379:   b->free_ij      = PETSC_TRUE;
380:   b->singlemalloc = PETSC_FALSE;

382:   PetscMalloc1(bdiag[0] + 1, &b->a);

384:   b->j    = bj;
385:   b->i    = bi;
386:   b->diag = bdiag;
387:   b->ilen = NULL;
388:   b->imax = NULL;
389:   b->row  = isrow;
390:   b->col  = iscol;
391:   PetscObjectReference((PetscObject)isrow);
392:   PetscObjectReference((PetscObject)iscol);
393:   b->icol = isicol;
394:   PetscMalloc1(n + 1, &b->solve_work);

396:   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
397:   b->maxnz = b->nz = bdiag[0] + 1;

399:   B->factortype            = MAT_FACTOR_LU;
400:   B->info.factor_mallocs   = reallocs;
401:   B->info.fill_ratio_given = f;

403:   if (ai[n]) {
404:     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
405:   } else {
406:     B->info.fill_ratio_needed = 0.0;
407:   }
408: #if defined(PETSC_USE_INFO)
409:   if (ai[n] != 0) {
410:     PetscReal af = B->info.fill_ratio_needed;
411:     PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af);
412:     PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af);
413:     PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af);
414:     PetscInfo(A, "for best performance.\n");
415:   } else {
416:     PetscInfo(A, "Empty matrix\n");
417:   }
418: #endif
419:   B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
420:   if (a->inode.size) B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
421:   MatSeqAIJCheckInode_FactorLU(B);
422:   return 0;
423: }

425: /*
426:     Trouble in factorization, should we dump the original matrix?
427: */
428: PetscErrorCode MatFactorDumpMatrix(Mat A)
429: {
430:   PetscBool flg = PETSC_FALSE;

432:   PetscOptionsGetBool(((PetscObject)A)->options, NULL, "-mat_factor_dump_on_error", &flg, NULL);
433:   if (flg) {
434:     PetscViewer viewer;
435:     char        filename[PETSC_MAX_PATH_LEN];

437:     PetscSNPrintf(filename, PETSC_MAX_PATH_LEN, "matrix_factor_error.%d", PetscGlobalRank);
438:     PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A), filename, FILE_MODE_WRITE, &viewer);
439:     MatView(A, viewer);
440:     PetscViewerDestroy(&viewer);
441:   }
442:   return 0;
443: }

445: PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info)
446: {
447:   Mat              C = B;
448:   Mat_SeqAIJ      *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
449:   IS               isrow = b->row, isicol = b->icol;
450:   const PetscInt  *r, *ic, *ics;
451:   const PetscInt   n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, *bdiag = b->diag;
452:   PetscInt         i, j, k, nz, nzL, row, *pj;
453:   const PetscInt  *ajtmp, *bjtmp;
454:   MatScalar       *rtmp, *pc, multiplier, *pv;
455:   const MatScalar *aa = a->a, *v;
456:   PetscBool        row_identity, col_identity;
457:   FactorShiftCtx   sctx;
458:   const PetscInt  *ddiag;
459:   PetscReal        rs;
460:   MatScalar        d;

462:   /* MatPivotSetUp(): initialize shift context sctx */
463:   PetscMemzero(&sctx, sizeof(FactorShiftCtx));

465:   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
466:     ddiag          = a->diag;
467:     sctx.shift_top = info->zeropivot;
468:     for (i = 0; i < n; i++) {
469:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
470:       d  = (aa)[ddiag[i]];
471:       rs = -PetscAbsScalar(d) - PetscRealPart(d);
472:       v  = aa + ai[i];
473:       nz = ai[i + 1] - ai[i];
474:       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
475:       if (rs > sctx.shift_top) sctx.shift_top = rs;
476:     }
477:     sctx.shift_top *= 1.1;
478:     sctx.nshift_max = 5;
479:     sctx.shift_lo   = 0.;
480:     sctx.shift_hi   = 1.;
481:   }

483:   ISGetIndices(isrow, &r);
484:   ISGetIndices(isicol, &ic);
485:   PetscMalloc1(n + 1, &rtmp);
486:   ics = ic;

488:   do {
489:     sctx.newshift = PETSC_FALSE;
490:     for (i = 0; i < n; i++) {
491:       /* zero rtmp */
492:       /* L part */
493:       nz    = bi[i + 1] - bi[i];
494:       bjtmp = bj + bi[i];
495:       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;

497:       /* U part */
498:       nz    = bdiag[i] - bdiag[i + 1];
499:       bjtmp = bj + bdiag[i + 1] + 1;
500:       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;

502:       /* load in initial (unfactored row) */
503:       nz    = ai[r[i] + 1] - ai[r[i]];
504:       ajtmp = aj + ai[r[i]];
505:       v     = aa + ai[r[i]];
506:       for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
507:       /* ZeropivotApply() */
508:       rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */

510:       /* elimination */
511:       bjtmp = bj + bi[i];
512:       row   = *bjtmp++;
513:       nzL   = bi[i + 1] - bi[i];
514:       for (k = 0; k < nzL; k++) {
515:         pc = rtmp + row;
516:         if (*pc != 0.0) {
517:           pv         = b->a + bdiag[row];
518:           multiplier = *pc * (*pv);
519:           *pc        = multiplier;

521:           pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */
522:           pv = b->a + bdiag[row + 1] + 1;
523:           nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:) excluding diag */

525:           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
526:           PetscLogFlops(1 + 2.0 * nz);
527:         }
528:         row = *bjtmp++;
529:       }

531:       /* finished row so stick it into b->a */
532:       rs = 0.0;
533:       /* L part */
534:       pv = b->a + bi[i];
535:       pj = b->j + bi[i];
536:       nz = bi[i + 1] - bi[i];
537:       for (j = 0; j < nz; j++) {
538:         pv[j] = rtmp[pj[j]];
539:         rs += PetscAbsScalar(pv[j]);
540:       }

542:       /* U part */
543:       pv = b->a + bdiag[i + 1] + 1;
544:       pj = b->j + bdiag[i + 1] + 1;
545:       nz = bdiag[i] - bdiag[i + 1] - 1;
546:       for (j = 0; j < nz; j++) {
547:         pv[j] = rtmp[pj[j]];
548:         rs += PetscAbsScalar(pv[j]);
549:       }

551:       sctx.rs = rs;
552:       sctx.pv = rtmp[i];
553:       MatPivotCheck(B, A, info, &sctx, i);
554:       if (sctx.newshift) break; /* break for-loop */
555:       rtmp[i] = sctx.pv;        /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */

557:       /* Mark diagonal and invert diagonal for simpler triangular solves */
558:       pv  = b->a + bdiag[i];
559:       *pv = 1.0 / rtmp[i];

561:     } /* endof for (i=0; i<n; i++) { */

563:     /* MatPivotRefine() */
564:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
565:       /*
566:        * if no shift in this attempt & shifting & started shifting & can refine,
567:        * then try lower shift
568:        */
569:       sctx.shift_hi       = sctx.shift_fraction;
570:       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
571:       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
572:       sctx.newshift       = PETSC_TRUE;
573:       sctx.nshift++;
574:     }
575:   } while (sctx.newshift);

577:   PetscFree(rtmp);
578:   ISRestoreIndices(isicol, &ic);
579:   ISRestoreIndices(isrow, &r);

581:   ISIdentity(isrow, &row_identity);
582:   ISIdentity(isicol, &col_identity);
583:   if (b->inode.size) {
584:     C->ops->solve = MatSolve_SeqAIJ_Inode;
585:   } else if (row_identity && col_identity) {
586:     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
587:   } else {
588:     C->ops->solve = MatSolve_SeqAIJ;
589:   }
590:   C->ops->solveadd          = MatSolveAdd_SeqAIJ;
591:   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ;
592:   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
593:   C->ops->matsolve          = MatMatSolve_SeqAIJ;
594:   C->assembled              = PETSC_TRUE;
595:   C->preallocated           = PETSC_TRUE;

597:   PetscLogFlops(C->cmap->n);

599:   /* MatShiftView(A,info,&sctx) */
600:   if (sctx.nshift) {
601:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
602:       PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top);
603:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
604:       PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount);
605:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
606:       PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount);
607:     }
608:   }
609:   return 0;
610: }

612: PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info)
613: {
614:   Mat              C = B;
615:   Mat_SeqAIJ      *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
616:   IS               isrow = b->row, isicol = b->icol;
617:   const PetscInt  *r, *ic, *ics;
618:   PetscInt         nz, row, i, j, n = A->rmap->n, diag;
619:   const PetscInt  *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
620:   const PetscInt  *ajtmp, *bjtmp, *diag_offset = b->diag, *pj;
621:   MatScalar       *pv, *rtmp, *pc, multiplier, d;
622:   const MatScalar *v, *aa = a->a;
623:   PetscReal        rs = 0.0;
624:   FactorShiftCtx   sctx;
625:   const PetscInt  *ddiag;
626:   PetscBool        row_identity, col_identity;

628:   /* MatPivotSetUp(): initialize shift context sctx */
629:   PetscMemzero(&sctx, sizeof(FactorShiftCtx));

631:   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
632:     ddiag          = a->diag;
633:     sctx.shift_top = info->zeropivot;
634:     for (i = 0; i < n; i++) {
635:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
636:       d  = (aa)[ddiag[i]];
637:       rs = -PetscAbsScalar(d) - PetscRealPart(d);
638:       v  = aa + ai[i];
639:       nz = ai[i + 1] - ai[i];
640:       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
641:       if (rs > sctx.shift_top) sctx.shift_top = rs;
642:     }
643:     sctx.shift_top *= 1.1;
644:     sctx.nshift_max = 5;
645:     sctx.shift_lo   = 0.;
646:     sctx.shift_hi   = 1.;
647:   }

649:   ISGetIndices(isrow, &r);
650:   ISGetIndices(isicol, &ic);
651:   PetscMalloc1(n + 1, &rtmp);
652:   ics = ic;

654:   do {
655:     sctx.newshift = PETSC_FALSE;
656:     for (i = 0; i < n; i++) {
657:       nz    = bi[i + 1] - bi[i];
658:       bjtmp = bj + bi[i];
659:       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;

661:       /* load in initial (unfactored row) */
662:       nz    = ai[r[i] + 1] - ai[r[i]];
663:       ajtmp = aj + ai[r[i]];
664:       v     = aa + ai[r[i]];
665:       for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
666:       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */

668:       row = *bjtmp++;
669:       while (row < i) {
670:         pc = rtmp + row;
671:         if (*pc != 0.0) {
672:           pv         = b->a + diag_offset[row];
673:           pj         = b->j + diag_offset[row] + 1;
674:           multiplier = *pc / *pv++;
675:           *pc        = multiplier;
676:           nz         = bi[row + 1] - diag_offset[row] - 1;
677:           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
678:           PetscLogFlops(1 + 2.0 * nz);
679:         }
680:         row = *bjtmp++;
681:       }
682:       /* finished row so stick it into b->a */
683:       pv   = b->a + bi[i];
684:       pj   = b->j + bi[i];
685:       nz   = bi[i + 1] - bi[i];
686:       diag = diag_offset[i] - bi[i];
687:       rs   = 0.0;
688:       for (j = 0; j < nz; j++) {
689:         pv[j] = rtmp[pj[j]];
690:         rs += PetscAbsScalar(pv[j]);
691:       }
692:       rs -= PetscAbsScalar(pv[diag]);

694:       sctx.rs = rs;
695:       sctx.pv = pv[diag];
696:       MatPivotCheck(B, A, info, &sctx, i);
697:       if (sctx.newshift) break;
698:       pv[diag] = sctx.pv;
699:     }

701:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
702:       /*
703:        * if no shift in this attempt & shifting & started shifting & can refine,
704:        * then try lower shift
705:        */
706:       sctx.shift_hi       = sctx.shift_fraction;
707:       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
708:       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
709:       sctx.newshift       = PETSC_TRUE;
710:       sctx.nshift++;
711:     }
712:   } while (sctx.newshift);

714:   /* invert diagonal entries for simpler triangular solves */
715:   for (i = 0; i < n; i++) b->a[diag_offset[i]] = 1.0 / b->a[diag_offset[i]];
716:   PetscFree(rtmp);
717:   ISRestoreIndices(isicol, &ic);
718:   ISRestoreIndices(isrow, &r);

720:   ISIdentity(isrow, &row_identity);
721:   ISIdentity(isicol, &col_identity);
722:   if (row_identity && col_identity) {
723:     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace;
724:   } else {
725:     C->ops->solve = MatSolve_SeqAIJ_inplace;
726:   }
727:   C->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
728:   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
729:   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
730:   C->ops->matsolve          = MatMatSolve_SeqAIJ_inplace;

732:   C->assembled    = PETSC_TRUE;
733:   C->preallocated = PETSC_TRUE;

735:   PetscLogFlops(C->cmap->n);
736:   if (sctx.nshift) {
737:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
738:       PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top);
739:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
740:       PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount);
741:     }
742:   }
743:   (C)->ops->solve          = MatSolve_SeqAIJ_inplace;
744:   (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;

746:   MatSeqAIJCheckInode(C);
747:   return 0;
748: }

750: /*
751:    This routine implements inplace ILU(0) with row or/and column permutations.
752:    Input:
753:      A - original matrix
754:    Output;
755:      A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
756:          a->j (col index) is permuted by the inverse of colperm, then sorted
757:          a->a reordered accordingly with a->j
758:          a->diag (ptr to diagonal elements) is updated.
759: */
760: PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B, Mat A, const MatFactorInfo *info)
761: {
762:   Mat_SeqAIJ      *a     = (Mat_SeqAIJ *)A->data;
763:   IS               isrow = a->row, isicol = a->icol;
764:   const PetscInt  *r, *ic, *ics;
765:   PetscInt         i, j, n = A->rmap->n, *ai = a->i, *aj = a->j;
766:   PetscInt        *ajtmp, nz, row;
767:   PetscInt        *diag = a->diag, nbdiag, *pj;
768:   PetscScalar     *rtmp, *pc, multiplier, d;
769:   MatScalar       *pv, *v;
770:   PetscReal        rs;
771:   FactorShiftCtx   sctx;
772:   const MatScalar *aa = a->a, *vtmp;


776:   /* MatPivotSetUp(): initialize shift context sctx */
777:   PetscMemzero(&sctx, sizeof(FactorShiftCtx));

779:   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
780:     const PetscInt *ddiag = a->diag;
781:     sctx.shift_top        = info->zeropivot;
782:     for (i = 0; i < n; i++) {
783:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
784:       d    = (aa)[ddiag[i]];
785:       rs   = -PetscAbsScalar(d) - PetscRealPart(d);
786:       vtmp = aa + ai[i];
787:       nz   = ai[i + 1] - ai[i];
788:       for (j = 0; j < nz; j++) rs += PetscAbsScalar(vtmp[j]);
789:       if (rs > sctx.shift_top) sctx.shift_top = rs;
790:     }
791:     sctx.shift_top *= 1.1;
792:     sctx.nshift_max = 5;
793:     sctx.shift_lo   = 0.;
794:     sctx.shift_hi   = 1.;
795:   }

797:   ISGetIndices(isrow, &r);
798:   ISGetIndices(isicol, &ic);
799:   PetscMalloc1(n + 1, &rtmp);
800:   PetscArrayzero(rtmp, n + 1);
801:   ics = ic;

803: #if defined(MV)
804:   sctx.shift_top      = 0.;
805:   sctx.nshift_max     = 0;
806:   sctx.shift_lo       = 0.;
807:   sctx.shift_hi       = 0.;
808:   sctx.shift_fraction = 0.;

810:   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
811:     sctx.shift_top = 0.;
812:     for (i = 0; i < n; i++) {
813:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
814:       d  = (a->a)[diag[i]];
815:       rs = -PetscAbsScalar(d) - PetscRealPart(d);
816:       v  = a->a + ai[i];
817:       nz = ai[i + 1] - ai[i];
818:       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
819:       if (rs > sctx.shift_top) sctx.shift_top = rs;
820:     }
821:     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
822:     sctx.shift_top *= 1.1;
823:     sctx.nshift_max = 5;
824:     sctx.shift_lo   = 0.;
825:     sctx.shift_hi   = 1.;
826:   }

828:   sctx.shift_amount = 0.;
829:   sctx.nshift       = 0;
830: #endif

832:   do {
833:     sctx.newshift = PETSC_FALSE;
834:     for (i = 0; i < n; i++) {
835:       /* load in initial unfactored row */
836:       nz    = ai[r[i] + 1] - ai[r[i]];
837:       ajtmp = aj + ai[r[i]];
838:       v     = a->a + ai[r[i]];
839:       /* sort permuted ajtmp and values v accordingly */
840:       for (j = 0; j < nz; j++) ajtmp[j] = ics[ajtmp[j]];
841:       PetscSortIntWithScalarArray(nz, ajtmp, v);

843:       diag[r[i]] = ai[r[i]];
844:       for (j = 0; j < nz; j++) {
845:         rtmp[ajtmp[j]] = v[j];
846:         if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
847:       }
848:       rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */

850:       row = *ajtmp++;
851:       while (row < i) {
852:         pc = rtmp + row;
853:         if (*pc != 0.0) {
854:           pv = a->a + diag[r[row]];
855:           pj = aj + diag[r[row]] + 1;

857:           multiplier = *pc / *pv++;
858:           *pc        = multiplier;
859:           nz         = ai[r[row] + 1] - diag[r[row]] - 1;
860:           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
861:           PetscLogFlops(1 + 2.0 * nz);
862:         }
863:         row = *ajtmp++;
864:       }
865:       /* finished row so overwrite it onto a->a */
866:       pv     = a->a + ai[r[i]];
867:       pj     = aj + ai[r[i]];
868:       nz     = ai[r[i] + 1] - ai[r[i]];
869:       nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */

871:       rs = 0.0;
872:       for (j = 0; j < nz; j++) {
873:         pv[j] = rtmp[pj[j]];
874:         if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
875:       }

877:       sctx.rs = rs;
878:       sctx.pv = pv[nbdiag];
879:       MatPivotCheck(B, A, info, &sctx, i);
880:       if (sctx.newshift) break;
881:       pv[nbdiag] = sctx.pv;
882:     }

884:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
885:       /*
886:        * if no shift in this attempt & shifting & started shifting & can refine,
887:        * then try lower shift
888:        */
889:       sctx.shift_hi       = sctx.shift_fraction;
890:       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
891:       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
892:       sctx.newshift       = PETSC_TRUE;
893:       sctx.nshift++;
894:     }
895:   } while (sctx.newshift);

897:   /* invert diagonal entries for simpler triangular solves */
898:   for (i = 0; i < n; i++) a->a[diag[r[i]]] = 1.0 / a->a[diag[r[i]]];

900:   PetscFree(rtmp);
901:   ISRestoreIndices(isicol, &ic);
902:   ISRestoreIndices(isrow, &r);

904:   A->ops->solve             = MatSolve_SeqAIJ_InplaceWithPerm;
905:   A->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
906:   A->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
907:   A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;

909:   A->assembled    = PETSC_TRUE;
910:   A->preallocated = PETSC_TRUE;

912:   PetscLogFlops(A->cmap->n);
913:   if (sctx.nshift) {
914:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
915:       PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top);
916:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
917:       PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount);
918:     }
919:   }
920:   return 0;
921: }

923: /* ----------------------------------------------------------- */
924: PetscErrorCode MatLUFactor_SeqAIJ(Mat A, IS row, IS col, const MatFactorInfo *info)
925: {
926:   Mat C;

928:   MatGetFactor(A, MATSOLVERPETSC, MAT_FACTOR_LU, &C);
929:   MatLUFactorSymbolic(C, A, row, col, info);
930:   MatLUFactorNumeric(C, A, info);

932:   A->ops->solve          = C->ops->solve;
933:   A->ops->solvetranspose = C->ops->solvetranspose;

935:   MatHeaderMerge(A, &C);
936:   return 0;
937: }
938: /* ----------------------------------------------------------- */

940: PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A, Vec bb, Vec xx)
941: {
942:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
943:   IS                 iscol = a->col, isrow = a->row;
944:   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
945:   PetscInt           nz;
946:   const PetscInt    *rout, *cout, *r, *c;
947:   PetscScalar       *x, *tmp, *tmps, sum;
948:   const PetscScalar *b;
949:   const MatScalar   *aa = a->a, *v;

951:   if (!n) return 0;

953:   VecGetArrayRead(bb, &b);
954:   VecGetArrayWrite(xx, &x);
955:   tmp = a->solve_work;

957:   ISGetIndices(isrow, &rout);
958:   r = rout;
959:   ISGetIndices(iscol, &cout);
960:   c = cout + (n - 1);

962:   /* forward solve the lower triangular */
963:   tmp[0] = b[*r++];
964:   tmps   = tmp;
965:   for (i = 1; i < n; i++) {
966:     v   = aa + ai[i];
967:     vi  = aj + ai[i];
968:     nz  = a->diag[i] - ai[i];
969:     sum = b[*r++];
970:     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
971:     tmp[i] = sum;
972:   }

974:   /* backward solve the upper triangular */
975:   for (i = n - 1; i >= 0; i--) {
976:     v   = aa + a->diag[i] + 1;
977:     vi  = aj + a->diag[i] + 1;
978:     nz  = ai[i + 1] - a->diag[i] - 1;
979:     sum = tmp[i];
980:     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
981:     x[*c--] = tmp[i] = sum * aa[a->diag[i]];
982:   }

984:   ISRestoreIndices(isrow, &rout);
985:   ISRestoreIndices(iscol, &cout);
986:   VecRestoreArrayRead(bb, &b);
987:   VecRestoreArrayWrite(xx, &x);
988:   PetscLogFlops(2.0 * a->nz - A->cmap->n);
989:   return 0;
990: }

992: PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A, Mat B, Mat X)
993: {
994:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
995:   IS                 iscol = a->col, isrow = a->row;
996:   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
997:   PetscInt           nz, neq, ldb, ldx;
998:   const PetscInt    *rout, *cout, *r, *c;
999:   PetscScalar       *x, *tmp = a->solve_work, *tmps, sum;
1000:   const PetscScalar *b, *aa  = a->a, *v;
1001:   PetscBool          isdense;

1003:   if (!n) return 0;
1004:   PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense);
1006:   if (X != B) {
1007:     PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense);
1009:   }
1010:   MatDenseGetArrayRead(B, &b);
1011:   MatDenseGetLDA(B, &ldb);
1012:   MatDenseGetArray(X, &x);
1013:   MatDenseGetLDA(X, &ldx);
1014:   ISGetIndices(isrow, &rout);
1015:   r = rout;
1016:   ISGetIndices(iscol, &cout);
1017:   c = cout;
1018:   for (neq = 0; neq < B->cmap->n; neq++) {
1019:     /* forward solve the lower triangular */
1020:     tmp[0] = b[r[0]];
1021:     tmps   = tmp;
1022:     for (i = 1; i < n; i++) {
1023:       v   = aa + ai[i];
1024:       vi  = aj + ai[i];
1025:       nz  = a->diag[i] - ai[i];
1026:       sum = b[r[i]];
1027:       PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1028:       tmp[i] = sum;
1029:     }
1030:     /* backward solve the upper triangular */
1031:     for (i = n - 1; i >= 0; i--) {
1032:       v   = aa + a->diag[i] + 1;
1033:       vi  = aj + a->diag[i] + 1;
1034:       nz  = ai[i + 1] - a->diag[i] - 1;
1035:       sum = tmp[i];
1036:       PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1037:       x[c[i]] = tmp[i] = sum * aa[a->diag[i]];
1038:     }
1039:     b += ldb;
1040:     x += ldx;
1041:   }
1042:   ISRestoreIndices(isrow, &rout);
1043:   ISRestoreIndices(iscol, &cout);
1044:   MatDenseRestoreArrayRead(B, &b);
1045:   MatDenseRestoreArray(X, &x);
1046:   PetscLogFlops(B->cmap->n * (2.0 * a->nz - n));
1047:   return 0;
1048: }

1050: PetscErrorCode MatMatSolve_SeqAIJ(Mat A, Mat B, Mat X)
1051: {
1052:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1053:   IS                 iscol = a->col, isrow = a->row;
1054:   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
1055:   PetscInt           nz, neq, ldb, ldx;
1056:   const PetscInt    *rout, *cout, *r, *c;
1057:   PetscScalar       *x, *tmp = a->solve_work, sum;
1058:   const PetscScalar *b, *aa  = a->a, *v;
1059:   PetscBool          isdense;

1061:   if (!n) return 0;
1062:   PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense);
1064:   if (X != B) {
1065:     PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense);
1067:   }
1068:   MatDenseGetArrayRead(B, &b);
1069:   MatDenseGetLDA(B, &ldb);
1070:   MatDenseGetArray(X, &x);
1071:   MatDenseGetLDA(X, &ldx);
1072:   ISGetIndices(isrow, &rout);
1073:   r = rout;
1074:   ISGetIndices(iscol, &cout);
1075:   c = cout;
1076:   for (neq = 0; neq < B->cmap->n; neq++) {
1077:     /* forward solve the lower triangular */
1078:     tmp[0] = b[r[0]];
1079:     v      = aa;
1080:     vi     = aj;
1081:     for (i = 1; i < n; i++) {
1082:       nz  = ai[i + 1] - ai[i];
1083:       sum = b[r[i]];
1084:       PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
1085:       tmp[i] = sum;
1086:       v += nz;
1087:       vi += nz;
1088:     }
1089:     /* backward solve the upper triangular */
1090:     for (i = n - 1; i >= 0; i--) {
1091:       v   = aa + adiag[i + 1] + 1;
1092:       vi  = aj + adiag[i + 1] + 1;
1093:       nz  = adiag[i] - adiag[i + 1] - 1;
1094:       sum = tmp[i];
1095:       PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
1096:       x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
1097:     }
1098:     b += ldb;
1099:     x += ldx;
1100:   }
1101:   ISRestoreIndices(isrow, &rout);
1102:   ISRestoreIndices(iscol, &cout);
1103:   MatDenseRestoreArrayRead(B, &b);
1104:   MatDenseRestoreArray(X, &x);
1105:   PetscLogFlops(B->cmap->n * (2.0 * a->nz - n));
1106:   return 0;
1107: }

1109: PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A, Vec bb, Vec xx)
1110: {
1111:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1112:   IS                 iscol = a->col, isrow = a->row;
1113:   const PetscInt    *r, *c, *rout, *cout;
1114:   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
1115:   PetscInt           nz, row;
1116:   PetscScalar       *x, *tmp, *tmps, sum;
1117:   const PetscScalar *b;
1118:   const MatScalar   *aa = a->a, *v;

1120:   if (!n) return 0;

1122:   VecGetArrayRead(bb, &b);
1123:   VecGetArrayWrite(xx, &x);
1124:   tmp = a->solve_work;

1126:   ISGetIndices(isrow, &rout);
1127:   r = rout;
1128:   ISGetIndices(iscol, &cout);
1129:   c = cout + (n - 1);

1131:   /* forward solve the lower triangular */
1132:   tmp[0] = b[*r++];
1133:   tmps   = tmp;
1134:   for (row = 1; row < n; row++) {
1135:     i   = rout[row]; /* permuted row */
1136:     v   = aa + ai[i];
1137:     vi  = aj + ai[i];
1138:     nz  = a->diag[i] - ai[i];
1139:     sum = b[*r++];
1140:     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1141:     tmp[row] = sum;
1142:   }

1144:   /* backward solve the upper triangular */
1145:   for (row = n - 1; row >= 0; row--) {
1146:     i   = rout[row]; /* permuted row */
1147:     v   = aa + a->diag[i] + 1;
1148:     vi  = aj + a->diag[i] + 1;
1149:     nz  = ai[i + 1] - a->diag[i] - 1;
1150:     sum = tmp[row];
1151:     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1152:     x[*c--] = tmp[row] = sum * aa[a->diag[i]];
1153:   }

1155:   ISRestoreIndices(isrow, &rout);
1156:   ISRestoreIndices(iscol, &cout);
1157:   VecRestoreArrayRead(bb, &b);
1158:   VecRestoreArrayWrite(xx, &x);
1159:   PetscLogFlops(2.0 * a->nz - A->cmap->n);
1160:   return 0;
1161: }

1163: /* ----------------------------------------------------------- */
1164: #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
1165: PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx)
1166: {
1167:   Mat_SeqAIJ        *a  = (Mat_SeqAIJ *)A->data;
1168:   PetscInt           n  = A->rmap->n;
1169:   const PetscInt    *ai = a->i, *aj = a->j, *adiag = a->diag;
1170:   PetscScalar       *x;
1171:   const PetscScalar *b;
1172:   const MatScalar   *aa = a->a;
1173: #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1174:   PetscInt         adiag_i, i, nz, ai_i;
1175:   const PetscInt  *vi;
1176:   const MatScalar *v;
1177:   PetscScalar      sum;
1178: #endif

1180:   if (!n) return 0;

1182:   VecGetArrayRead(bb, &b);
1183:   VecGetArrayWrite(xx, &x);

1185: #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1186:   fortransolveaij_(&n, x, ai, aj, adiag, aa, b);
1187: #else
1188:   /* forward solve the lower triangular */
1189:   x[0] = b[0];
1190:   for (i = 1; i < n; i++) {
1191:     ai_i = ai[i];
1192:     v    = aa + ai_i;
1193:     vi   = aj + ai_i;
1194:     nz   = adiag[i] - ai_i;
1195:     sum  = b[i];
1196:     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1197:     x[i] = sum;
1198:   }

1200:   /* backward solve the upper triangular */
1201:   for (i = n - 1; i >= 0; i--) {
1202:     adiag_i = adiag[i];
1203:     v       = aa + adiag_i + 1;
1204:     vi      = aj + adiag_i + 1;
1205:     nz      = ai[i + 1] - adiag_i - 1;
1206:     sum     = x[i];
1207:     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1208:     x[i] = sum * aa[adiag_i];
1209:   }
1210: #endif
1211:   PetscLogFlops(2.0 * a->nz - A->cmap->n);
1212:   VecRestoreArrayRead(bb, &b);
1213:   VecRestoreArrayWrite(xx, &x);
1214:   return 0;
1215: }

1217: PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec yy, Vec xx)
1218: {
1219:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1220:   IS                 iscol = a->col, isrow = a->row;
1221:   PetscInt           i, n                  = A->rmap->n, j;
1222:   PetscInt           nz;
1223:   const PetscInt    *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j;
1224:   PetscScalar       *x, *tmp, sum;
1225:   const PetscScalar *b;
1226:   const MatScalar   *aa = a->a, *v;

1228:   if (yy != xx) VecCopy(yy, xx);

1230:   VecGetArrayRead(bb, &b);
1231:   VecGetArray(xx, &x);
1232:   tmp = a->solve_work;

1234:   ISGetIndices(isrow, &rout);
1235:   r = rout;
1236:   ISGetIndices(iscol, &cout);
1237:   c = cout + (n - 1);

1239:   /* forward solve the lower triangular */
1240:   tmp[0] = b[*r++];
1241:   for (i = 1; i < n; i++) {
1242:     v   = aa + ai[i];
1243:     vi  = aj + ai[i];
1244:     nz  = a->diag[i] - ai[i];
1245:     sum = b[*r++];
1246:     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1247:     tmp[i] = sum;
1248:   }

1250:   /* backward solve the upper triangular */
1251:   for (i = n - 1; i >= 0; i--) {
1252:     v   = aa + a->diag[i] + 1;
1253:     vi  = aj + a->diag[i] + 1;
1254:     nz  = ai[i + 1] - a->diag[i] - 1;
1255:     sum = tmp[i];
1256:     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1257:     tmp[i] = sum * aa[a->diag[i]];
1258:     x[*c--] += tmp[i];
1259:   }

1261:   ISRestoreIndices(isrow, &rout);
1262:   ISRestoreIndices(iscol, &cout);
1263:   VecRestoreArrayRead(bb, &b);
1264:   VecRestoreArray(xx, &x);
1265:   PetscLogFlops(2.0 * a->nz);
1266:   return 0;
1267: }

1269: PetscErrorCode MatSolveAdd_SeqAIJ(Mat A, Vec bb, Vec yy, Vec xx)
1270: {
1271:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1272:   IS                 iscol = a->col, isrow = a->row;
1273:   PetscInt           i, n                  = A->rmap->n, j;
1274:   PetscInt           nz;
1275:   const PetscInt    *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
1276:   PetscScalar       *x, *tmp, sum;
1277:   const PetscScalar *b;
1278:   const MatScalar   *aa = a->a, *v;

1280:   if (yy != xx) VecCopy(yy, xx);

1282:   VecGetArrayRead(bb, &b);
1283:   VecGetArray(xx, &x);
1284:   tmp = a->solve_work;

1286:   ISGetIndices(isrow, &rout);
1287:   r = rout;
1288:   ISGetIndices(iscol, &cout);
1289:   c = cout;

1291:   /* forward solve the lower triangular */
1292:   tmp[0] = b[r[0]];
1293:   v      = aa;
1294:   vi     = aj;
1295:   for (i = 1; i < n; i++) {
1296:     nz  = ai[i + 1] - ai[i];
1297:     sum = b[r[i]];
1298:     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1299:     tmp[i] = sum;
1300:     v += nz;
1301:     vi += nz;
1302:   }

1304:   /* backward solve the upper triangular */
1305:   v  = aa + adiag[n - 1];
1306:   vi = aj + adiag[n - 1];
1307:   for (i = n - 1; i >= 0; i--) {
1308:     nz  = adiag[i] - adiag[i + 1] - 1;
1309:     sum = tmp[i];
1310:     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1311:     tmp[i] = sum * v[nz];
1312:     x[c[i]] += tmp[i];
1313:     v += nz + 1;
1314:     vi += nz + 1;
1315:   }

1317:   ISRestoreIndices(isrow, &rout);
1318:   ISRestoreIndices(iscol, &cout);
1319:   VecRestoreArrayRead(bb, &b);
1320:   VecRestoreArray(xx, &x);
1321:   PetscLogFlops(2.0 * a->nz);
1322:   return 0;
1323: }

1325: PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A, Vec bb, Vec xx)
1326: {
1327:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1328:   IS                 iscol = a->col, isrow = a->row;
1329:   const PetscInt    *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
1330:   PetscInt           i, n = A->rmap->n, j;
1331:   PetscInt           nz;
1332:   PetscScalar       *x, *tmp, s1;
1333:   const MatScalar   *aa = a->a, *v;
1334:   const PetscScalar *b;

1336:   VecGetArrayRead(bb, &b);
1337:   VecGetArrayWrite(xx, &x);
1338:   tmp = a->solve_work;

1340:   ISGetIndices(isrow, &rout);
1341:   r = rout;
1342:   ISGetIndices(iscol, &cout);
1343:   c = cout;

1345:   /* copy the b into temp work space according to permutation */
1346:   for (i = 0; i < n; i++) tmp[i] = b[c[i]];

1348:   /* forward solve the U^T */
1349:   for (i = 0; i < n; i++) {
1350:     v  = aa + diag[i];
1351:     vi = aj + diag[i] + 1;
1352:     nz = ai[i + 1] - diag[i] - 1;
1353:     s1 = tmp[i];
1354:     s1 *= (*v++); /* multiply by inverse of diagonal entry */
1355:     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1356:     tmp[i] = s1;
1357:   }

1359:   /* backward solve the L^T */
1360:   for (i = n - 1; i >= 0; i--) {
1361:     v  = aa + diag[i] - 1;
1362:     vi = aj + diag[i] - 1;
1363:     nz = diag[i] - ai[i];
1364:     s1 = tmp[i];
1365:     for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
1366:   }

1368:   /* copy tmp into x according to permutation */
1369:   for (i = 0; i < n; i++) x[r[i]] = tmp[i];

1371:   ISRestoreIndices(isrow, &rout);
1372:   ISRestoreIndices(iscol, &cout);
1373:   VecRestoreArrayRead(bb, &b);
1374:   VecRestoreArrayWrite(xx, &x);

1376:   PetscLogFlops(2.0 * a->nz - A->cmap->n);
1377:   return 0;
1378: }

1380: PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A, Vec bb, Vec xx)
1381: {
1382:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1383:   IS                 iscol = a->col, isrow = a->row;
1384:   const PetscInt    *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi;
1385:   PetscInt           i, n = A->rmap->n, j;
1386:   PetscInt           nz;
1387:   PetscScalar       *x, *tmp, s1;
1388:   const MatScalar   *aa = a->a, *v;
1389:   const PetscScalar *b;

1391:   VecGetArrayRead(bb, &b);
1392:   VecGetArrayWrite(xx, &x);
1393:   tmp = a->solve_work;

1395:   ISGetIndices(isrow, &rout);
1396:   r = rout;
1397:   ISGetIndices(iscol, &cout);
1398:   c = cout;

1400:   /* copy the b into temp work space according to permutation */
1401:   for (i = 0; i < n; i++) tmp[i] = b[c[i]];

1403:   /* forward solve the U^T */
1404:   for (i = 0; i < n; i++) {
1405:     v  = aa + adiag[i + 1] + 1;
1406:     vi = aj + adiag[i + 1] + 1;
1407:     nz = adiag[i] - adiag[i + 1] - 1;
1408:     s1 = tmp[i];
1409:     s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1410:     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1411:     tmp[i] = s1;
1412:   }

1414:   /* backward solve the L^T */
1415:   for (i = n - 1; i >= 0; i--) {
1416:     v  = aa + ai[i];
1417:     vi = aj + ai[i];
1418:     nz = ai[i + 1] - ai[i];
1419:     s1 = tmp[i];
1420:     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1421:   }

1423:   /* copy tmp into x according to permutation */
1424:   for (i = 0; i < n; i++) x[r[i]] = tmp[i];

1426:   ISRestoreIndices(isrow, &rout);
1427:   ISRestoreIndices(iscol, &cout);
1428:   VecRestoreArrayRead(bb, &b);
1429:   VecRestoreArrayWrite(xx, &x);

1431:   PetscLogFlops(2.0 * a->nz - A->cmap->n);
1432:   return 0;
1433: }

1435: PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec zz, Vec xx)
1436: {
1437:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1438:   IS                 iscol = a->col, isrow = a->row;
1439:   const PetscInt    *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
1440:   PetscInt           i, n = A->rmap->n, j;
1441:   PetscInt           nz;
1442:   PetscScalar       *x, *tmp, s1;
1443:   const MatScalar   *aa = a->a, *v;
1444:   const PetscScalar *b;

1446:   if (zz != xx) VecCopy(zz, xx);
1447:   VecGetArrayRead(bb, &b);
1448:   VecGetArray(xx, &x);
1449:   tmp = a->solve_work;

1451:   ISGetIndices(isrow, &rout);
1452:   r = rout;
1453:   ISGetIndices(iscol, &cout);
1454:   c = cout;

1456:   /* copy the b into temp work space according to permutation */
1457:   for (i = 0; i < n; i++) tmp[i] = b[c[i]];

1459:   /* forward solve the U^T */
1460:   for (i = 0; i < n; i++) {
1461:     v  = aa + diag[i];
1462:     vi = aj + diag[i] + 1;
1463:     nz = ai[i + 1] - diag[i] - 1;
1464:     s1 = tmp[i];
1465:     s1 *= (*v++); /* multiply by inverse of diagonal entry */
1466:     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1467:     tmp[i] = s1;
1468:   }

1470:   /* backward solve the L^T */
1471:   for (i = n - 1; i >= 0; i--) {
1472:     v  = aa + diag[i] - 1;
1473:     vi = aj + diag[i] - 1;
1474:     nz = diag[i] - ai[i];
1475:     s1 = tmp[i];
1476:     for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
1477:   }

1479:   /* copy tmp into x according to permutation */
1480:   for (i = 0; i < n; i++) x[r[i]] += tmp[i];

1482:   ISRestoreIndices(isrow, &rout);
1483:   ISRestoreIndices(iscol, &cout);
1484:   VecRestoreArrayRead(bb, &b);
1485:   VecRestoreArray(xx, &x);

1487:   PetscLogFlops(2.0 * a->nz - A->cmap->n);
1488:   return 0;
1489: }

1491: PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A, Vec bb, Vec zz, Vec xx)
1492: {
1493:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1494:   IS                 iscol = a->col, isrow = a->row;
1495:   const PetscInt    *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi;
1496:   PetscInt           i, n = A->rmap->n, j;
1497:   PetscInt           nz;
1498:   PetscScalar       *x, *tmp, s1;
1499:   const MatScalar   *aa = a->a, *v;
1500:   const PetscScalar *b;

1502:   if (zz != xx) VecCopy(zz, xx);
1503:   VecGetArrayRead(bb, &b);
1504:   VecGetArray(xx, &x);
1505:   tmp = a->solve_work;

1507:   ISGetIndices(isrow, &rout);
1508:   r = rout;
1509:   ISGetIndices(iscol, &cout);
1510:   c = cout;

1512:   /* copy the b into temp work space according to permutation */
1513:   for (i = 0; i < n; i++) tmp[i] = b[c[i]];

1515:   /* forward solve the U^T */
1516:   for (i = 0; i < n; i++) {
1517:     v  = aa + adiag[i + 1] + 1;
1518:     vi = aj + adiag[i + 1] + 1;
1519:     nz = adiag[i] - adiag[i + 1] - 1;
1520:     s1 = tmp[i];
1521:     s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1522:     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1523:     tmp[i] = s1;
1524:   }

1526:   /* backward solve the L^T */
1527:   for (i = n - 1; i >= 0; i--) {
1528:     v  = aa + ai[i];
1529:     vi = aj + ai[i];
1530:     nz = ai[i + 1] - ai[i];
1531:     s1 = tmp[i];
1532:     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1533:   }

1535:   /* copy tmp into x according to permutation */
1536:   for (i = 0; i < n; i++) x[r[i]] += tmp[i];

1538:   ISRestoreIndices(isrow, &rout);
1539:   ISRestoreIndices(iscol, &cout);
1540:   VecRestoreArrayRead(bb, &b);
1541:   VecRestoreArray(xx, &x);

1543:   PetscLogFlops(2.0 * a->nz - A->cmap->n);
1544:   return 0;
1545: }

1547: /* ----------------------------------------------------------------*/

1549: /*
1550:    ilu() under revised new data structure.
1551:    Factored arrays bj and ba are stored as
1552:      L(0,:), L(1,:), ...,L(n-1,:),  U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)

1554:    bi=fact->i is an array of size n+1, in which
1555:      bi[i]:  points to 1st entry of L(i,:),i=0,...,n-1
1556:      bi[n]:  points to L(n-1,n-1)+1

1558:   bdiag=fact->diag is an array of size n+1,in which
1559:      bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1560:      bdiag[n]: points to entry of U(n-1,0)-1

1562:    U(i,:) contains bdiag[i] as its last entry, i.e.,
1563:     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1564: */
1565: PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1566: {
1567:   Mat_SeqAIJ    *a = (Mat_SeqAIJ *)A->data, *b;
1568:   const PetscInt n = A->rmap->n, *ai = a->i, *aj, *adiag = a->diag;
1569:   PetscInt       i, j, k = 0, nz, *bi, *bj, *bdiag;
1570:   IS             isicol;

1572:   ISInvertPermutation(iscol, PETSC_DECIDE, &isicol);
1573:   MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_FALSE);
1574:   b = (Mat_SeqAIJ *)(fact)->data;

1576:   /* allocate matrix arrays for new data structure */
1577:   PetscMalloc3(ai[n] + 1, &b->a, ai[n] + 1, &b->j, n + 1, &b->i);

1579:   b->singlemalloc = PETSC_TRUE;
1580:   if (!b->diag) { PetscMalloc1(n + 1, &b->diag); }
1581:   bdiag = b->diag;

1583:   if (n > 0) PetscArrayzero(b->a, ai[n]);

1585:   /* set bi and bj with new data structure */
1586:   bi = b->i;
1587:   bj = b->j;

1589:   /* L part */
1590:   bi[0] = 0;
1591:   for (i = 0; i < n; i++) {
1592:     nz        = adiag[i] - ai[i];
1593:     bi[i + 1] = bi[i] + nz;
1594:     aj        = a->j + ai[i];
1595:     for (j = 0; j < nz; j++) bj[k++] = aj[j];
1596:   }

1598:   /* U part */
1599:   bdiag[n] = bi[n] - 1;
1600:   for (i = n - 1; i >= 0; i--) {
1601:     nz = ai[i + 1] - adiag[i] - 1;
1602:     aj = a->j + adiag[i] + 1;
1603:     for (j = 0; j < nz; j++) bj[k++] = aj[j];
1604:     /* diag[i] */
1605:     bj[k++]  = i;
1606:     bdiag[i] = bdiag[i + 1] + nz + 1;
1607:   }

1609:   fact->factortype             = MAT_FACTOR_ILU;
1610:   fact->info.factor_mallocs    = 0;
1611:   fact->info.fill_ratio_given  = info->fill;
1612:   fact->info.fill_ratio_needed = 1.0;
1613:   fact->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1614:   MatSeqAIJCheckInode_FactorLU(fact);

1616:   b       = (Mat_SeqAIJ *)(fact)->data;
1617:   b->row  = isrow;
1618:   b->col  = iscol;
1619:   b->icol = isicol;
1620:   PetscMalloc1(fact->rmap->n + 1, &b->solve_work);
1621:   PetscObjectReference((PetscObject)isrow);
1622:   PetscObjectReference((PetscObject)iscol);
1623:   return 0;
1624: }

1626: PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1627: {
1628:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
1629:   IS                 isicol;
1630:   const PetscInt    *r, *ic;
1631:   PetscInt           n = A->rmap->n, *ai = a->i, *aj = a->j;
1632:   PetscInt          *bi, *cols, nnz, *cols_lvl;
1633:   PetscInt          *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0;
1634:   PetscInt           i, levels, diagonal_fill;
1635:   PetscBool          col_identity, row_identity, missing;
1636:   PetscReal          f;
1637:   PetscInt           nlnk, *lnk, *lnk_lvl = NULL;
1638:   PetscBT            lnkbt;
1639:   PetscInt           nzi, *bj, **bj_ptr, **bjlvl_ptr;
1640:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
1641:   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;

1644:   MatMissingDiagonal(A, &missing, &i);

1647:   levels = (PetscInt)info->levels;
1648:   ISIdentity(isrow, &row_identity);
1649:   ISIdentity(iscol, &col_identity);
1650:   if (!levels && row_identity && col_identity) {
1651:     /* special case: ilu(0) with natural ordering */
1652:     MatILUFactorSymbolic_SeqAIJ_ilu0(fact, A, isrow, iscol, info);
1653:     if (a->inode.size) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1654:     return 0;
1655:   }

1657:   ISInvertPermutation(iscol, PETSC_DECIDE, &isicol);
1658:   ISGetIndices(isrow, &r);
1659:   ISGetIndices(isicol, &ic);

1661:   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1662:   PetscMalloc1(n + 1, &bi);
1663:   PetscMalloc1(n + 1, &bdiag);
1664:   bi[0] = bdiag[0] = 0;
1665:   PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr);

1667:   /* create a linked list for storing column indices of the active row */
1668:   nlnk = n + 1;
1669:   PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt);

1671:   /* initial FreeSpace size is f*(ai[n]+1) */
1672:   f             = info->fill;
1673:   diagonal_fill = (PetscInt)info->diagonal_fill;
1674:   PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space);
1675:   current_space = free_space;
1676:   PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl);
1677:   current_space_lvl = free_space_lvl;
1678:   for (i = 0; i < n; i++) {
1679:     nzi = 0;
1680:     /* copy current row into linked list */
1681:     nnz = ai[r[i] + 1] - ai[r[i]];
1683:     cols   = aj + ai[r[i]];
1684:     lnk[i] = -1; /* marker to indicate if diagonal exists */
1685:     PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt);
1686:     nzi += nlnk;

1688:     /* make sure diagonal entry is included */
1689:     if (diagonal_fill && lnk[i] == -1) {
1690:       fm = n;
1691:       while (lnk[fm] < i) fm = lnk[fm];
1692:       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1693:       lnk[fm]    = i;
1694:       lnk_lvl[i] = 0;
1695:       nzi++;
1696:       dcount++;
1697:     }

1699:     /* add pivot rows into the active row */
1700:     nzbd = 0;
1701:     prow = lnk[n];
1702:     while (prow < i) {
1703:       nnz      = bdiag[prow];
1704:       cols     = bj_ptr[prow] + nnz + 1;
1705:       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1706:       nnz      = bi[prow + 1] - bi[prow] - nnz - 1;
1707:       PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow);
1708:       nzi += nlnk;
1709:       prow = lnk[prow];
1710:       nzbd++;
1711:     }
1712:     bdiag[i]  = nzbd;
1713:     bi[i + 1] = bi[i] + nzi;
1714:     /* if free space is not available, make more free space */
1715:     if (current_space->local_remaining < nzi) {
1716:       nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(nzi, n - i)); /* estimated and max additional space needed */
1717:       PetscFreeSpaceGet(nnz, &current_space);
1718:       PetscFreeSpaceGet(nnz, &current_space_lvl);
1719:       reallocs++;
1720:     }

1722:     /* copy data into free_space and free_space_lvl, then initialize lnk */
1723:     PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt);
1724:     bj_ptr[i]    = current_space->array;
1725:     bjlvl_ptr[i] = current_space_lvl->array;

1727:     /* make sure the active row i has diagonal entry */

1730:     current_space->array += nzi;
1731:     current_space->local_used += nzi;
1732:     current_space->local_remaining -= nzi;
1733:     current_space_lvl->array += nzi;
1734:     current_space_lvl->local_used += nzi;
1735:     current_space_lvl->local_remaining -= nzi;
1736:   }

1738:   ISRestoreIndices(isrow, &r);
1739:   ISRestoreIndices(isicol, &ic);
1740:   /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
1741:   PetscMalloc1(bi[n] + 1, &bj);
1742:   PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag);

1744:   PetscIncompleteLLDestroy(lnk, lnkbt);
1745:   PetscFreeSpaceDestroy(free_space_lvl);
1746:   PetscFree2(bj_ptr, bjlvl_ptr);

1748: #if defined(PETSC_USE_INFO)
1749:   {
1750:     PetscReal af = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
1751:     PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af);
1752:     PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af);
1753:     PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af);
1754:     PetscInfo(A, "for best performance.\n");
1755:     if (diagonal_fill) PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount);
1756:   }
1757: #endif
1758:   /* put together the new matrix */
1759:   MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL);
1760:   b = (Mat_SeqAIJ *)(fact)->data;

1762:   b->free_a       = PETSC_TRUE;
1763:   b->free_ij      = PETSC_TRUE;
1764:   b->singlemalloc = PETSC_FALSE;

1766:   PetscMalloc1(bdiag[0] + 1, &b->a);

1768:   b->j    = bj;
1769:   b->i    = bi;
1770:   b->diag = bdiag;
1771:   b->ilen = NULL;
1772:   b->imax = NULL;
1773:   b->row  = isrow;
1774:   b->col  = iscol;
1775:   PetscObjectReference((PetscObject)isrow);
1776:   PetscObjectReference((PetscObject)iscol);
1777:   b->icol = isicol;

1779:   PetscMalloc1(n + 1, &b->solve_work);
1780:   /* In b structure:  Free imax, ilen, old a, old j.
1781:      Allocate bdiag, solve_work, new a, new j */
1782:   b->maxnz = b->nz = bdiag[0] + 1;

1784:   (fact)->info.factor_mallocs    = reallocs;
1785:   (fact)->info.fill_ratio_given  = f;
1786:   (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
1787:   (fact)->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1788:   if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1789:   MatSeqAIJCheckInode_FactorLU(fact);
1790:   return 0;
1791: }

1793: PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1794: {
1795:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
1796:   IS                 isicol;
1797:   const PetscInt    *r, *ic;
1798:   PetscInt           n = A->rmap->n, *ai = a->i, *aj = a->j;
1799:   PetscInt          *bi, *cols, nnz, *cols_lvl;
1800:   PetscInt          *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0;
1801:   PetscInt           i, levels, diagonal_fill;
1802:   PetscBool          col_identity, row_identity;
1803:   PetscReal          f;
1804:   PetscInt           nlnk, *lnk, *lnk_lvl = NULL;
1805:   PetscBT            lnkbt;
1806:   PetscInt           nzi, *bj, **bj_ptr, **bjlvl_ptr;
1807:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
1808:   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
1809:   PetscBool          missing;

1812:   MatMissingDiagonal(A, &missing, &i);

1815:   f             = info->fill;
1816:   levels        = (PetscInt)info->levels;
1817:   diagonal_fill = (PetscInt)info->diagonal_fill;

1819:   ISInvertPermutation(iscol, PETSC_DECIDE, &isicol);

1821:   ISIdentity(isrow, &row_identity);
1822:   ISIdentity(iscol, &col_identity);
1823:   if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */
1824:     MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_TRUE);

1826:     (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
1827:     if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
1828:     fact->factortype               = MAT_FACTOR_ILU;
1829:     (fact)->info.factor_mallocs    = 0;
1830:     (fact)->info.fill_ratio_given  = info->fill;
1831:     (fact)->info.fill_ratio_needed = 1.0;

1833:     b       = (Mat_SeqAIJ *)(fact)->data;
1834:     b->row  = isrow;
1835:     b->col  = iscol;
1836:     b->icol = isicol;
1837:     PetscMalloc1((fact)->rmap->n + 1, &b->solve_work);
1838:     PetscObjectReference((PetscObject)isrow);
1839:     PetscObjectReference((PetscObject)iscol);
1840:     return 0;
1841:   }

1843:   ISGetIndices(isrow, &r);
1844:   ISGetIndices(isicol, &ic);

1846:   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1847:   PetscMalloc1(n + 1, &bi);
1848:   PetscMalloc1(n + 1, &bdiag);
1849:   bi[0] = bdiag[0] = 0;

1851:   PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr);

1853:   /* create a linked list for storing column indices of the active row */
1854:   nlnk = n + 1;
1855:   PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt);

1857:   /* initial FreeSpace size is f*(ai[n]+1) */
1858:   PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space);
1859:   current_space = free_space;
1860:   PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl);
1861:   current_space_lvl = free_space_lvl;

1863:   for (i = 0; i < n; i++) {
1864:     nzi = 0;
1865:     /* copy current row into linked list */
1866:     nnz = ai[r[i] + 1] - ai[r[i]];
1868:     cols   = aj + ai[r[i]];
1869:     lnk[i] = -1; /* marker to indicate if diagonal exists */
1870:     PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt);
1871:     nzi += nlnk;

1873:     /* make sure diagonal entry is included */
1874:     if (diagonal_fill && lnk[i] == -1) {
1875:       fm = n;
1876:       while (lnk[fm] < i) fm = lnk[fm];
1877:       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1878:       lnk[fm]    = i;
1879:       lnk_lvl[i] = 0;
1880:       nzi++;
1881:       dcount++;
1882:     }

1884:     /* add pivot rows into the active row */
1885:     nzbd = 0;
1886:     prow = lnk[n];
1887:     while (prow < i) {
1888:       nnz      = bdiag[prow];
1889:       cols     = bj_ptr[prow] + nnz + 1;
1890:       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1891:       nnz      = bi[prow + 1] - bi[prow] - nnz - 1;
1892:       PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow);
1893:       nzi += nlnk;
1894:       prow = lnk[prow];
1895:       nzbd++;
1896:     }
1897:     bdiag[i]  = nzbd;
1898:     bi[i + 1] = bi[i] + nzi;

1900:     /* if free space is not available, make more free space */
1901:     if (current_space->local_remaining < nzi) {
1902:       nnz = PetscIntMultTruncate(nzi, n - i); /* estimated and max additional space needed */
1903:       PetscFreeSpaceGet(nnz, &current_space);
1904:       PetscFreeSpaceGet(nnz, &current_space_lvl);
1905:       reallocs++;
1906:     }

1908:     /* copy data into free_space and free_space_lvl, then initialize lnk */
1909:     PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt);
1910:     bj_ptr[i]    = current_space->array;
1911:     bjlvl_ptr[i] = current_space_lvl->array;

1913:     /* make sure the active row i has diagonal entry */

1916:     current_space->array += nzi;
1917:     current_space->local_used += nzi;
1918:     current_space->local_remaining -= nzi;
1919:     current_space_lvl->array += nzi;
1920:     current_space_lvl->local_used += nzi;
1921:     current_space_lvl->local_remaining -= nzi;
1922:   }

1924:   ISRestoreIndices(isrow, &r);
1925:   ISRestoreIndices(isicol, &ic);

1927:   /* destroy list of free space and other temporary arrays */
1928:   PetscMalloc1(bi[n] + 1, &bj);
1929:   PetscFreeSpaceContiguous(&free_space, bj); /* copy free_space -> bj */
1930:   PetscIncompleteLLDestroy(lnk, lnkbt);
1931:   PetscFreeSpaceDestroy(free_space_lvl);
1932:   PetscFree2(bj_ptr, bjlvl_ptr);

1934: #if defined(PETSC_USE_INFO)
1935:   {
1936:     PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
1937:     PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af);
1938:     PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af);
1939:     PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af);
1940:     PetscInfo(A, "for best performance.\n");
1941:     if (diagonal_fill) PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount);
1942:   }
1943: #endif

1945:   /* put together the new matrix */
1946:   MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL);
1947:   b = (Mat_SeqAIJ *)(fact)->data;

1949:   b->free_a       = PETSC_TRUE;
1950:   b->free_ij      = PETSC_TRUE;
1951:   b->singlemalloc = PETSC_FALSE;

1953:   PetscMalloc1(bi[n], &b->a);
1954:   b->j = bj;
1955:   b->i = bi;
1956:   for (i = 0; i < n; i++) bdiag[i] += bi[i];
1957:   b->diag = bdiag;
1958:   b->ilen = NULL;
1959:   b->imax = NULL;
1960:   b->row  = isrow;
1961:   b->col  = iscol;
1962:   PetscObjectReference((PetscObject)isrow);
1963:   PetscObjectReference((PetscObject)iscol);
1964:   b->icol = isicol;
1965:   PetscMalloc1(n + 1, &b->solve_work);
1966:   /* In b structure:  Free imax, ilen, old a, old j.
1967:      Allocate bdiag, solve_work, new a, new j */
1968:   b->maxnz = b->nz = bi[n];

1970:   (fact)->info.factor_mallocs    = reallocs;
1971:   (fact)->info.fill_ratio_given  = f;
1972:   (fact)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
1973:   (fact)->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ_inplace;
1974:   if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
1975:   return 0;
1976: }

1978: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info)
1979: {
1980:   Mat             C  = B;
1981:   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *)A->data;
1982:   Mat_SeqSBAIJ   *b  = (Mat_SeqSBAIJ *)C->data;
1983:   IS              ip = b->row, iip = b->icol;
1984:   const PetscInt *rip, *riip;
1985:   PetscInt        i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp;
1986:   PetscInt       *ai = a->i, *aj = a->j;
1987:   PetscInt        k, jmin, jmax, *c2r, *il, col, nexti, ili, nz;
1988:   MatScalar      *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi;
1989:   PetscBool       perm_identity;
1990:   FactorShiftCtx  sctx;
1991:   PetscReal       rs;
1992:   MatScalar       d, *v;

1994:   /* MatPivotSetUp(): initialize shift context sctx */
1995:   PetscMemzero(&sctx, sizeof(FactorShiftCtx));

1997:   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
1998:     sctx.shift_top = info->zeropivot;
1999:     for (i = 0; i < mbs; i++) {
2000:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2001:       d  = (aa)[a->diag[i]];
2002:       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2003:       v  = aa + ai[i];
2004:       nz = ai[i + 1] - ai[i];
2005:       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
2006:       if (rs > sctx.shift_top) sctx.shift_top = rs;
2007:     }
2008:     sctx.shift_top *= 1.1;
2009:     sctx.nshift_max = 5;
2010:     sctx.shift_lo   = 0.;
2011:     sctx.shift_hi   = 1.;
2012:   }

2014:   ISGetIndices(ip, &rip);
2015:   ISGetIndices(iip, &riip);

2017:   /* allocate working arrays
2018:      c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
2019:      il:  for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
2020:   */
2021:   PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r);

2023:   do {
2024:     sctx.newshift = PETSC_FALSE;

2026:     for (i = 0; i < mbs; i++) c2r[i] = mbs;
2027:     if (mbs) il[0] = 0;

2029:     for (k = 0; k < mbs; k++) {
2030:       /* zero rtmp */
2031:       nz    = bi[k + 1] - bi[k];
2032:       bjtmp = bj + bi[k];
2033:       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;

2035:       /* load in initial unfactored row */
2036:       bval = ba + bi[k];
2037:       jmin = ai[rip[k]];
2038:       jmax = ai[rip[k] + 1];
2039:       for (j = jmin; j < jmax; j++) {
2040:         col = riip[aj[j]];
2041:         if (col >= k) { /* only take upper triangular entry */
2042:           rtmp[col] = aa[j];
2043:           *bval++   = 0.0; /* for in-place factorization */
2044:         }
2045:       }
2046:       /* shift the diagonal of the matrix: ZeropivotApply() */
2047:       rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */

2049:       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2050:       dk = rtmp[k];
2051:       i  = c2r[k]; /* first row to be added to k_th row  */

2053:       while (i < k) {
2054:         nexti = c2r[i]; /* next row to be added to k_th row */

2056:         /* compute multiplier, update diag(k) and U(i,k) */
2057:         ili   = il[i];                   /* index of first nonzero element in U(i,k:bms-1) */
2058:         uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */
2059:         dk += uikdi * ba[ili];           /* update diag[k] */
2060:         ba[ili] = uikdi;                 /* -U(i,k) */

2062:         /* add multiple of row i to k-th row */
2063:         jmin = ili + 1;
2064:         jmax = bi[i + 1];
2065:         if (jmin < jmax) {
2066:           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
2067:           /* update il and c2r for row i */
2068:           il[i]  = jmin;
2069:           j      = bj[jmin];
2070:           c2r[i] = c2r[j];
2071:           c2r[j] = i;
2072:         }
2073:         i = nexti;
2074:       }

2076:       /* copy data into U(k,:) */
2077:       rs   = 0.0;
2078:       jmin = bi[k];
2079:       jmax = bi[k + 1] - 1;
2080:       if (jmin < jmax) {
2081:         for (j = jmin; j < jmax; j++) {
2082:           col   = bj[j];
2083:           ba[j] = rtmp[col];
2084:           rs += PetscAbsScalar(ba[j]);
2085:         }
2086:         /* add the k-th row into il and c2r */
2087:         il[k]  = jmin;
2088:         i      = bj[jmin];
2089:         c2r[k] = c2r[i];
2090:         c2r[i] = k;
2091:       }

2093:       /* MatPivotCheck() */
2094:       sctx.rs = rs;
2095:       sctx.pv = dk;
2096:       MatPivotCheck(B, A, info, &sctx, i);
2097:       if (sctx.newshift) break;
2098:       dk = sctx.pv;

2100:       ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */
2101:     }
2102:   } while (sctx.newshift);

2104:   PetscFree3(rtmp, il, c2r);
2105:   ISRestoreIndices(ip, &rip);
2106:   ISRestoreIndices(iip, &riip);

2108:   ISIdentity(ip, &perm_identity);
2109:   if (perm_identity) {
2110:     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2111:     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2112:     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
2113:     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
2114:   } else {
2115:     B->ops->solve          = MatSolve_SeqSBAIJ_1;
2116:     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1;
2117:     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1;
2118:     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1;
2119:   }

2121:   C->assembled    = PETSC_TRUE;
2122:   C->preallocated = PETSC_TRUE;

2124:   PetscLogFlops(C->rmap->n);

2126:   /* MatPivotView() */
2127:   if (sctx.nshift) {
2128:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2129:       PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top);
2130:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2131:       PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount);
2132:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
2133:       PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount);
2134:     }
2135:   }
2136:   return 0;
2137: }

2139: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info)
2140: {
2141:   Mat             C  = B;
2142:   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *)A->data;
2143:   Mat_SeqSBAIJ   *b  = (Mat_SeqSBAIJ *)C->data;
2144:   IS              ip = b->row, iip = b->icol;
2145:   const PetscInt *rip, *riip;
2146:   PetscInt        i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bcol, *bjtmp;
2147:   PetscInt       *ai = a->i, *aj = a->j;
2148:   PetscInt        k, jmin, jmax, *jl, *il, col, nexti, ili, nz;
2149:   MatScalar      *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi;
2150:   PetscBool       perm_identity;
2151:   FactorShiftCtx  sctx;
2152:   PetscReal       rs;
2153:   MatScalar       d, *v;

2155:   /* MatPivotSetUp(): initialize shift context sctx */
2156:   PetscMemzero(&sctx, sizeof(FactorShiftCtx));

2158:   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2159:     sctx.shift_top = info->zeropivot;
2160:     for (i = 0; i < mbs; i++) {
2161:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2162:       d  = (aa)[a->diag[i]];
2163:       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2164:       v  = aa + ai[i];
2165:       nz = ai[i + 1] - ai[i];
2166:       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
2167:       if (rs > sctx.shift_top) sctx.shift_top = rs;
2168:     }
2169:     sctx.shift_top *= 1.1;
2170:     sctx.nshift_max = 5;
2171:     sctx.shift_lo   = 0.;
2172:     sctx.shift_hi   = 1.;
2173:   }

2175:   ISGetIndices(ip, &rip);
2176:   ISGetIndices(iip, &riip);

2178:   /* initialization */
2179:   PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl);

2181:   do {
2182:     sctx.newshift = PETSC_FALSE;

2184:     for (i = 0; i < mbs; i++) jl[i] = mbs;
2185:     il[0] = 0;

2187:     for (k = 0; k < mbs; k++) {
2188:       /* zero rtmp */
2189:       nz    = bi[k + 1] - bi[k];
2190:       bjtmp = bj + bi[k];
2191:       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;

2193:       bval = ba + bi[k];
2194:       /* initialize k-th row by the perm[k]-th row of A */
2195:       jmin = ai[rip[k]];
2196:       jmax = ai[rip[k] + 1];
2197:       for (j = jmin; j < jmax; j++) {
2198:         col = riip[aj[j]];
2199:         if (col >= k) { /* only take upper triangular entry */
2200:           rtmp[col] = aa[j];
2201:           *bval++   = 0.0; /* for in-place factorization */
2202:         }
2203:       }
2204:       /* shift the diagonal of the matrix */
2205:       if (sctx.nshift) rtmp[k] += sctx.shift_amount;

2207:       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2208:       dk = rtmp[k];
2209:       i  = jl[k]; /* first row to be added to k_th row  */

2211:       while (i < k) {
2212:         nexti = jl[i]; /* next row to be added to k_th row */

2214:         /* compute multiplier, update diag(k) and U(i,k) */
2215:         ili   = il[i];                /* index of first nonzero element in U(i,k:bms-1) */
2216:         uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */
2217:         dk += uikdi * ba[ili];
2218:         ba[ili] = uikdi; /* -U(i,k) */

2220:         /* add multiple of row i to k-th row */
2221:         jmin = ili + 1;
2222:         jmax = bi[i + 1];
2223:         if (jmin < jmax) {
2224:           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
2225:           /* update il and jl for row i */
2226:           il[i] = jmin;
2227:           j     = bj[jmin];
2228:           jl[i] = jl[j];
2229:           jl[j] = i;
2230:         }
2231:         i = nexti;
2232:       }

2234:       /* shift the diagonals when zero pivot is detected */
2235:       /* compute rs=sum of abs(off-diagonal) */
2236:       rs   = 0.0;
2237:       jmin = bi[k] + 1;
2238:       nz   = bi[k + 1] - jmin;
2239:       bcol = bj + jmin;
2240:       for (j = 0; j < nz; j++) rs += PetscAbsScalar(rtmp[bcol[j]]);

2242:       sctx.rs = rs;
2243:       sctx.pv = dk;
2244:       MatPivotCheck(B, A, info, &sctx, k);
2245:       if (sctx.newshift) break;
2246:       dk = sctx.pv;

2248:       /* copy data into U(k,:) */
2249:       ba[bi[k]] = 1.0 / dk; /* U(k,k) */
2250:       jmin      = bi[k] + 1;
2251:       jmax      = bi[k + 1];
2252:       if (jmin < jmax) {
2253:         for (j = jmin; j < jmax; j++) {
2254:           col   = bj[j];
2255:           ba[j] = rtmp[col];
2256:         }
2257:         /* add the k-th row into il and jl */
2258:         il[k] = jmin;
2259:         i     = bj[jmin];
2260:         jl[k] = jl[i];
2261:         jl[i] = k;
2262:       }
2263:     }
2264:   } while (sctx.newshift);

2266:   PetscFree3(rtmp, il, jl);
2267:   ISRestoreIndices(ip, &rip);
2268:   ISRestoreIndices(iip, &riip);

2270:   ISIdentity(ip, &perm_identity);
2271:   if (perm_identity) {
2272:     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2273:     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2274:     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2275:     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2276:   } else {
2277:     B->ops->solve          = MatSolve_SeqSBAIJ_1_inplace;
2278:     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
2279:     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_inplace;
2280:     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_inplace;
2281:   }

2283:   C->assembled    = PETSC_TRUE;
2284:   C->preallocated = PETSC_TRUE;

2286:   PetscLogFlops(C->rmap->n);
2287:   if (sctx.nshift) {
2288:     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2289:       PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount);
2290:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2291:       PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount);
2292:     }
2293:   }
2294:   return 0;
2295: }

2297: /*
2298:    icc() under revised new data structure.
2299:    Factored arrays bj and ba are stored as
2300:      U(0,:),...,U(i,:),U(n-1,:)

2302:    ui=fact->i is an array of size n+1, in which
2303:    ui+
2304:      ui[i]:  points to 1st entry of U(i,:),i=0,...,n-1
2305:      ui[n]:  points to U(n-1,n-1)+1

2307:   udiag=fact->diag is an array of size n,in which
2308:      udiag[i]: points to diagonal of U(i,:), i=0,...,n-1

2310:    U(i,:) contains udiag[i] as its last entry, i.e.,
2311:     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
2312: */

2314: PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2315: {
2316:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2317:   Mat_SeqSBAIJ      *b;
2318:   PetscBool          perm_identity, missing;
2319:   PetscInt           reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag;
2320:   const PetscInt    *rip, *riip;
2321:   PetscInt           jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow;
2322:   PetscInt           nlnk, *lnk, *lnk_lvl = NULL, d;
2323:   PetscInt           ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr;
2324:   PetscReal          fill = info->fill, levels = info->levels;
2325:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2326:   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
2327:   PetscBT            lnkbt;
2328:   IS                 iperm;

2331:   MatMissingDiagonal(A, &missing, &d);
2333:   ISIdentity(perm, &perm_identity);
2334:   ISInvertPermutation(perm, PETSC_DECIDE, &iperm);

2336:   PetscMalloc1(am + 1, &ui);
2337:   PetscMalloc1(am + 1, &udiag);
2338:   ui[0] = 0;

2340:   /* ICC(0) without matrix ordering: simply rearrange column indices */
2341:   if (!levels && perm_identity) {
2342:     for (i = 0; i < am; i++) {
2343:       ncols     = ai[i + 1] - a->diag[i];
2344:       ui[i + 1] = ui[i] + ncols;
2345:       udiag[i]  = ui[i + 1] - 1; /* points to the last entry of U(i,:) */
2346:     }
2347:     PetscMalloc1(ui[am] + 1, &uj);
2348:     cols = uj;
2349:     for (i = 0; i < am; i++) {
2350:       aj    = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2351:       ncols = ai[i + 1] - a->diag[i] - 1;
2352:       for (j = 0; j < ncols; j++) *cols++ = aj[j];
2353:       *cols++ = i; /* diagonal is located as the last entry of U(i,:) */
2354:     }
2355:   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2356:     ISGetIndices(iperm, &riip);
2357:     ISGetIndices(perm, &rip);

2359:     /* initialization */
2360:     PetscMalloc1(am + 1, &ajtmp);

2362:     /* jl: linked list for storing indices of the pivot rows
2363:        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2364:     PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il);
2365:     for (i = 0; i < am; i++) {
2366:       jl[i] = am;
2367:       il[i] = 0;
2368:     }

2370:     /* create and initialize a linked list for storing column indices of the active row k */
2371:     nlnk = am + 1;
2372:     PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt);

2374:     /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2375:     PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space);
2376:     current_space = free_space;
2377:     PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space_lvl);
2378:     current_space_lvl = free_space_lvl;

2380:     for (k = 0; k < am; k++) { /* for each active row k */
2381:       /* initialize lnk by the column indices of row rip[k] of A */
2382:       nzk   = 0;
2383:       ncols = ai[rip[k] + 1] - ai[rip[k]];
2385:       ncols_upper = 0;
2386:       for (j = 0; j < ncols; j++) {
2387:         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2388:         if (riip[i] >= k) {         /* only take upper triangular entry */
2389:           ajtmp[ncols_upper] = i;
2390:           ncols_upper++;
2391:         }
2392:       }
2393:       PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt);
2394:       nzk += nlnk;

2396:       /* update lnk by computing fill-in for each pivot row to be merged in */
2397:       prow = jl[k]; /* 1st pivot row */

2399:       while (prow < k) {
2400:         nextprow = jl[prow];

2402:         /* merge prow into k-th row */
2403:         jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2404:         jmax  = ui[prow + 1];
2405:         ncols = jmax - jmin;
2406:         i     = jmin - ui[prow];
2407:         cols  = uj_ptr[prow] + i;     /* points to the 2nd nzero entry in U(prow,k:am-1) */
2408:         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2409:         j     = *(uj - 1);
2410:         PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j);
2411:         nzk += nlnk;

2413:         /* update il and jl for prow */
2414:         if (jmin < jmax) {
2415:           il[prow] = jmin;
2416:           j        = *cols;
2417:           jl[prow] = jl[j];
2418:           jl[j]    = prow;
2419:         }
2420:         prow = nextprow;
2421:       }

2423:       /* if free space is not available, make more free space */
2424:       if (current_space->local_remaining < nzk) {
2425:         i = am - k + 1;                                    /* num of unfactored rows */
2426:         i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2427:         PetscFreeSpaceGet(i, &current_space);
2428:         PetscFreeSpaceGet(i, &current_space_lvl);
2429:         reallocs++;
2430:       }

2432:       /* copy data into free_space and free_space_lvl, then initialize lnk */
2434:       PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt);

2436:       /* add the k-th row into il and jl */
2437:       if (nzk > 1) {
2438:         i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2439:         jl[k] = jl[i];
2440:         jl[i] = k;
2441:         il[k] = ui[k] + 1;
2442:       }
2443:       uj_ptr[k]     = current_space->array;
2444:       uj_lvl_ptr[k] = current_space_lvl->array;

2446:       current_space->array += nzk;
2447:       current_space->local_used += nzk;
2448:       current_space->local_remaining -= nzk;

2450:       current_space_lvl->array += nzk;
2451:       current_space_lvl->local_used += nzk;
2452:       current_space_lvl->local_remaining -= nzk;

2454:       ui[k + 1] = ui[k] + nzk;
2455:     }

2457:     ISRestoreIndices(perm, &rip);
2458:     ISRestoreIndices(iperm, &riip);
2459:     PetscFree4(uj_ptr, uj_lvl_ptr, jl, il);
2460:     PetscFree(ajtmp);

2462:     /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2463:     PetscMalloc1(ui[am] + 1, &uj);
2464:     PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag); /* store matrix factor  */
2465:     PetscIncompleteLLDestroy(lnk, lnkbt);
2466:     PetscFreeSpaceDestroy(free_space_lvl);

2468:   } /* end of case: levels>0 || (levels=0 && !perm_identity) */

2470:   /* put together the new matrix in MATSEQSBAIJ format */
2471:   b               = (Mat_SeqSBAIJ *)(fact)->data;
2472:   b->singlemalloc = PETSC_FALSE;

2474:   PetscMalloc1(ui[am] + 1, &b->a);

2476:   b->j         = uj;
2477:   b->i         = ui;
2478:   b->diag      = udiag;
2479:   b->free_diag = PETSC_TRUE;
2480:   b->ilen      = NULL;
2481:   b->imax      = NULL;
2482:   b->row       = perm;
2483:   b->col       = perm;
2484:   PetscObjectReference((PetscObject)perm);
2485:   PetscObjectReference((PetscObject)perm);
2486:   b->icol          = iperm;
2487:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */

2489:   PetscMalloc1(am + 1, &b->solve_work);

2491:   b->maxnz = b->nz = ui[am];
2492:   b->free_a        = PETSC_TRUE;
2493:   b->free_ij       = PETSC_TRUE;

2495:   fact->info.factor_mallocs   = reallocs;
2496:   fact->info.fill_ratio_given = fill;
2497:   if (ai[am] != 0) {
2498:     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2499:     fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
2500:   } else {
2501:     fact->info.fill_ratio_needed = 0.0;
2502:   }
2503: #if defined(PETSC_USE_INFO)
2504:   if (ai[am] != 0) {
2505:     PetscReal af = fact->info.fill_ratio_needed;
2506:     PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af);
2507:     PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af);
2508:     PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af);
2509:   } else {
2510:     PetscInfo(A, "Empty matrix\n");
2511:   }
2512: #endif
2513:   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2514:   return 0;
2515: }

2517: PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2518: {
2519:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2520:   Mat_SeqSBAIJ      *b;
2521:   PetscBool          perm_identity, missing;
2522:   PetscInt           reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag;
2523:   const PetscInt    *rip, *riip;
2524:   PetscInt           jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow;
2525:   PetscInt           nlnk, *lnk, *lnk_lvl = NULL, d;
2526:   PetscInt           ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr;
2527:   PetscReal          fill = info->fill, levels = info->levels;
2528:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2529:   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
2530:   PetscBT            lnkbt;
2531:   IS                 iperm;

2534:   MatMissingDiagonal(A, &missing, &d);
2536:   ISIdentity(perm, &perm_identity);
2537:   ISInvertPermutation(perm, PETSC_DECIDE, &iperm);

2539:   PetscMalloc1(am + 1, &ui);
2540:   PetscMalloc1(am + 1, &udiag);
2541:   ui[0] = 0;

2543:   /* ICC(0) without matrix ordering: simply copies fill pattern */
2544:   if (!levels && perm_identity) {
2545:     for (i = 0; i < am; i++) {
2546:       ui[i + 1] = ui[i] + ai[i + 1] - a->diag[i];
2547:       udiag[i]  = ui[i];
2548:     }
2549:     PetscMalloc1(ui[am] + 1, &uj);
2550:     cols = uj;
2551:     for (i = 0; i < am; i++) {
2552:       aj    = a->j + a->diag[i];
2553:       ncols = ui[i + 1] - ui[i];
2554:       for (j = 0; j < ncols; j++) *cols++ = *aj++;
2555:     }
2556:   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2557:     ISGetIndices(iperm, &riip);
2558:     ISGetIndices(perm, &rip);

2560:     /* initialization */
2561:     PetscMalloc1(am + 1, &ajtmp);

2563:     /* jl: linked list for storing indices of the pivot rows
2564:        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2565:     PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il);
2566:     for (i = 0; i < am; i++) {
2567:       jl[i] = am;
2568:       il[i] = 0;
2569:     }

2571:     /* create and initialize a linked list for storing column indices of the active row k */
2572:     nlnk = am + 1;
2573:     PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt);

2575:     /* initial FreeSpace size is fill*(ai[am]+1) */
2576:     PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space);
2577:     current_space = free_space;
2578:     PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space_lvl);
2579:     current_space_lvl = free_space_lvl;

2581:     for (k = 0; k < am; k++) { /* for each active row k */
2582:       /* initialize lnk by the column indices of row rip[k] of A */
2583:       nzk   = 0;
2584:       ncols = ai[rip[k] + 1] - ai[rip[k]];
2586:       ncols_upper = 0;
2587:       for (j = 0; j < ncols; j++) {
2588:         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2589:         if (riip[i] >= k) {         /* only take upper triangular entry */
2590:           ajtmp[ncols_upper] = i;
2591:           ncols_upper++;
2592:         }
2593:       }
2594:       PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt);
2595:       nzk += nlnk;

2597:       /* update lnk by computing fill-in for each pivot row to be merged in */
2598:       prow = jl[k]; /* 1st pivot row */

2600:       while (prow < k) {
2601:         nextprow = jl[prow];

2603:         /* merge prow into k-th row */
2604:         jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2605:         jmax  = ui[prow + 1];
2606:         ncols = jmax - jmin;
2607:         i     = jmin - ui[prow];
2608:         cols  = uj_ptr[prow] + i;     /* points to the 2nd nzero entry in U(prow,k:am-1) */
2609:         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2610:         j     = *(uj - 1);
2611:         PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j);
2612:         nzk += nlnk;

2614:         /* update il and jl for prow */
2615:         if (jmin < jmax) {
2616:           il[prow] = jmin;
2617:           j        = *cols;
2618:           jl[prow] = jl[j];
2619:           jl[j]    = prow;
2620:         }
2621:         prow = nextprow;
2622:       }

2624:       /* if free space is not available, make more free space */
2625:       if (current_space->local_remaining < nzk) {
2626:         i = am - k + 1;                                    /* num of unfactored rows */
2627:         i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2628:         PetscFreeSpaceGet(i, &current_space);
2629:         PetscFreeSpaceGet(i, &current_space_lvl);
2630:         reallocs++;
2631:       }

2633:       /* copy data into free_space and free_space_lvl, then initialize lnk */
2635:       PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt);

2637:       /* add the k-th row into il and jl */
2638:       if (nzk > 1) {
2639:         i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2640:         jl[k] = jl[i];
2641:         jl[i] = k;
2642:         il[k] = ui[k] + 1;
2643:       }
2644:       uj_ptr[k]     = current_space->array;
2645:       uj_lvl_ptr[k] = current_space_lvl->array;

2647:       current_space->array += nzk;
2648:       current_space->local_used += nzk;
2649:       current_space->local_remaining -= nzk;

2651:       current_space_lvl->array += nzk;
2652:       current_space_lvl->local_used += nzk;
2653:       current_space_lvl->local_remaining -= nzk;

2655:       ui[k + 1] = ui[k] + nzk;
2656:     }

2658: #if defined(PETSC_USE_INFO)
2659:     if (ai[am] != 0) {
2660:       PetscReal af = (PetscReal)ui[am] / ((PetscReal)ai[am]);
2661:       PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af);
2662:       PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af);
2663:       PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af);
2664:     } else {
2665:       PetscInfo(A, "Empty matrix\n");
2666:     }
2667: #endif

2669:     ISRestoreIndices(perm, &rip);
2670:     ISRestoreIndices(iperm, &riip);
2671:     PetscFree4(uj_ptr, uj_lvl_ptr, jl, il);
2672:     PetscFree(ajtmp);

2674:     /* destroy list of free space and other temporary array(s) */
2675:     PetscMalloc1(ui[am] + 1, &uj);
2676:     PetscFreeSpaceContiguous(&free_space, uj);
2677:     PetscIncompleteLLDestroy(lnk, lnkbt);
2678:     PetscFreeSpaceDestroy(free_space_lvl);

2680:   } /* end of case: levels>0 || (levels=0 && !perm_identity) */

2682:   /* put together the new matrix in MATSEQSBAIJ format */

2684:   b               = (Mat_SeqSBAIJ *)fact->data;
2685:   b->singlemalloc = PETSC_FALSE;

2687:   PetscMalloc1(ui[am] + 1, &b->a);

2689:   b->j         = uj;
2690:   b->i         = ui;
2691:   b->diag      = udiag;
2692:   b->free_diag = PETSC_TRUE;
2693:   b->ilen      = NULL;
2694:   b->imax      = NULL;
2695:   b->row       = perm;
2696:   b->col       = perm;

2698:   PetscObjectReference((PetscObject)perm);
2699:   PetscObjectReference((PetscObject)perm);

2701:   b->icol          = iperm;
2702:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2703:   PetscMalloc1(am + 1, &b->solve_work);
2704:   b->maxnz = b->nz = ui[am];
2705:   b->free_a        = PETSC_TRUE;
2706:   b->free_ij       = PETSC_TRUE;

2708:   fact->info.factor_mallocs   = reallocs;
2709:   fact->info.fill_ratio_given = fill;
2710:   if (ai[am] != 0) {
2711:     fact->info.fill_ratio_needed = ((PetscReal)ui[am]) / ((PetscReal)ai[am]);
2712:   } else {
2713:     fact->info.fill_ratio_needed = 0.0;
2714:   }
2715:   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
2716:   return 0;
2717: }

2719: PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2720: {
2721:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2722:   Mat_SeqSBAIJ      *b;
2723:   PetscBool          perm_identity, missing;
2724:   PetscReal          fill = info->fill;
2725:   const PetscInt    *rip, *riip;
2726:   PetscInt           i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow;
2727:   PetscInt          *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
2728:   PetscInt           nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr, *udiag;
2729:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2730:   PetscBT            lnkbt;
2731:   IS                 iperm;

2734:   MatMissingDiagonal(A, &missing, &i);

2737:   /* check whether perm is the identity mapping */
2738:   ISIdentity(perm, &perm_identity);
2739:   ISInvertPermutation(perm, PETSC_DECIDE, &iperm);
2740:   ISGetIndices(iperm, &riip);
2741:   ISGetIndices(perm, &rip);

2743:   /* initialization */
2744:   PetscMalloc1(am + 1, &ui);
2745:   PetscMalloc1(am + 1, &udiag);
2746:   ui[0] = 0;

2748:   /* jl: linked list for storing indices of the pivot rows
2749:      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2750:   PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols);
2751:   for (i = 0; i < am; i++) {
2752:     jl[i] = am;
2753:     il[i] = 0;
2754:   }

2756:   /* create and initialize a linked list for storing column indices of the active row k */
2757:   nlnk = am + 1;
2758:   PetscLLCreate(am, am, nlnk, lnk, lnkbt);

2760:   /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2761:   PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space);
2762:   current_space = free_space;

2764:   for (k = 0; k < am; k++) { /* for each active row k */
2765:     /* initialize lnk by the column indices of row rip[k] of A */
2766:     nzk   = 0;
2767:     ncols = ai[rip[k] + 1] - ai[rip[k]];
2769:     ncols_upper = 0;
2770:     for (j = 0; j < ncols; j++) {
2771:       i = riip[*(aj + ai[rip[k]] + j)];
2772:       if (i >= k) { /* only take upper triangular entry */
2773:         cols[ncols_upper] = i;
2774:         ncols_upper++;
2775:       }
2776:     }
2777:     PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt);
2778:     nzk += nlnk;

2780:     /* update lnk by computing fill-in for each pivot row to be merged in */
2781:     prow = jl[k]; /* 1st pivot row */

2783:     while (prow < k) {
2784:       nextprow = jl[prow];
2785:       /* merge prow into k-th row */
2786:       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2787:       jmax   = ui[prow + 1];
2788:       ncols  = jmax - jmin;
2789:       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2790:       PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt);
2791:       nzk += nlnk;

2793:       /* update il and jl for prow */
2794:       if (jmin < jmax) {
2795:         il[prow] = jmin;
2796:         j        = *uj_ptr;
2797:         jl[prow] = jl[j];
2798:         jl[j]    = prow;
2799:       }
2800:       prow = nextprow;
2801:     }

2803:     /* if free space is not available, make more free space */
2804:     if (current_space->local_remaining < nzk) {
2805:       i = am - k + 1;                                    /* num of unfactored rows */
2806:       i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2807:       PetscFreeSpaceGet(i, &current_space);
2808:       reallocs++;
2809:     }

2811:     /* copy data into free space, then initialize lnk */
2812:     PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt);

2814:     /* add the k-th row into il and jl */
2815:     if (nzk > 1) {
2816:       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2817:       jl[k] = jl[i];
2818:       jl[i] = k;
2819:       il[k] = ui[k] + 1;
2820:     }
2821:     ui_ptr[k] = current_space->array;

2823:     current_space->array += nzk;
2824:     current_space->local_used += nzk;
2825:     current_space->local_remaining -= nzk;

2827:     ui[k + 1] = ui[k] + nzk;
2828:   }

2830:   ISRestoreIndices(perm, &rip);
2831:   ISRestoreIndices(iperm, &riip);
2832:   PetscFree4(ui_ptr, jl, il, cols);

2834:   /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2835:   PetscMalloc1(ui[am] + 1, &uj);
2836:   PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag); /* store matrix factor */
2837:   PetscLLDestroy(lnk, lnkbt);

2839:   /* put together the new matrix in MATSEQSBAIJ format */

2841:   b               = (Mat_SeqSBAIJ *)fact->data;
2842:   b->singlemalloc = PETSC_FALSE;
2843:   b->free_a       = PETSC_TRUE;
2844:   b->free_ij      = PETSC_TRUE;

2846:   PetscMalloc1(ui[am] + 1, &b->a);

2848:   b->j         = uj;
2849:   b->i         = ui;
2850:   b->diag      = udiag;
2851:   b->free_diag = PETSC_TRUE;
2852:   b->ilen      = NULL;
2853:   b->imax      = NULL;
2854:   b->row       = perm;
2855:   b->col       = perm;

2857:   PetscObjectReference((PetscObject)perm);
2858:   PetscObjectReference((PetscObject)perm);

2860:   b->icol          = iperm;
2861:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */

2863:   PetscMalloc1(am + 1, &b->solve_work);

2865:   b->maxnz = b->nz = ui[am];

2867:   fact->info.factor_mallocs   = reallocs;
2868:   fact->info.fill_ratio_given = fill;
2869:   if (ai[am] != 0) {
2870:     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2871:     fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
2872:   } else {
2873:     fact->info.fill_ratio_needed = 0.0;
2874:   }
2875: #if defined(PETSC_USE_INFO)
2876:   if (ai[am] != 0) {
2877:     PetscReal af = fact->info.fill_ratio_needed;
2878:     PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af);
2879:     PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af);
2880:     PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af);
2881:   } else {
2882:     PetscInfo(A, "Empty matrix\n");
2883:   }
2884: #endif
2885:   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2886:   return 0;
2887: }

2889: PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2890: {
2891:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2892:   Mat_SeqSBAIJ      *b;
2893:   PetscBool          perm_identity, missing;
2894:   PetscReal          fill = info->fill;
2895:   const PetscInt    *rip, *riip;
2896:   PetscInt           i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow;
2897:   PetscInt          *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
2898:   PetscInt           nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr;
2899:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2900:   PetscBT            lnkbt;
2901:   IS                 iperm;

2904:   MatMissingDiagonal(A, &missing, &i);

2907:   /* check whether perm is the identity mapping */
2908:   ISIdentity(perm, &perm_identity);
2909:   ISInvertPermutation(perm, PETSC_DECIDE, &iperm);
2910:   ISGetIndices(iperm, &riip);
2911:   ISGetIndices(perm, &rip);

2913:   /* initialization */
2914:   PetscMalloc1(am + 1, &ui);
2915:   ui[0] = 0;

2917:   /* jl: linked list for storing indices of the pivot rows
2918:      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2919:   PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols);
2920:   for (i = 0; i < am; i++) {
2921:     jl[i] = am;
2922:     il[i] = 0;
2923:   }

2925:   /* create and initialize a linked list for storing column indices of the active row k */
2926:   nlnk = am + 1;
2927:   PetscLLCreate(am, am, nlnk, lnk, lnkbt);

2929:   /* initial FreeSpace size is fill*(ai[am]+1) */
2930:   PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space);
2931:   current_space = free_space;

2933:   for (k = 0; k < am; k++) { /* for each active row k */
2934:     /* initialize lnk by the column indices of row rip[k] of A */
2935:     nzk   = 0;
2936:     ncols = ai[rip[k] + 1] - ai[rip[k]];
2938:     ncols_upper = 0;
2939:     for (j = 0; j < ncols; j++) {
2940:       i = riip[*(aj + ai[rip[k]] + j)];
2941:       if (i >= k) { /* only take upper triangular entry */
2942:         cols[ncols_upper] = i;
2943:         ncols_upper++;
2944:       }
2945:     }
2946:     PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt);
2947:     nzk += nlnk;

2949:     /* update lnk by computing fill-in for each pivot row to be merged in */
2950:     prow = jl[k]; /* 1st pivot row */

2952:     while (prow < k) {
2953:       nextprow = jl[prow];
2954:       /* merge prow into k-th row */
2955:       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2956:       jmax   = ui[prow + 1];
2957:       ncols  = jmax - jmin;
2958:       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2959:       PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt);
2960:       nzk += nlnk;

2962:       /* update il and jl for prow */
2963:       if (jmin < jmax) {
2964:         il[prow] = jmin;
2965:         j        = *uj_ptr;
2966:         jl[prow] = jl[j];
2967:         jl[j]    = prow;
2968:       }
2969:       prow = nextprow;
2970:     }

2972:     /* if free space is not available, make more free space */
2973:     if (current_space->local_remaining < nzk) {
2974:       i = am - k + 1;                     /* num of unfactored rows */
2975:       i = PetscMin(i * nzk, i * (i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2976:       PetscFreeSpaceGet(i, &current_space);
2977:       reallocs++;
2978:     }

2980:     /* copy data into free space, then initialize lnk */
2981:     PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt);

2983:     /* add the k-th row into il and jl */
2984:     if (nzk - 1 > 0) {
2985:       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2986:       jl[k] = jl[i];
2987:       jl[i] = k;
2988:       il[k] = ui[k] + 1;
2989:     }
2990:     ui_ptr[k] = current_space->array;

2992:     current_space->array += nzk;
2993:     current_space->local_used += nzk;
2994:     current_space->local_remaining -= nzk;

2996:     ui[k + 1] = ui[k] + nzk;
2997:   }

2999: #if defined(PETSC_USE_INFO)
3000:   if (ai[am] != 0) {
3001:     PetscReal af = (PetscReal)(ui[am]) / ((PetscReal)ai[am]);
3002:     PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af);
3003:     PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af);
3004:     PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af);
3005:   } else {
3006:     PetscInfo(A, "Empty matrix\n");
3007:   }
3008: #endif

3010:   ISRestoreIndices(perm, &rip);
3011:   ISRestoreIndices(iperm, &riip);
3012:   PetscFree4(ui_ptr, jl, il, cols);

3014:   /* destroy list of free space and other temporary array(s) */
3015:   PetscMalloc1(ui[am] + 1, &uj);
3016:   PetscFreeSpaceContiguous(&free_space, uj);
3017:   PetscLLDestroy(lnk, lnkbt);

3019:   /* put together the new matrix in MATSEQSBAIJ format */

3021:   b               = (Mat_SeqSBAIJ *)fact->data;
3022:   b->singlemalloc = PETSC_FALSE;
3023:   b->free_a       = PETSC_TRUE;
3024:   b->free_ij      = PETSC_TRUE;

3026:   PetscMalloc1(ui[am] + 1, &b->a);

3028:   b->j    = uj;
3029:   b->i    = ui;
3030:   b->diag = NULL;
3031:   b->ilen = NULL;
3032:   b->imax = NULL;
3033:   b->row  = perm;
3034:   b->col  = perm;

3036:   PetscObjectReference((PetscObject)perm);
3037:   PetscObjectReference((PetscObject)perm);

3039:   b->icol          = iperm;
3040:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */

3042:   PetscMalloc1(am + 1, &b->solve_work);
3043:   b->maxnz = b->nz = ui[am];

3045:   fact->info.factor_mallocs   = reallocs;
3046:   fact->info.fill_ratio_given = fill;
3047:   if (ai[am] != 0) {
3048:     fact->info.fill_ratio_needed = ((PetscReal)ui[am]) / ((PetscReal)ai[am]);
3049:   } else {
3050:     fact->info.fill_ratio_needed = 0.0;
3051:   }
3052:   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
3053:   return 0;
3054: }

3056: PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A, Vec bb, Vec xx)
3057: {
3058:   Mat_SeqAIJ        *a  = (Mat_SeqAIJ *)A->data;
3059:   PetscInt           n  = A->rmap->n;
3060:   const PetscInt    *ai = a->i, *aj = a->j, *adiag = a->diag, *vi;
3061:   PetscScalar       *x, sum;
3062:   const PetscScalar *b;
3063:   const MatScalar   *aa = a->a, *v;
3064:   PetscInt           i, nz;

3066:   if (!n) return 0;

3068:   VecGetArrayRead(bb, &b);
3069:   VecGetArrayWrite(xx, &x);

3071:   /* forward solve the lower triangular */
3072:   x[0] = b[0];
3073:   v    = aa;
3074:   vi   = aj;
3075:   for (i = 1; i < n; i++) {
3076:     nz  = ai[i + 1] - ai[i];
3077:     sum = b[i];
3078:     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
3079:     v += nz;
3080:     vi += nz;
3081:     x[i] = sum;
3082:   }

3084:   /* backward solve the upper triangular */
3085:   for (i = n - 1; i >= 0; i--) {
3086:     v   = aa + adiag[i + 1] + 1;
3087:     vi  = aj + adiag[i + 1] + 1;
3088:     nz  = adiag[i] - adiag[i + 1] - 1;
3089:     sum = x[i];
3090:     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
3091:     x[i] = sum * v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
3092:   }

3094:   PetscLogFlops(2.0 * a->nz - A->cmap->n);
3095:   VecRestoreArrayRead(bb, &b);
3096:   VecRestoreArrayWrite(xx, &x);
3097:   return 0;
3098: }

3100: PetscErrorCode MatSolve_SeqAIJ(Mat A, Vec bb, Vec xx)
3101: {
3102:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
3103:   IS                 iscol = a->col, isrow = a->row;
3104:   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, nz;
3105:   const PetscInt    *rout, *cout, *r, *c;
3106:   PetscScalar       *x, *tmp, sum;
3107:   const PetscScalar *b;
3108:   const MatScalar   *aa = a->a, *v;

3110:   if (!n) return 0;

3112:   VecGetArrayRead(bb, &b);
3113:   VecGetArrayWrite(xx, &x);
3114:   tmp = a->solve_work;

3116:   ISGetIndices(isrow, &rout);
3117:   r = rout;
3118:   ISGetIndices(iscol, &cout);
3119:   c = cout;

3121:   /* forward solve the lower triangular */
3122:   tmp[0] = b[r[0]];
3123:   v      = aa;
3124:   vi     = aj;
3125:   for (i = 1; i < n; i++) {
3126:     nz  = ai[i + 1] - ai[i];
3127:     sum = b[r[i]];
3128:     PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
3129:     tmp[i] = sum;
3130:     v += nz;
3131:     vi += nz;
3132:   }

3134:   /* backward solve the upper triangular */
3135:   for (i = n - 1; i >= 0; i--) {
3136:     v   = aa + adiag[i + 1] + 1;
3137:     vi  = aj + adiag[i + 1] + 1;
3138:     nz  = adiag[i] - adiag[i + 1] - 1;
3139:     sum = tmp[i];
3140:     PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
3141:     x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
3142:   }

3144:   ISRestoreIndices(isrow, &rout);
3145:   ISRestoreIndices(iscol, &cout);
3146:   VecRestoreArrayRead(bb, &b);
3147:   VecRestoreArrayWrite(xx, &x);
3148:   PetscLogFlops(2.0 * a->nz - A->cmap->n);
3149:   return 0;
3150: }

3152: /*
3153:     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3154: */
3155: PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A, IS isrow, IS iscol, const MatFactorInfo *info, Mat *fact)
3156: {
3157:   Mat             B = *fact;
3158:   Mat_SeqAIJ     *a = (Mat_SeqAIJ *)A->data, *b;
3159:   IS              isicol;
3160:   const PetscInt *r, *ic;
3161:   PetscInt        i, n = A->rmap->n, *ai = a->i, *aj = a->j, *ajtmp, *adiag;
3162:   PetscInt       *bi, *bj, *bdiag, *bdiag_rev;
3163:   PetscInt        row, nzi, nzi_bl, nzi_bu, *im, nzi_al, nzi_au;
3164:   PetscInt        nlnk, *lnk;
3165:   PetscBT         lnkbt;
3166:   PetscBool       row_identity, icol_identity;
3167:   MatScalar      *aatmp, *pv, *batmp, *ba, *rtmp, *pc, multiplier, *vtmp, diag_tmp;
3168:   const PetscInt *ics;
3169:   PetscInt        j, nz, *pj, *bjtmp, k, ncut, *jtmp;
3170:   PetscReal       dt = info->dt, shift = info->shiftamount;
3171:   PetscInt        dtcount = (PetscInt)info->dtcount, nnz_max;
3172:   PetscBool       missing;

3174:   if (dt == PETSC_DEFAULT) dt = 0.005;
3175:   if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5 * a->rmax);

3177:   /* ------- symbolic factorization, can be reused ---------*/
3178:   MatMissingDiagonal(A, &missing, &i);
3180:   adiag = a->diag;

3182:   ISInvertPermutation(iscol, PETSC_DECIDE, &isicol);

3184:   /* bdiag is location of diagonal in factor */
3185:   PetscMalloc1(n + 1, &bdiag);     /* becomes b->diag */
3186:   PetscMalloc1(n + 1, &bdiag_rev); /* temporary */

3188:   /* allocate row pointers bi */
3189:   PetscMalloc1(2 * n + 2, &bi);

3191:   /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
3192:   if (dtcount > n - 1) dtcount = n - 1; /* diagonal is excluded */
3193:   nnz_max = ai[n] + 2 * n * dtcount + 2;

3195:   PetscMalloc1(nnz_max + 1, &bj);
3196:   PetscMalloc1(nnz_max + 1, &ba);

3198:   /* put together the new matrix */
3199:   MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL);
3200:   b = (Mat_SeqAIJ *)B->data;

3202:   b->free_a       = PETSC_TRUE;
3203:   b->free_ij      = PETSC_TRUE;
3204:   b->singlemalloc = PETSC_FALSE;

3206:   b->a    = ba;
3207:   b->j    = bj;
3208:   b->i    = bi;
3209:   b->diag = bdiag;
3210:   b->ilen = NULL;
3211:   b->imax = NULL;
3212:   b->row  = isrow;
3213:   b->col  = iscol;
3214:   PetscObjectReference((PetscObject)isrow);
3215:   PetscObjectReference((PetscObject)iscol);
3216:   b->icol = isicol;

3218:   PetscMalloc1(n + 1, &b->solve_work);
3219:   b->maxnz = nnz_max;

3221:   B->factortype            = MAT_FACTOR_ILUDT;
3222:   B->info.factor_mallocs   = 0;
3223:   B->info.fill_ratio_given = ((PetscReal)nnz_max) / ((PetscReal)ai[n]);
3224:   /* ------- end of symbolic factorization ---------*/

3226:   ISGetIndices(isrow, &r);
3227:   ISGetIndices(isicol, &ic);
3228:   ics = ic;

3230:   /* linked list for storing column indices of the active row */
3231:   nlnk = n + 1;
3232:   PetscLLCreate(n, n, nlnk, lnk, lnkbt);

3234:   /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
3235:   PetscMalloc2(n, &im, n, &jtmp);
3236:   /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
3237:   PetscMalloc2(n, &rtmp, n, &vtmp);
3238:   PetscArrayzero(rtmp, n);

3240:   bi[0]         = 0;
3241:   bdiag[0]      = nnz_max - 1; /* location of diag[0] in factor B */
3242:   bdiag_rev[n]  = bdiag[0];
3243:   bi[2 * n + 1] = bdiag[0] + 1; /* endof bj and ba array */
3244:   for (i = 0; i < n; i++) {
3245:     /* copy initial fill into linked list */
3246:     nzi = ai[r[i] + 1] - ai[r[i]];
3248:     nzi_al = adiag[r[i]] - ai[r[i]];
3249:     nzi_au = ai[r[i] + 1] - adiag[r[i]] - 1;
3250:     ajtmp  = aj + ai[r[i]];
3251:     PetscLLAddPerm(nzi, ajtmp, ic, n, &nlnk, lnk, lnkbt);

3253:     /* load in initial (unfactored row) */
3254:     aatmp = a->a + ai[r[i]];
3255:     for (j = 0; j < nzi; j++) rtmp[ics[*ajtmp++]] = *aatmp++;

3257:     /* add pivot rows into linked list */
3258:     row = lnk[n];
3259:     while (row < i) {
3260:       nzi_bl = bi[row + 1] - bi[row] + 1;
3261:       bjtmp  = bj + bdiag[row + 1] + 1; /* points to 1st column next to the diagonal in U */
3262:       PetscLLAddSortedLU(bjtmp, row, &nlnk, lnk, lnkbt, i, nzi_bl, im);
3263:       nzi += nlnk;
3264:       row = lnk[row];
3265:     }

3267:     /* copy data from lnk into jtmp, then initialize lnk */
3268:     PetscLLClean(n, n, nzi, lnk, jtmp, lnkbt);

3270:     /* numerical factorization */
3271:     bjtmp = jtmp;
3272:     row   = *bjtmp++; /* 1st pivot row */
3273:     while (row < i) {
3274:       pc         = rtmp + row;
3275:       pv         = ba + bdiag[row]; /* 1./(diag of the pivot row) */
3276:       multiplier = (*pc) * (*pv);
3277:       *pc        = multiplier;
3278:       if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */
3279:         pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */
3280:         pv = ba + bdiag[row + 1] + 1;
3281:         nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */
3282:         for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3283:         PetscLogFlops(1 + 2.0 * nz);
3284:       }
3285:       row = *bjtmp++;
3286:     }

3288:     /* copy sparse rtmp into contiguous vtmp; separate L and U part */
3289:     diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */
3290:     nzi_bl   = 0;
3291:     j        = 0;
3292:     while (jtmp[j] < i) { /* Note: jtmp is sorted */
3293:       vtmp[j]       = rtmp[jtmp[j]];
3294:       rtmp[jtmp[j]] = 0.0;
3295:       nzi_bl++;
3296:       j++;
3297:     }
3298:     nzi_bu = nzi - nzi_bl - 1;
3299:     while (j < nzi) {
3300:       vtmp[j]       = rtmp[jtmp[j]];
3301:       rtmp[jtmp[j]] = 0.0;
3302:       j++;
3303:     }

3305:     bjtmp = bj + bi[i];
3306:     batmp = ba + bi[i];
3307:     /* apply level dropping rule to L part */
3308:     ncut = nzi_al + dtcount;
3309:     if (ncut < nzi_bl) {
3310:       PetscSortSplit(ncut, nzi_bl, vtmp, jtmp);
3311:       PetscSortIntWithScalarArray(ncut, jtmp, vtmp);
3312:     } else {
3313:       ncut = nzi_bl;
3314:     }
3315:     for (j = 0; j < ncut; j++) {
3316:       bjtmp[j] = jtmp[j];
3317:       batmp[j] = vtmp[j];
3318:     }
3319:     bi[i + 1] = bi[i] + ncut;
3320:     nzi       = ncut + 1;

3322:     /* apply level dropping rule to U part */
3323:     ncut = nzi_au + dtcount;
3324:     if (ncut < nzi_bu) {
3325:       PetscSortSplit(ncut, nzi_bu, vtmp + nzi_bl + 1, jtmp + nzi_bl + 1);
3326:       PetscSortIntWithScalarArray(ncut, jtmp + nzi_bl + 1, vtmp + nzi_bl + 1);
3327:     } else {
3328:       ncut = nzi_bu;
3329:     }
3330:     nzi += ncut;

3332:     /* mark bdiagonal */
3333:     bdiag[i + 1]         = bdiag[i] - (ncut + 1);
3334:     bdiag_rev[n - i - 1] = bdiag[i + 1];
3335:     bi[2 * n - i]        = bi[2 * n - i + 1] - (ncut + 1);
3336:     bjtmp                = bj + bdiag[i];
3337:     batmp                = ba + bdiag[i];
3338:     *bjtmp               = i;
3339:     *batmp               = diag_tmp; /* rtmp[i]; */
3340:     if (*batmp == 0.0) *batmp = dt + shift;
3341:     *batmp = 1.0 / (*batmp); /* invert diagonal entries for simpler triangular solves */

3343:     bjtmp = bj + bdiag[i + 1] + 1;
3344:     batmp = ba + bdiag[i + 1] + 1;
3345:     for (k = 0; k < ncut; k++) {
3346:       bjtmp[k] = jtmp[nzi_bl + 1 + k];
3347:       batmp[k] = vtmp[nzi_bl + 1 + k];
3348:     }

3350:     im[i] = nzi; /* used by PetscLLAddSortedLU() */
3351:   }              /* for (i=0; i<n; i++) */

3354:   ISRestoreIndices(isrow, &r);
3355:   ISRestoreIndices(isicol, &ic);

3357:   PetscLLDestroy(lnk, lnkbt);
3358:   PetscFree2(im, jtmp);
3359:   PetscFree2(rtmp, vtmp);
3360:   PetscFree(bdiag_rev);

3362:   PetscLogFlops(B->cmap->n);
3363:   b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n];

3365:   ISIdentity(isrow, &row_identity);
3366:   ISIdentity(isicol, &icol_identity);
3367:   if (row_identity && icol_identity) {
3368:     B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3369:   } else {
3370:     B->ops->solve = MatSolve_SeqAIJ;
3371:   }

3373:   B->ops->solveadd          = NULL;
3374:   B->ops->solvetranspose    = NULL;
3375:   B->ops->solvetransposeadd = NULL;
3376:   B->ops->matsolve          = NULL;
3377:   B->assembled              = PETSC_TRUE;
3378:   B->preallocated           = PETSC_TRUE;
3379:   return 0;
3380: }

3382: /* a wrapper of MatILUDTFactor_SeqAIJ() */
3383: /*
3384:     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3385: */

3387: PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS row, IS col, const MatFactorInfo *info)
3388: {
3389:   MatILUDTFactor_SeqAIJ(A, row, col, info, &fact);
3390:   return 0;
3391: }

3393: /*
3394:    same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors
3395:    - intend to replace existing MatLUFactorNumeric_SeqAIJ()
3396: */
3397: /*
3398:     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3399: */

3401: PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact, Mat A, const MatFactorInfo *info)
3402: {
3403:   Mat             C = fact;
3404:   Mat_SeqAIJ     *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
3405:   IS              isrow = b->row, isicol = b->icol;
3406:   const PetscInt *r, *ic, *ics;
3407:   PetscInt        i, j, k, n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
3408:   PetscInt       *ajtmp, *bjtmp, nz, nzl, nzu, row, *bdiag = b->diag, *pj;
3409:   MatScalar      *rtmp, *pc, multiplier, *v, *pv, *aa = a->a;
3410:   PetscReal       dt = info->dt, shift = info->shiftamount;
3411:   PetscBool       row_identity, col_identity;

3413:   ISGetIndices(isrow, &r);
3414:   ISGetIndices(isicol, &ic);
3415:   PetscMalloc1(n + 1, &rtmp);
3416:   ics = ic;

3418:   for (i = 0; i < n; i++) {
3419:     /* initialize rtmp array */
3420:     nzl   = bi[i + 1] - bi[i]; /* num of nozeros in L(i,:) */
3421:     bjtmp = bj + bi[i];
3422:     for (j = 0; j < nzl; j++) rtmp[*bjtmp++] = 0.0;
3423:     rtmp[i] = 0.0;
3424:     nzu     = bdiag[i] - bdiag[i + 1]; /* num of nozeros in U(i,:) */
3425:     bjtmp   = bj + bdiag[i + 1] + 1;
3426:     for (j = 0; j < nzu; j++) rtmp[*bjtmp++] = 0.0;

3428:     /* load in initial unfactored row of A */
3429:     nz    = ai[r[i] + 1] - ai[r[i]];
3430:     ajtmp = aj + ai[r[i]];
3431:     v     = aa + ai[r[i]];
3432:     for (j = 0; j < nz; j++) rtmp[ics[*ajtmp++]] = v[j];

3434:     /* numerical factorization */
3435:     bjtmp = bj + bi[i];        /* point to 1st entry of L(i,:) */
3436:     nzl   = bi[i + 1] - bi[i]; /* num of entries in L(i,:) */
3437:     k     = 0;
3438:     while (k < nzl) {
3439:       row        = *bjtmp++;
3440:       pc         = rtmp + row;
3441:       pv         = b->a + bdiag[row]; /* 1./(diag of the pivot row) */
3442:       multiplier = (*pc) * (*pv);
3443:       *pc        = multiplier;
3444:       if (PetscAbsScalar(multiplier) > dt) {
3445:         pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */
3446:         pv = b->a + bdiag[row + 1] + 1;
3447:         nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */
3448:         for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3449:         PetscLogFlops(1 + 2.0 * nz);
3450:       }
3451:       k++;
3452:     }

3454:     /* finished row so stick it into b->a */
3455:     /* L-part */
3456:     pv  = b->a + bi[i];
3457:     pj  = bj + bi[i];
3458:     nzl = bi[i + 1] - bi[i];
3459:     for (j = 0; j < nzl; j++) pv[j] = rtmp[pj[j]];

3461:     /* diagonal: invert diagonal entries for simpler triangular solves */
3462:     if (rtmp[i] == 0.0) rtmp[i] = dt + shift;
3463:     b->a[bdiag[i]] = 1.0 / rtmp[i];

3465:     /* U-part */
3466:     pv  = b->a + bdiag[i + 1] + 1;
3467:     pj  = bj + bdiag[i + 1] + 1;
3468:     nzu = bdiag[i] - bdiag[i + 1] - 1;
3469:     for (j = 0; j < nzu; j++) pv[j] = rtmp[pj[j]];
3470:   }

3472:   PetscFree(rtmp);
3473:   ISRestoreIndices(isicol, &ic);
3474:   ISRestoreIndices(isrow, &r);

3476:   ISIdentity(isrow, &row_identity);
3477:   ISIdentity(isicol, &col_identity);
3478:   if (row_identity && col_identity) {
3479:     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3480:   } else {
3481:     C->ops->solve = MatSolve_SeqAIJ;
3482:   }
3483:   C->ops->solveadd          = NULL;
3484:   C->ops->solvetranspose    = NULL;
3485:   C->ops->solvetransposeadd = NULL;
3486:   C->ops->matsolve          = NULL;
3487:   C->assembled              = PETSC_TRUE;
3488:   C->preallocated           = PETSC_TRUE;

3490:   PetscLogFlops(C->cmap->n);
3491:   return 0;
3492: }