Actual source code: aijfact.c
2: #include <../src/mat/impls/aij/seq/aij.h>
3: #include <../src/mat/impls/sbaij/seq/sbaij.h>
4: #include <petscbt.h>
5: #include <../src/mat/utils/freespace.h>
7: /*
8: Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix
10: This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll()
11: */
12: PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat, MatOrderingType type, IS *irow, IS *icol)
13: {
14: Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->data;
15: PetscInt i, j, jj, k, kk, n = mat->rmap->n, current = 0, newcurrent = 0, *order;
16: const PetscInt *ai = a->i, *aj = a->j;
17: const PetscScalar *aa = a->a;
18: PetscBool *done;
19: PetscReal best, past = 0, future;
21: /* pick initial row */
22: best = -1;
23: for (i = 0; i < n; i++) {
24: future = 0.0;
25: for (j = ai[i]; j < ai[i + 1]; j++) {
26: if (aj[j] != i) future += PetscAbsScalar(aa[j]);
27: else past = PetscAbsScalar(aa[j]);
28: }
29: if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
30: if (past / future > best) {
31: best = past / future;
32: current = i;
33: }
34: }
36: PetscMalloc1(n, &done);
37: PetscArrayzero(done, n);
38: PetscMalloc1(n, &order);
39: order[0] = current;
40: for (i = 0; i < n - 1; i++) {
41: done[current] = PETSC_TRUE;
42: best = -1;
43: /* loop over all neighbors of current pivot */
44: for (j = ai[current]; j < ai[current + 1]; j++) {
45: jj = aj[j];
46: if (done[jj]) continue;
47: /* loop over columns of potential next row computing weights for below and above diagonal */
48: past = future = 0.0;
49: for (k = ai[jj]; k < ai[jj + 1]; k++) {
50: kk = aj[k];
51: if (done[kk]) past += PetscAbsScalar(aa[k]);
52: else if (kk != jj) future += PetscAbsScalar(aa[k]);
53: }
54: if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
55: if (past / future > best) {
56: best = past / future;
57: newcurrent = jj;
58: }
59: }
60: if (best == -1) { /* no neighbors to select from so select best of all that remain */
61: best = -1;
62: for (k = 0; k < n; k++) {
63: if (done[k]) continue;
64: future = 0.0;
65: past = 0.0;
66: for (j = ai[k]; j < ai[k + 1]; j++) {
67: kk = aj[j];
68: if (done[kk]) past += PetscAbsScalar(aa[j]);
69: else if (kk != k) future += PetscAbsScalar(aa[j]);
70: }
71: if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
72: if (past / future > best) {
73: best = past / future;
74: newcurrent = k;
75: }
76: }
77: }
79: current = newcurrent;
80: order[i + 1] = current;
81: }
82: ISCreateGeneral(PETSC_COMM_SELF, n, order, PETSC_COPY_VALUES, irow);
83: *icol = *irow;
84: PetscObjectReference((PetscObject)*irow);
85: PetscFree(done);
86: PetscFree(order);
87: return 0;
88: }
90: static PetscErrorCode MatFactorGetSolverType_petsc(Mat A, MatSolverType *type)
91: {
92: *type = MATSOLVERPETSC;
93: return 0;
94: }
96: PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A, MatFactorType ftype, Mat *B)
97: {
98: PetscInt n = A->rmap->n;
100: #if defined(PETSC_USE_COMPLEX)
102: #endif
103: MatCreate(PetscObjectComm((PetscObject)A), B);
104: MatSetSizes(*B, n, n, n, n);
105: if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) {
106: MatSetType(*B, MATSEQAIJ);
108: (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
109: (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
111: MatSetBlockSizesFromMats(*B, A, A);
112: PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_LU]);
113: PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILU]);
114: PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILUDT]);
115: } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
116: MatSetType(*B, MATSEQSBAIJ);
117: MatSeqSBAIJSetPreallocation(*B, 1, MAT_SKIP_ALLOCATION, NULL);
119: (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ;
120: (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
121: PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_CHOLESKY]);
122: PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ICC]);
123: } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported");
124: (*B)->factortype = ftype;
126: PetscFree((*B)->solvertype);
127: PetscStrallocpy(MATSOLVERPETSC, &(*B)->solvertype);
128: (*B)->canuseordering = PETSC_TRUE;
129: PetscObjectComposeFunction((PetscObject)*B, "MatFactorGetSolverType_C", MatFactorGetSolverType_petsc);
130: return 0;
131: }
133: PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
134: {
135: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b;
136: IS isicol;
137: const PetscInt *r, *ic;
138: PetscInt i, n = A->rmap->n, *ai = a->i, *aj = a->j;
139: PetscInt *bi, *bj, *ajtmp;
140: PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
141: PetscReal f;
142: PetscInt nlnk, *lnk, k, **bi_ptr;
143: PetscFreeSpaceList free_space = NULL, current_space = NULL;
144: PetscBT lnkbt;
145: PetscBool missing;
148: MatMissingDiagonal(A, &missing, &i);
151: ISInvertPermutation(iscol, PETSC_DECIDE, &isicol);
152: ISGetIndices(isrow, &r);
153: ISGetIndices(isicol, &ic);
155: /* get new row pointers */
156: PetscMalloc1(n + 1, &bi);
157: bi[0] = 0;
159: /* bdiag is location of diagonal in factor */
160: PetscMalloc1(n + 1, &bdiag);
161: bdiag[0] = 0;
163: /* linked list for storing column indices of the active row */
164: nlnk = n + 1;
165: PetscLLCreate(n, n, nlnk, lnk, lnkbt);
167: PetscMalloc2(n + 1, &bi_ptr, n + 1, &im);
169: /* initial FreeSpace size is f*(ai[n]+1) */
170: f = info->fill;
171: if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
172: PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space);
173: current_space = free_space;
175: for (i = 0; i < n; i++) {
176: /* copy previous fill into linked list */
177: nzi = 0;
178: nnz = ai[r[i] + 1] - ai[r[i]];
179: ajtmp = aj + ai[r[i]];
180: PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt);
181: nzi += nlnk;
183: /* add pivot rows into linked list */
184: row = lnk[n];
185: while (row < i) {
186: nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
187: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
188: PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im);
189: nzi += nlnk;
190: row = lnk[row];
191: }
192: bi[i + 1] = bi[i] + nzi;
193: im[i] = nzi;
195: /* mark bdiag */
196: nzbd = 0;
197: nnz = nzi;
198: k = lnk[n];
199: while (nnz-- && k < i) {
200: nzbd++;
201: k = lnk[k];
202: }
203: bdiag[i] = bi[i] + nzbd;
205: /* if free space is not available, make more free space */
206: if (current_space->local_remaining < nzi) {
207: nnz = PetscIntMultTruncate(n - i, nzi); /* estimated and max additional space needed */
208: PetscFreeSpaceGet(nnz, ¤t_space);
209: reallocs++;
210: }
212: /* copy data into free space, then initialize lnk */
213: PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt);
215: bi_ptr[i] = current_space->array;
216: current_space->array += nzi;
217: current_space->local_used += nzi;
218: current_space->local_remaining -= nzi;
219: }
220: #if defined(PETSC_USE_INFO)
221: if (ai[n] != 0) {
222: PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
223: PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af);
224: PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af);
225: PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af);
226: PetscInfo(A, "for best performance.\n");
227: } else {
228: PetscInfo(A, "Empty matrix\n");
229: }
230: #endif
232: ISRestoreIndices(isrow, &r);
233: ISRestoreIndices(isicol, &ic);
235: /* destroy list of free space and other temporary array(s) */
236: PetscMalloc1(bi[n] + 1, &bj);
237: PetscFreeSpaceContiguous(&free_space, bj);
238: PetscLLDestroy(lnk, lnkbt);
239: PetscFree2(bi_ptr, im);
241: /* put together the new matrix */
242: MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL);
243: b = (Mat_SeqAIJ *)(B)->data;
245: b->free_a = PETSC_TRUE;
246: b->free_ij = PETSC_TRUE;
247: b->singlemalloc = PETSC_FALSE;
249: PetscMalloc1(bi[n] + 1, &b->a);
250: b->j = bj;
251: b->i = bi;
252: b->diag = bdiag;
253: b->ilen = NULL;
254: b->imax = NULL;
255: b->row = isrow;
256: b->col = iscol;
257: PetscObjectReference((PetscObject)isrow);
258: PetscObjectReference((PetscObject)iscol);
259: b->icol = isicol;
260: PetscMalloc1(n + 1, &b->solve_work);
262: /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */
263: b->maxnz = b->nz = bi[n];
265: (B)->factortype = MAT_FACTOR_LU;
266: (B)->info.factor_mallocs = reallocs;
267: (B)->info.fill_ratio_given = f;
269: if (ai[n]) {
270: (B)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
271: } else {
272: (B)->info.fill_ratio_needed = 0.0;
273: }
274: (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
275: if (a->inode.size) (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
276: return 0;
277: }
279: PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
280: {
281: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b;
282: IS isicol;
283: const PetscInt *r, *ic, *ai = a->i, *aj = a->j, *ajtmp;
284: PetscInt i, n = A->rmap->n;
285: PetscInt *bi, *bj;
286: PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
287: PetscReal f;
288: PetscInt nlnk, *lnk, k, **bi_ptr;
289: PetscFreeSpaceList free_space = NULL, current_space = NULL;
290: PetscBT lnkbt;
291: PetscBool missing;
294: MatMissingDiagonal(A, &missing, &i);
297: ISInvertPermutation(iscol, PETSC_DECIDE, &isicol);
298: ISGetIndices(isrow, &r);
299: ISGetIndices(isicol, &ic);
301: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
302: PetscMalloc1(n + 1, &bi);
303: PetscMalloc1(n + 1, &bdiag);
304: bi[0] = bdiag[0] = 0;
306: /* linked list for storing column indices of the active row */
307: nlnk = n + 1;
308: PetscLLCreate(n, n, nlnk, lnk, lnkbt);
310: PetscMalloc2(n + 1, &bi_ptr, n + 1, &im);
312: /* initial FreeSpace size is f*(ai[n]+1) */
313: f = info->fill;
314: if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
315: PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space);
316: current_space = free_space;
318: for (i = 0; i < n; i++) {
319: /* copy previous fill into linked list */
320: nzi = 0;
321: nnz = ai[r[i] + 1] - ai[r[i]];
322: ajtmp = aj + ai[r[i]];
323: PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt);
324: nzi += nlnk;
326: /* add pivot rows into linked list */
327: row = lnk[n];
328: while (row < i) {
329: nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */
330: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
331: PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im);
332: nzi += nlnk;
333: row = lnk[row];
334: }
335: bi[i + 1] = bi[i] + nzi;
336: im[i] = nzi;
338: /* mark bdiag */
339: nzbd = 0;
340: nnz = nzi;
341: k = lnk[n];
342: while (nnz-- && k < i) {
343: nzbd++;
344: k = lnk[k];
345: }
346: bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
348: /* if free space is not available, make more free space */
349: if (current_space->local_remaining < nzi) {
350: /* estimated additional space needed */
351: nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - 1, nzi));
352: PetscFreeSpaceGet(nnz, ¤t_space);
353: reallocs++;
354: }
356: /* copy data into free space, then initialize lnk */
357: PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt);
359: bi_ptr[i] = current_space->array;
360: current_space->array += nzi;
361: current_space->local_used += nzi;
362: current_space->local_remaining -= nzi;
363: }
365: ISRestoreIndices(isrow, &r);
366: ISRestoreIndices(isicol, &ic);
368: /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
369: PetscMalloc1(bi[n] + 1, &bj);
370: PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag);
371: PetscLLDestroy(lnk, lnkbt);
372: PetscFree2(bi_ptr, im);
374: /* put together the new matrix */
375: MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL);
376: b = (Mat_SeqAIJ *)(B)->data;
378: b->free_a = PETSC_TRUE;
379: b->free_ij = PETSC_TRUE;
380: b->singlemalloc = PETSC_FALSE;
382: PetscMalloc1(bdiag[0] + 1, &b->a);
384: b->j = bj;
385: b->i = bi;
386: b->diag = bdiag;
387: b->ilen = NULL;
388: b->imax = NULL;
389: b->row = isrow;
390: b->col = iscol;
391: PetscObjectReference((PetscObject)isrow);
392: PetscObjectReference((PetscObject)iscol);
393: b->icol = isicol;
394: PetscMalloc1(n + 1, &b->solve_work);
396: /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */
397: b->maxnz = b->nz = bdiag[0] + 1;
399: B->factortype = MAT_FACTOR_LU;
400: B->info.factor_mallocs = reallocs;
401: B->info.fill_ratio_given = f;
403: if (ai[n]) {
404: B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
405: } else {
406: B->info.fill_ratio_needed = 0.0;
407: }
408: #if defined(PETSC_USE_INFO)
409: if (ai[n] != 0) {
410: PetscReal af = B->info.fill_ratio_needed;
411: PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af);
412: PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af);
413: PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af);
414: PetscInfo(A, "for best performance.\n");
415: } else {
416: PetscInfo(A, "Empty matrix\n");
417: }
418: #endif
419: B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
420: if (a->inode.size) B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
421: MatSeqAIJCheckInode_FactorLU(B);
422: return 0;
423: }
425: /*
426: Trouble in factorization, should we dump the original matrix?
427: */
428: PetscErrorCode MatFactorDumpMatrix(Mat A)
429: {
430: PetscBool flg = PETSC_FALSE;
432: PetscOptionsGetBool(((PetscObject)A)->options, NULL, "-mat_factor_dump_on_error", &flg, NULL);
433: if (flg) {
434: PetscViewer viewer;
435: char filename[PETSC_MAX_PATH_LEN];
437: PetscSNPrintf(filename, PETSC_MAX_PATH_LEN, "matrix_factor_error.%d", PetscGlobalRank);
438: PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A), filename, FILE_MODE_WRITE, &viewer);
439: MatView(A, viewer);
440: PetscViewerDestroy(&viewer);
441: }
442: return 0;
443: }
445: PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info)
446: {
447: Mat C = B;
448: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
449: IS isrow = b->row, isicol = b->icol;
450: const PetscInt *r, *ic, *ics;
451: const PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, *bdiag = b->diag;
452: PetscInt i, j, k, nz, nzL, row, *pj;
453: const PetscInt *ajtmp, *bjtmp;
454: MatScalar *rtmp, *pc, multiplier, *pv;
455: const MatScalar *aa = a->a, *v;
456: PetscBool row_identity, col_identity;
457: FactorShiftCtx sctx;
458: const PetscInt *ddiag;
459: PetscReal rs;
460: MatScalar d;
462: /* MatPivotSetUp(): initialize shift context sctx */
463: PetscMemzero(&sctx, sizeof(FactorShiftCtx));
465: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
466: ddiag = a->diag;
467: sctx.shift_top = info->zeropivot;
468: for (i = 0; i < n; i++) {
469: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
470: d = (aa)[ddiag[i]];
471: rs = -PetscAbsScalar(d) - PetscRealPart(d);
472: v = aa + ai[i];
473: nz = ai[i + 1] - ai[i];
474: for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
475: if (rs > sctx.shift_top) sctx.shift_top = rs;
476: }
477: sctx.shift_top *= 1.1;
478: sctx.nshift_max = 5;
479: sctx.shift_lo = 0.;
480: sctx.shift_hi = 1.;
481: }
483: ISGetIndices(isrow, &r);
484: ISGetIndices(isicol, &ic);
485: PetscMalloc1(n + 1, &rtmp);
486: ics = ic;
488: do {
489: sctx.newshift = PETSC_FALSE;
490: for (i = 0; i < n; i++) {
491: /* zero rtmp */
492: /* L part */
493: nz = bi[i + 1] - bi[i];
494: bjtmp = bj + bi[i];
495: for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
497: /* U part */
498: nz = bdiag[i] - bdiag[i + 1];
499: bjtmp = bj + bdiag[i + 1] + 1;
500: for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
502: /* load in initial (unfactored row) */
503: nz = ai[r[i] + 1] - ai[r[i]];
504: ajtmp = aj + ai[r[i]];
505: v = aa + ai[r[i]];
506: for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
507: /* ZeropivotApply() */
508: rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */
510: /* elimination */
511: bjtmp = bj + bi[i];
512: row = *bjtmp++;
513: nzL = bi[i + 1] - bi[i];
514: for (k = 0; k < nzL; k++) {
515: pc = rtmp + row;
516: if (*pc != 0.0) {
517: pv = b->a + bdiag[row];
518: multiplier = *pc * (*pv);
519: *pc = multiplier;
521: pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */
522: pv = b->a + bdiag[row + 1] + 1;
523: nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:) excluding diag */
525: for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
526: PetscLogFlops(1 + 2.0 * nz);
527: }
528: row = *bjtmp++;
529: }
531: /* finished row so stick it into b->a */
532: rs = 0.0;
533: /* L part */
534: pv = b->a + bi[i];
535: pj = b->j + bi[i];
536: nz = bi[i + 1] - bi[i];
537: for (j = 0; j < nz; j++) {
538: pv[j] = rtmp[pj[j]];
539: rs += PetscAbsScalar(pv[j]);
540: }
542: /* U part */
543: pv = b->a + bdiag[i + 1] + 1;
544: pj = b->j + bdiag[i + 1] + 1;
545: nz = bdiag[i] - bdiag[i + 1] - 1;
546: for (j = 0; j < nz; j++) {
547: pv[j] = rtmp[pj[j]];
548: rs += PetscAbsScalar(pv[j]);
549: }
551: sctx.rs = rs;
552: sctx.pv = rtmp[i];
553: MatPivotCheck(B, A, info, &sctx, i);
554: if (sctx.newshift) break; /* break for-loop */
555: rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */
557: /* Mark diagonal and invert diagonal for simpler triangular solves */
558: pv = b->a + bdiag[i];
559: *pv = 1.0 / rtmp[i];
561: } /* endof for (i=0; i<n; i++) { */
563: /* MatPivotRefine() */
564: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
565: /*
566: * if no shift in this attempt & shifting & started shifting & can refine,
567: * then try lower shift
568: */
569: sctx.shift_hi = sctx.shift_fraction;
570: sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
571: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
572: sctx.newshift = PETSC_TRUE;
573: sctx.nshift++;
574: }
575: } while (sctx.newshift);
577: PetscFree(rtmp);
578: ISRestoreIndices(isicol, &ic);
579: ISRestoreIndices(isrow, &r);
581: ISIdentity(isrow, &row_identity);
582: ISIdentity(isicol, &col_identity);
583: if (b->inode.size) {
584: C->ops->solve = MatSolve_SeqAIJ_Inode;
585: } else if (row_identity && col_identity) {
586: C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
587: } else {
588: C->ops->solve = MatSolve_SeqAIJ;
589: }
590: C->ops->solveadd = MatSolveAdd_SeqAIJ;
591: C->ops->solvetranspose = MatSolveTranspose_SeqAIJ;
592: C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
593: C->ops->matsolve = MatMatSolve_SeqAIJ;
594: C->assembled = PETSC_TRUE;
595: C->preallocated = PETSC_TRUE;
597: PetscLogFlops(C->cmap->n);
599: /* MatShiftView(A,info,&sctx) */
600: if (sctx.nshift) {
601: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
602: PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top);
603: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
604: PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount);
605: } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
606: PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount);
607: }
608: }
609: return 0;
610: }
612: PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info)
613: {
614: Mat C = B;
615: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
616: IS isrow = b->row, isicol = b->icol;
617: const PetscInt *r, *ic, *ics;
618: PetscInt nz, row, i, j, n = A->rmap->n, diag;
619: const PetscInt *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
620: const PetscInt *ajtmp, *bjtmp, *diag_offset = b->diag, *pj;
621: MatScalar *pv, *rtmp, *pc, multiplier, d;
622: const MatScalar *v, *aa = a->a;
623: PetscReal rs = 0.0;
624: FactorShiftCtx sctx;
625: const PetscInt *ddiag;
626: PetscBool row_identity, col_identity;
628: /* MatPivotSetUp(): initialize shift context sctx */
629: PetscMemzero(&sctx, sizeof(FactorShiftCtx));
631: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
632: ddiag = a->diag;
633: sctx.shift_top = info->zeropivot;
634: for (i = 0; i < n; i++) {
635: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
636: d = (aa)[ddiag[i]];
637: rs = -PetscAbsScalar(d) - PetscRealPart(d);
638: v = aa + ai[i];
639: nz = ai[i + 1] - ai[i];
640: for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
641: if (rs > sctx.shift_top) sctx.shift_top = rs;
642: }
643: sctx.shift_top *= 1.1;
644: sctx.nshift_max = 5;
645: sctx.shift_lo = 0.;
646: sctx.shift_hi = 1.;
647: }
649: ISGetIndices(isrow, &r);
650: ISGetIndices(isicol, &ic);
651: PetscMalloc1(n + 1, &rtmp);
652: ics = ic;
654: do {
655: sctx.newshift = PETSC_FALSE;
656: for (i = 0; i < n; i++) {
657: nz = bi[i + 1] - bi[i];
658: bjtmp = bj + bi[i];
659: for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
661: /* load in initial (unfactored row) */
662: nz = ai[r[i] + 1] - ai[r[i]];
663: ajtmp = aj + ai[r[i]];
664: v = aa + ai[r[i]];
665: for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
666: rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
668: row = *bjtmp++;
669: while (row < i) {
670: pc = rtmp + row;
671: if (*pc != 0.0) {
672: pv = b->a + diag_offset[row];
673: pj = b->j + diag_offset[row] + 1;
674: multiplier = *pc / *pv++;
675: *pc = multiplier;
676: nz = bi[row + 1] - diag_offset[row] - 1;
677: for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
678: PetscLogFlops(1 + 2.0 * nz);
679: }
680: row = *bjtmp++;
681: }
682: /* finished row so stick it into b->a */
683: pv = b->a + bi[i];
684: pj = b->j + bi[i];
685: nz = bi[i + 1] - bi[i];
686: diag = diag_offset[i] - bi[i];
687: rs = 0.0;
688: for (j = 0; j < nz; j++) {
689: pv[j] = rtmp[pj[j]];
690: rs += PetscAbsScalar(pv[j]);
691: }
692: rs -= PetscAbsScalar(pv[diag]);
694: sctx.rs = rs;
695: sctx.pv = pv[diag];
696: MatPivotCheck(B, A, info, &sctx, i);
697: if (sctx.newshift) break;
698: pv[diag] = sctx.pv;
699: }
701: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
702: /*
703: * if no shift in this attempt & shifting & started shifting & can refine,
704: * then try lower shift
705: */
706: sctx.shift_hi = sctx.shift_fraction;
707: sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
708: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
709: sctx.newshift = PETSC_TRUE;
710: sctx.nshift++;
711: }
712: } while (sctx.newshift);
714: /* invert diagonal entries for simpler triangular solves */
715: for (i = 0; i < n; i++) b->a[diag_offset[i]] = 1.0 / b->a[diag_offset[i]];
716: PetscFree(rtmp);
717: ISRestoreIndices(isicol, &ic);
718: ISRestoreIndices(isrow, &r);
720: ISIdentity(isrow, &row_identity);
721: ISIdentity(isicol, &col_identity);
722: if (row_identity && col_identity) {
723: C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace;
724: } else {
725: C->ops->solve = MatSolve_SeqAIJ_inplace;
726: }
727: C->ops->solveadd = MatSolveAdd_SeqAIJ_inplace;
728: C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
729: C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
730: C->ops->matsolve = MatMatSolve_SeqAIJ_inplace;
732: C->assembled = PETSC_TRUE;
733: C->preallocated = PETSC_TRUE;
735: PetscLogFlops(C->cmap->n);
736: if (sctx.nshift) {
737: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
738: PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top);
739: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
740: PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount);
741: }
742: }
743: (C)->ops->solve = MatSolve_SeqAIJ_inplace;
744: (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
746: MatSeqAIJCheckInode(C);
747: return 0;
748: }
750: /*
751: This routine implements inplace ILU(0) with row or/and column permutations.
752: Input:
753: A - original matrix
754: Output;
755: A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
756: a->j (col index) is permuted by the inverse of colperm, then sorted
757: a->a reordered accordingly with a->j
758: a->diag (ptr to diagonal elements) is updated.
759: */
760: PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B, Mat A, const MatFactorInfo *info)
761: {
762: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
763: IS isrow = a->row, isicol = a->icol;
764: const PetscInt *r, *ic, *ics;
765: PetscInt i, j, n = A->rmap->n, *ai = a->i, *aj = a->j;
766: PetscInt *ajtmp, nz, row;
767: PetscInt *diag = a->diag, nbdiag, *pj;
768: PetscScalar *rtmp, *pc, multiplier, d;
769: MatScalar *pv, *v;
770: PetscReal rs;
771: FactorShiftCtx sctx;
772: const MatScalar *aa = a->a, *vtmp;
776: /* MatPivotSetUp(): initialize shift context sctx */
777: PetscMemzero(&sctx, sizeof(FactorShiftCtx));
779: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
780: const PetscInt *ddiag = a->diag;
781: sctx.shift_top = info->zeropivot;
782: for (i = 0; i < n; i++) {
783: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
784: d = (aa)[ddiag[i]];
785: rs = -PetscAbsScalar(d) - PetscRealPart(d);
786: vtmp = aa + ai[i];
787: nz = ai[i + 1] - ai[i];
788: for (j = 0; j < nz; j++) rs += PetscAbsScalar(vtmp[j]);
789: if (rs > sctx.shift_top) sctx.shift_top = rs;
790: }
791: sctx.shift_top *= 1.1;
792: sctx.nshift_max = 5;
793: sctx.shift_lo = 0.;
794: sctx.shift_hi = 1.;
795: }
797: ISGetIndices(isrow, &r);
798: ISGetIndices(isicol, &ic);
799: PetscMalloc1(n + 1, &rtmp);
800: PetscArrayzero(rtmp, n + 1);
801: ics = ic;
803: #if defined(MV)
804: sctx.shift_top = 0.;
805: sctx.nshift_max = 0;
806: sctx.shift_lo = 0.;
807: sctx.shift_hi = 0.;
808: sctx.shift_fraction = 0.;
810: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
811: sctx.shift_top = 0.;
812: for (i = 0; i < n; i++) {
813: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
814: d = (a->a)[diag[i]];
815: rs = -PetscAbsScalar(d) - PetscRealPart(d);
816: v = a->a + ai[i];
817: nz = ai[i + 1] - ai[i];
818: for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
819: if (rs > sctx.shift_top) sctx.shift_top = rs;
820: }
821: if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
822: sctx.shift_top *= 1.1;
823: sctx.nshift_max = 5;
824: sctx.shift_lo = 0.;
825: sctx.shift_hi = 1.;
826: }
828: sctx.shift_amount = 0.;
829: sctx.nshift = 0;
830: #endif
832: do {
833: sctx.newshift = PETSC_FALSE;
834: for (i = 0; i < n; i++) {
835: /* load in initial unfactored row */
836: nz = ai[r[i] + 1] - ai[r[i]];
837: ajtmp = aj + ai[r[i]];
838: v = a->a + ai[r[i]];
839: /* sort permuted ajtmp and values v accordingly */
840: for (j = 0; j < nz; j++) ajtmp[j] = ics[ajtmp[j]];
841: PetscSortIntWithScalarArray(nz, ajtmp, v);
843: diag[r[i]] = ai[r[i]];
844: for (j = 0; j < nz; j++) {
845: rtmp[ajtmp[j]] = v[j];
846: if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
847: }
848: rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
850: row = *ajtmp++;
851: while (row < i) {
852: pc = rtmp + row;
853: if (*pc != 0.0) {
854: pv = a->a + diag[r[row]];
855: pj = aj + diag[r[row]] + 1;
857: multiplier = *pc / *pv++;
858: *pc = multiplier;
859: nz = ai[r[row] + 1] - diag[r[row]] - 1;
860: for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
861: PetscLogFlops(1 + 2.0 * nz);
862: }
863: row = *ajtmp++;
864: }
865: /* finished row so overwrite it onto a->a */
866: pv = a->a + ai[r[i]];
867: pj = aj + ai[r[i]];
868: nz = ai[r[i] + 1] - ai[r[i]];
869: nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
871: rs = 0.0;
872: for (j = 0; j < nz; j++) {
873: pv[j] = rtmp[pj[j]];
874: if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
875: }
877: sctx.rs = rs;
878: sctx.pv = pv[nbdiag];
879: MatPivotCheck(B, A, info, &sctx, i);
880: if (sctx.newshift) break;
881: pv[nbdiag] = sctx.pv;
882: }
884: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
885: /*
886: * if no shift in this attempt & shifting & started shifting & can refine,
887: * then try lower shift
888: */
889: sctx.shift_hi = sctx.shift_fraction;
890: sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
891: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
892: sctx.newshift = PETSC_TRUE;
893: sctx.nshift++;
894: }
895: } while (sctx.newshift);
897: /* invert diagonal entries for simpler triangular solves */
898: for (i = 0; i < n; i++) a->a[diag[r[i]]] = 1.0 / a->a[diag[r[i]]];
900: PetscFree(rtmp);
901: ISRestoreIndices(isicol, &ic);
902: ISRestoreIndices(isrow, &r);
904: A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm;
905: A->ops->solveadd = MatSolveAdd_SeqAIJ_inplace;
906: A->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
907: A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
909: A->assembled = PETSC_TRUE;
910: A->preallocated = PETSC_TRUE;
912: PetscLogFlops(A->cmap->n);
913: if (sctx.nshift) {
914: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
915: PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top);
916: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
917: PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount);
918: }
919: }
920: return 0;
921: }
923: /* ----------------------------------------------------------- */
924: PetscErrorCode MatLUFactor_SeqAIJ(Mat A, IS row, IS col, const MatFactorInfo *info)
925: {
926: Mat C;
928: MatGetFactor(A, MATSOLVERPETSC, MAT_FACTOR_LU, &C);
929: MatLUFactorSymbolic(C, A, row, col, info);
930: MatLUFactorNumeric(C, A, info);
932: A->ops->solve = C->ops->solve;
933: A->ops->solvetranspose = C->ops->solvetranspose;
935: MatHeaderMerge(A, &C);
936: return 0;
937: }
938: /* ----------------------------------------------------------- */
940: PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A, Vec bb, Vec xx)
941: {
942: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
943: IS iscol = a->col, isrow = a->row;
944: PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
945: PetscInt nz;
946: const PetscInt *rout, *cout, *r, *c;
947: PetscScalar *x, *tmp, *tmps, sum;
948: const PetscScalar *b;
949: const MatScalar *aa = a->a, *v;
951: if (!n) return 0;
953: VecGetArrayRead(bb, &b);
954: VecGetArrayWrite(xx, &x);
955: tmp = a->solve_work;
957: ISGetIndices(isrow, &rout);
958: r = rout;
959: ISGetIndices(iscol, &cout);
960: c = cout + (n - 1);
962: /* forward solve the lower triangular */
963: tmp[0] = b[*r++];
964: tmps = tmp;
965: for (i = 1; i < n; i++) {
966: v = aa + ai[i];
967: vi = aj + ai[i];
968: nz = a->diag[i] - ai[i];
969: sum = b[*r++];
970: PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
971: tmp[i] = sum;
972: }
974: /* backward solve the upper triangular */
975: for (i = n - 1; i >= 0; i--) {
976: v = aa + a->diag[i] + 1;
977: vi = aj + a->diag[i] + 1;
978: nz = ai[i + 1] - a->diag[i] - 1;
979: sum = tmp[i];
980: PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
981: x[*c--] = tmp[i] = sum * aa[a->diag[i]];
982: }
984: ISRestoreIndices(isrow, &rout);
985: ISRestoreIndices(iscol, &cout);
986: VecRestoreArrayRead(bb, &b);
987: VecRestoreArrayWrite(xx, &x);
988: PetscLogFlops(2.0 * a->nz - A->cmap->n);
989: return 0;
990: }
992: PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A, Mat B, Mat X)
993: {
994: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
995: IS iscol = a->col, isrow = a->row;
996: PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
997: PetscInt nz, neq, ldb, ldx;
998: const PetscInt *rout, *cout, *r, *c;
999: PetscScalar *x, *tmp = a->solve_work, *tmps, sum;
1000: const PetscScalar *b, *aa = a->a, *v;
1001: PetscBool isdense;
1003: if (!n) return 0;
1004: PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense);
1006: if (X != B) {
1007: PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense);
1009: }
1010: MatDenseGetArrayRead(B, &b);
1011: MatDenseGetLDA(B, &ldb);
1012: MatDenseGetArray(X, &x);
1013: MatDenseGetLDA(X, &ldx);
1014: ISGetIndices(isrow, &rout);
1015: r = rout;
1016: ISGetIndices(iscol, &cout);
1017: c = cout;
1018: for (neq = 0; neq < B->cmap->n; neq++) {
1019: /* forward solve the lower triangular */
1020: tmp[0] = b[r[0]];
1021: tmps = tmp;
1022: for (i = 1; i < n; i++) {
1023: v = aa + ai[i];
1024: vi = aj + ai[i];
1025: nz = a->diag[i] - ai[i];
1026: sum = b[r[i]];
1027: PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1028: tmp[i] = sum;
1029: }
1030: /* backward solve the upper triangular */
1031: for (i = n - 1; i >= 0; i--) {
1032: v = aa + a->diag[i] + 1;
1033: vi = aj + a->diag[i] + 1;
1034: nz = ai[i + 1] - a->diag[i] - 1;
1035: sum = tmp[i];
1036: PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1037: x[c[i]] = tmp[i] = sum * aa[a->diag[i]];
1038: }
1039: b += ldb;
1040: x += ldx;
1041: }
1042: ISRestoreIndices(isrow, &rout);
1043: ISRestoreIndices(iscol, &cout);
1044: MatDenseRestoreArrayRead(B, &b);
1045: MatDenseRestoreArray(X, &x);
1046: PetscLogFlops(B->cmap->n * (2.0 * a->nz - n));
1047: return 0;
1048: }
1050: PetscErrorCode MatMatSolve_SeqAIJ(Mat A, Mat B, Mat X)
1051: {
1052: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1053: IS iscol = a->col, isrow = a->row;
1054: PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
1055: PetscInt nz, neq, ldb, ldx;
1056: const PetscInt *rout, *cout, *r, *c;
1057: PetscScalar *x, *tmp = a->solve_work, sum;
1058: const PetscScalar *b, *aa = a->a, *v;
1059: PetscBool isdense;
1061: if (!n) return 0;
1062: PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense);
1064: if (X != B) {
1065: PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense);
1067: }
1068: MatDenseGetArrayRead(B, &b);
1069: MatDenseGetLDA(B, &ldb);
1070: MatDenseGetArray(X, &x);
1071: MatDenseGetLDA(X, &ldx);
1072: ISGetIndices(isrow, &rout);
1073: r = rout;
1074: ISGetIndices(iscol, &cout);
1075: c = cout;
1076: for (neq = 0; neq < B->cmap->n; neq++) {
1077: /* forward solve the lower triangular */
1078: tmp[0] = b[r[0]];
1079: v = aa;
1080: vi = aj;
1081: for (i = 1; i < n; i++) {
1082: nz = ai[i + 1] - ai[i];
1083: sum = b[r[i]];
1084: PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
1085: tmp[i] = sum;
1086: v += nz;
1087: vi += nz;
1088: }
1089: /* backward solve the upper triangular */
1090: for (i = n - 1; i >= 0; i--) {
1091: v = aa + adiag[i + 1] + 1;
1092: vi = aj + adiag[i + 1] + 1;
1093: nz = adiag[i] - adiag[i + 1] - 1;
1094: sum = tmp[i];
1095: PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
1096: x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
1097: }
1098: b += ldb;
1099: x += ldx;
1100: }
1101: ISRestoreIndices(isrow, &rout);
1102: ISRestoreIndices(iscol, &cout);
1103: MatDenseRestoreArrayRead(B, &b);
1104: MatDenseRestoreArray(X, &x);
1105: PetscLogFlops(B->cmap->n * (2.0 * a->nz - n));
1106: return 0;
1107: }
1109: PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A, Vec bb, Vec xx)
1110: {
1111: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1112: IS iscol = a->col, isrow = a->row;
1113: const PetscInt *r, *c, *rout, *cout;
1114: PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
1115: PetscInt nz, row;
1116: PetscScalar *x, *tmp, *tmps, sum;
1117: const PetscScalar *b;
1118: const MatScalar *aa = a->a, *v;
1120: if (!n) return 0;
1122: VecGetArrayRead(bb, &b);
1123: VecGetArrayWrite(xx, &x);
1124: tmp = a->solve_work;
1126: ISGetIndices(isrow, &rout);
1127: r = rout;
1128: ISGetIndices(iscol, &cout);
1129: c = cout + (n - 1);
1131: /* forward solve the lower triangular */
1132: tmp[0] = b[*r++];
1133: tmps = tmp;
1134: for (row = 1; row < n; row++) {
1135: i = rout[row]; /* permuted row */
1136: v = aa + ai[i];
1137: vi = aj + ai[i];
1138: nz = a->diag[i] - ai[i];
1139: sum = b[*r++];
1140: PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1141: tmp[row] = sum;
1142: }
1144: /* backward solve the upper triangular */
1145: for (row = n - 1; row >= 0; row--) {
1146: i = rout[row]; /* permuted row */
1147: v = aa + a->diag[i] + 1;
1148: vi = aj + a->diag[i] + 1;
1149: nz = ai[i + 1] - a->diag[i] - 1;
1150: sum = tmp[row];
1151: PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1152: x[*c--] = tmp[row] = sum * aa[a->diag[i]];
1153: }
1155: ISRestoreIndices(isrow, &rout);
1156: ISRestoreIndices(iscol, &cout);
1157: VecRestoreArrayRead(bb, &b);
1158: VecRestoreArrayWrite(xx, &x);
1159: PetscLogFlops(2.0 * a->nz - A->cmap->n);
1160: return 0;
1161: }
1163: /* ----------------------------------------------------------- */
1164: #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
1165: PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx)
1166: {
1167: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1168: PetscInt n = A->rmap->n;
1169: const PetscInt *ai = a->i, *aj = a->j, *adiag = a->diag;
1170: PetscScalar *x;
1171: const PetscScalar *b;
1172: const MatScalar *aa = a->a;
1173: #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1174: PetscInt adiag_i, i, nz, ai_i;
1175: const PetscInt *vi;
1176: const MatScalar *v;
1177: PetscScalar sum;
1178: #endif
1180: if (!n) return 0;
1182: VecGetArrayRead(bb, &b);
1183: VecGetArrayWrite(xx, &x);
1185: #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1186: fortransolveaij_(&n, x, ai, aj, adiag, aa, b);
1187: #else
1188: /* forward solve the lower triangular */
1189: x[0] = b[0];
1190: for (i = 1; i < n; i++) {
1191: ai_i = ai[i];
1192: v = aa + ai_i;
1193: vi = aj + ai_i;
1194: nz = adiag[i] - ai_i;
1195: sum = b[i];
1196: PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1197: x[i] = sum;
1198: }
1200: /* backward solve the upper triangular */
1201: for (i = n - 1; i >= 0; i--) {
1202: adiag_i = adiag[i];
1203: v = aa + adiag_i + 1;
1204: vi = aj + adiag_i + 1;
1205: nz = ai[i + 1] - adiag_i - 1;
1206: sum = x[i];
1207: PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1208: x[i] = sum * aa[adiag_i];
1209: }
1210: #endif
1211: PetscLogFlops(2.0 * a->nz - A->cmap->n);
1212: VecRestoreArrayRead(bb, &b);
1213: VecRestoreArrayWrite(xx, &x);
1214: return 0;
1215: }
1217: PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec yy, Vec xx)
1218: {
1219: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1220: IS iscol = a->col, isrow = a->row;
1221: PetscInt i, n = A->rmap->n, j;
1222: PetscInt nz;
1223: const PetscInt *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j;
1224: PetscScalar *x, *tmp, sum;
1225: const PetscScalar *b;
1226: const MatScalar *aa = a->a, *v;
1228: if (yy != xx) VecCopy(yy, xx);
1230: VecGetArrayRead(bb, &b);
1231: VecGetArray(xx, &x);
1232: tmp = a->solve_work;
1234: ISGetIndices(isrow, &rout);
1235: r = rout;
1236: ISGetIndices(iscol, &cout);
1237: c = cout + (n - 1);
1239: /* forward solve the lower triangular */
1240: tmp[0] = b[*r++];
1241: for (i = 1; i < n; i++) {
1242: v = aa + ai[i];
1243: vi = aj + ai[i];
1244: nz = a->diag[i] - ai[i];
1245: sum = b[*r++];
1246: for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1247: tmp[i] = sum;
1248: }
1250: /* backward solve the upper triangular */
1251: for (i = n - 1; i >= 0; i--) {
1252: v = aa + a->diag[i] + 1;
1253: vi = aj + a->diag[i] + 1;
1254: nz = ai[i + 1] - a->diag[i] - 1;
1255: sum = tmp[i];
1256: for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1257: tmp[i] = sum * aa[a->diag[i]];
1258: x[*c--] += tmp[i];
1259: }
1261: ISRestoreIndices(isrow, &rout);
1262: ISRestoreIndices(iscol, &cout);
1263: VecRestoreArrayRead(bb, &b);
1264: VecRestoreArray(xx, &x);
1265: PetscLogFlops(2.0 * a->nz);
1266: return 0;
1267: }
1269: PetscErrorCode MatSolveAdd_SeqAIJ(Mat A, Vec bb, Vec yy, Vec xx)
1270: {
1271: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1272: IS iscol = a->col, isrow = a->row;
1273: PetscInt i, n = A->rmap->n, j;
1274: PetscInt nz;
1275: const PetscInt *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
1276: PetscScalar *x, *tmp, sum;
1277: const PetscScalar *b;
1278: const MatScalar *aa = a->a, *v;
1280: if (yy != xx) VecCopy(yy, xx);
1282: VecGetArrayRead(bb, &b);
1283: VecGetArray(xx, &x);
1284: tmp = a->solve_work;
1286: ISGetIndices(isrow, &rout);
1287: r = rout;
1288: ISGetIndices(iscol, &cout);
1289: c = cout;
1291: /* forward solve the lower triangular */
1292: tmp[0] = b[r[0]];
1293: v = aa;
1294: vi = aj;
1295: for (i = 1; i < n; i++) {
1296: nz = ai[i + 1] - ai[i];
1297: sum = b[r[i]];
1298: for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1299: tmp[i] = sum;
1300: v += nz;
1301: vi += nz;
1302: }
1304: /* backward solve the upper triangular */
1305: v = aa + adiag[n - 1];
1306: vi = aj + adiag[n - 1];
1307: for (i = n - 1; i >= 0; i--) {
1308: nz = adiag[i] - adiag[i + 1] - 1;
1309: sum = tmp[i];
1310: for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1311: tmp[i] = sum * v[nz];
1312: x[c[i]] += tmp[i];
1313: v += nz + 1;
1314: vi += nz + 1;
1315: }
1317: ISRestoreIndices(isrow, &rout);
1318: ISRestoreIndices(iscol, &cout);
1319: VecRestoreArrayRead(bb, &b);
1320: VecRestoreArray(xx, &x);
1321: PetscLogFlops(2.0 * a->nz);
1322: return 0;
1323: }
1325: PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A, Vec bb, Vec xx)
1326: {
1327: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1328: IS iscol = a->col, isrow = a->row;
1329: const PetscInt *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
1330: PetscInt i, n = A->rmap->n, j;
1331: PetscInt nz;
1332: PetscScalar *x, *tmp, s1;
1333: const MatScalar *aa = a->a, *v;
1334: const PetscScalar *b;
1336: VecGetArrayRead(bb, &b);
1337: VecGetArrayWrite(xx, &x);
1338: tmp = a->solve_work;
1340: ISGetIndices(isrow, &rout);
1341: r = rout;
1342: ISGetIndices(iscol, &cout);
1343: c = cout;
1345: /* copy the b into temp work space according to permutation */
1346: for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1348: /* forward solve the U^T */
1349: for (i = 0; i < n; i++) {
1350: v = aa + diag[i];
1351: vi = aj + diag[i] + 1;
1352: nz = ai[i + 1] - diag[i] - 1;
1353: s1 = tmp[i];
1354: s1 *= (*v++); /* multiply by inverse of diagonal entry */
1355: for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1356: tmp[i] = s1;
1357: }
1359: /* backward solve the L^T */
1360: for (i = n - 1; i >= 0; i--) {
1361: v = aa + diag[i] - 1;
1362: vi = aj + diag[i] - 1;
1363: nz = diag[i] - ai[i];
1364: s1 = tmp[i];
1365: for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
1366: }
1368: /* copy tmp into x according to permutation */
1369: for (i = 0; i < n; i++) x[r[i]] = tmp[i];
1371: ISRestoreIndices(isrow, &rout);
1372: ISRestoreIndices(iscol, &cout);
1373: VecRestoreArrayRead(bb, &b);
1374: VecRestoreArrayWrite(xx, &x);
1376: PetscLogFlops(2.0 * a->nz - A->cmap->n);
1377: return 0;
1378: }
1380: PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A, Vec bb, Vec xx)
1381: {
1382: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1383: IS iscol = a->col, isrow = a->row;
1384: const PetscInt *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi;
1385: PetscInt i, n = A->rmap->n, j;
1386: PetscInt nz;
1387: PetscScalar *x, *tmp, s1;
1388: const MatScalar *aa = a->a, *v;
1389: const PetscScalar *b;
1391: VecGetArrayRead(bb, &b);
1392: VecGetArrayWrite(xx, &x);
1393: tmp = a->solve_work;
1395: ISGetIndices(isrow, &rout);
1396: r = rout;
1397: ISGetIndices(iscol, &cout);
1398: c = cout;
1400: /* copy the b into temp work space according to permutation */
1401: for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1403: /* forward solve the U^T */
1404: for (i = 0; i < n; i++) {
1405: v = aa + adiag[i + 1] + 1;
1406: vi = aj + adiag[i + 1] + 1;
1407: nz = adiag[i] - adiag[i + 1] - 1;
1408: s1 = tmp[i];
1409: s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1410: for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1411: tmp[i] = s1;
1412: }
1414: /* backward solve the L^T */
1415: for (i = n - 1; i >= 0; i--) {
1416: v = aa + ai[i];
1417: vi = aj + ai[i];
1418: nz = ai[i + 1] - ai[i];
1419: s1 = tmp[i];
1420: for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1421: }
1423: /* copy tmp into x according to permutation */
1424: for (i = 0; i < n; i++) x[r[i]] = tmp[i];
1426: ISRestoreIndices(isrow, &rout);
1427: ISRestoreIndices(iscol, &cout);
1428: VecRestoreArrayRead(bb, &b);
1429: VecRestoreArrayWrite(xx, &x);
1431: PetscLogFlops(2.0 * a->nz - A->cmap->n);
1432: return 0;
1433: }
1435: PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec zz, Vec xx)
1436: {
1437: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1438: IS iscol = a->col, isrow = a->row;
1439: const PetscInt *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
1440: PetscInt i, n = A->rmap->n, j;
1441: PetscInt nz;
1442: PetscScalar *x, *tmp, s1;
1443: const MatScalar *aa = a->a, *v;
1444: const PetscScalar *b;
1446: if (zz != xx) VecCopy(zz, xx);
1447: VecGetArrayRead(bb, &b);
1448: VecGetArray(xx, &x);
1449: tmp = a->solve_work;
1451: ISGetIndices(isrow, &rout);
1452: r = rout;
1453: ISGetIndices(iscol, &cout);
1454: c = cout;
1456: /* copy the b into temp work space according to permutation */
1457: for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1459: /* forward solve the U^T */
1460: for (i = 0; i < n; i++) {
1461: v = aa + diag[i];
1462: vi = aj + diag[i] + 1;
1463: nz = ai[i + 1] - diag[i] - 1;
1464: s1 = tmp[i];
1465: s1 *= (*v++); /* multiply by inverse of diagonal entry */
1466: for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1467: tmp[i] = s1;
1468: }
1470: /* backward solve the L^T */
1471: for (i = n - 1; i >= 0; i--) {
1472: v = aa + diag[i] - 1;
1473: vi = aj + diag[i] - 1;
1474: nz = diag[i] - ai[i];
1475: s1 = tmp[i];
1476: for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
1477: }
1479: /* copy tmp into x according to permutation */
1480: for (i = 0; i < n; i++) x[r[i]] += tmp[i];
1482: ISRestoreIndices(isrow, &rout);
1483: ISRestoreIndices(iscol, &cout);
1484: VecRestoreArrayRead(bb, &b);
1485: VecRestoreArray(xx, &x);
1487: PetscLogFlops(2.0 * a->nz - A->cmap->n);
1488: return 0;
1489: }
1491: PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A, Vec bb, Vec zz, Vec xx)
1492: {
1493: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1494: IS iscol = a->col, isrow = a->row;
1495: const PetscInt *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi;
1496: PetscInt i, n = A->rmap->n, j;
1497: PetscInt nz;
1498: PetscScalar *x, *tmp, s1;
1499: const MatScalar *aa = a->a, *v;
1500: const PetscScalar *b;
1502: if (zz != xx) VecCopy(zz, xx);
1503: VecGetArrayRead(bb, &b);
1504: VecGetArray(xx, &x);
1505: tmp = a->solve_work;
1507: ISGetIndices(isrow, &rout);
1508: r = rout;
1509: ISGetIndices(iscol, &cout);
1510: c = cout;
1512: /* copy the b into temp work space according to permutation */
1513: for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1515: /* forward solve the U^T */
1516: for (i = 0; i < n; i++) {
1517: v = aa + adiag[i + 1] + 1;
1518: vi = aj + adiag[i + 1] + 1;
1519: nz = adiag[i] - adiag[i + 1] - 1;
1520: s1 = tmp[i];
1521: s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1522: for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1523: tmp[i] = s1;
1524: }
1526: /* backward solve the L^T */
1527: for (i = n - 1; i >= 0; i--) {
1528: v = aa + ai[i];
1529: vi = aj + ai[i];
1530: nz = ai[i + 1] - ai[i];
1531: s1 = tmp[i];
1532: for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1533: }
1535: /* copy tmp into x according to permutation */
1536: for (i = 0; i < n; i++) x[r[i]] += tmp[i];
1538: ISRestoreIndices(isrow, &rout);
1539: ISRestoreIndices(iscol, &cout);
1540: VecRestoreArrayRead(bb, &b);
1541: VecRestoreArray(xx, &x);
1543: PetscLogFlops(2.0 * a->nz - A->cmap->n);
1544: return 0;
1545: }
1547: /* ----------------------------------------------------------------*/
1549: /*
1550: ilu() under revised new data structure.
1551: Factored arrays bj and ba are stored as
1552: L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)
1554: bi=fact->i is an array of size n+1, in which
1555: bi[i]: points to 1st entry of L(i,:),i=0,...,n-1
1556: bi[n]: points to L(n-1,n-1)+1
1558: bdiag=fact->diag is an array of size n+1,in which
1559: bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1560: bdiag[n]: points to entry of U(n-1,0)-1
1562: U(i,:) contains bdiag[i] as its last entry, i.e.,
1563: U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1564: */
1565: PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1566: {
1567: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b;
1568: const PetscInt n = A->rmap->n, *ai = a->i, *aj, *adiag = a->diag;
1569: PetscInt i, j, k = 0, nz, *bi, *bj, *bdiag;
1570: IS isicol;
1572: ISInvertPermutation(iscol, PETSC_DECIDE, &isicol);
1573: MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_FALSE);
1574: b = (Mat_SeqAIJ *)(fact)->data;
1576: /* allocate matrix arrays for new data structure */
1577: PetscMalloc3(ai[n] + 1, &b->a, ai[n] + 1, &b->j, n + 1, &b->i);
1579: b->singlemalloc = PETSC_TRUE;
1580: if (!b->diag) { PetscMalloc1(n + 1, &b->diag); }
1581: bdiag = b->diag;
1583: if (n > 0) PetscArrayzero(b->a, ai[n]);
1585: /* set bi and bj with new data structure */
1586: bi = b->i;
1587: bj = b->j;
1589: /* L part */
1590: bi[0] = 0;
1591: for (i = 0; i < n; i++) {
1592: nz = adiag[i] - ai[i];
1593: bi[i + 1] = bi[i] + nz;
1594: aj = a->j + ai[i];
1595: for (j = 0; j < nz; j++) bj[k++] = aj[j];
1596: }
1598: /* U part */
1599: bdiag[n] = bi[n] - 1;
1600: for (i = n - 1; i >= 0; i--) {
1601: nz = ai[i + 1] - adiag[i] - 1;
1602: aj = a->j + adiag[i] + 1;
1603: for (j = 0; j < nz; j++) bj[k++] = aj[j];
1604: /* diag[i] */
1605: bj[k++] = i;
1606: bdiag[i] = bdiag[i + 1] + nz + 1;
1607: }
1609: fact->factortype = MAT_FACTOR_ILU;
1610: fact->info.factor_mallocs = 0;
1611: fact->info.fill_ratio_given = info->fill;
1612: fact->info.fill_ratio_needed = 1.0;
1613: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
1614: MatSeqAIJCheckInode_FactorLU(fact);
1616: b = (Mat_SeqAIJ *)(fact)->data;
1617: b->row = isrow;
1618: b->col = iscol;
1619: b->icol = isicol;
1620: PetscMalloc1(fact->rmap->n + 1, &b->solve_work);
1621: PetscObjectReference((PetscObject)isrow);
1622: PetscObjectReference((PetscObject)iscol);
1623: return 0;
1624: }
1626: PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1627: {
1628: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b;
1629: IS isicol;
1630: const PetscInt *r, *ic;
1631: PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j;
1632: PetscInt *bi, *cols, nnz, *cols_lvl;
1633: PetscInt *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0;
1634: PetscInt i, levels, diagonal_fill;
1635: PetscBool col_identity, row_identity, missing;
1636: PetscReal f;
1637: PetscInt nlnk, *lnk, *lnk_lvl = NULL;
1638: PetscBT lnkbt;
1639: PetscInt nzi, *bj, **bj_ptr, **bjlvl_ptr;
1640: PetscFreeSpaceList free_space = NULL, current_space = NULL;
1641: PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
1644: MatMissingDiagonal(A, &missing, &i);
1647: levels = (PetscInt)info->levels;
1648: ISIdentity(isrow, &row_identity);
1649: ISIdentity(iscol, &col_identity);
1650: if (!levels && row_identity && col_identity) {
1651: /* special case: ilu(0) with natural ordering */
1652: MatILUFactorSymbolic_SeqAIJ_ilu0(fact, A, isrow, iscol, info);
1653: if (a->inode.size) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1654: return 0;
1655: }
1657: ISInvertPermutation(iscol, PETSC_DECIDE, &isicol);
1658: ISGetIndices(isrow, &r);
1659: ISGetIndices(isicol, &ic);
1661: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1662: PetscMalloc1(n + 1, &bi);
1663: PetscMalloc1(n + 1, &bdiag);
1664: bi[0] = bdiag[0] = 0;
1665: PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr);
1667: /* create a linked list for storing column indices of the active row */
1668: nlnk = n + 1;
1669: PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt);
1671: /* initial FreeSpace size is f*(ai[n]+1) */
1672: f = info->fill;
1673: diagonal_fill = (PetscInt)info->diagonal_fill;
1674: PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space);
1675: current_space = free_space;
1676: PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl);
1677: current_space_lvl = free_space_lvl;
1678: for (i = 0; i < n; i++) {
1679: nzi = 0;
1680: /* copy current row into linked list */
1681: nnz = ai[r[i] + 1] - ai[r[i]];
1683: cols = aj + ai[r[i]];
1684: lnk[i] = -1; /* marker to indicate if diagonal exists */
1685: PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt);
1686: nzi += nlnk;
1688: /* make sure diagonal entry is included */
1689: if (diagonal_fill && lnk[i] == -1) {
1690: fm = n;
1691: while (lnk[fm] < i) fm = lnk[fm];
1692: lnk[i] = lnk[fm]; /* insert diagonal into linked list */
1693: lnk[fm] = i;
1694: lnk_lvl[i] = 0;
1695: nzi++;
1696: dcount++;
1697: }
1699: /* add pivot rows into the active row */
1700: nzbd = 0;
1701: prow = lnk[n];
1702: while (prow < i) {
1703: nnz = bdiag[prow];
1704: cols = bj_ptr[prow] + nnz + 1;
1705: cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1706: nnz = bi[prow + 1] - bi[prow] - nnz - 1;
1707: PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow);
1708: nzi += nlnk;
1709: prow = lnk[prow];
1710: nzbd++;
1711: }
1712: bdiag[i] = nzbd;
1713: bi[i + 1] = bi[i] + nzi;
1714: /* if free space is not available, make more free space */
1715: if (current_space->local_remaining < nzi) {
1716: nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(nzi, n - i)); /* estimated and max additional space needed */
1717: PetscFreeSpaceGet(nnz, ¤t_space);
1718: PetscFreeSpaceGet(nnz, ¤t_space_lvl);
1719: reallocs++;
1720: }
1722: /* copy data into free_space and free_space_lvl, then initialize lnk */
1723: PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt);
1724: bj_ptr[i] = current_space->array;
1725: bjlvl_ptr[i] = current_space_lvl->array;
1727: /* make sure the active row i has diagonal entry */
1730: current_space->array += nzi;
1731: current_space->local_used += nzi;
1732: current_space->local_remaining -= nzi;
1733: current_space_lvl->array += nzi;
1734: current_space_lvl->local_used += nzi;
1735: current_space_lvl->local_remaining -= nzi;
1736: }
1738: ISRestoreIndices(isrow, &r);
1739: ISRestoreIndices(isicol, &ic);
1740: /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
1741: PetscMalloc1(bi[n] + 1, &bj);
1742: PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag);
1744: PetscIncompleteLLDestroy(lnk, lnkbt);
1745: PetscFreeSpaceDestroy(free_space_lvl);
1746: PetscFree2(bj_ptr, bjlvl_ptr);
1748: #if defined(PETSC_USE_INFO)
1749: {
1750: PetscReal af = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
1751: PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af);
1752: PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af);
1753: PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af);
1754: PetscInfo(A, "for best performance.\n");
1755: if (diagonal_fill) PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount);
1756: }
1757: #endif
1758: /* put together the new matrix */
1759: MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL);
1760: b = (Mat_SeqAIJ *)(fact)->data;
1762: b->free_a = PETSC_TRUE;
1763: b->free_ij = PETSC_TRUE;
1764: b->singlemalloc = PETSC_FALSE;
1766: PetscMalloc1(bdiag[0] + 1, &b->a);
1768: b->j = bj;
1769: b->i = bi;
1770: b->diag = bdiag;
1771: b->ilen = NULL;
1772: b->imax = NULL;
1773: b->row = isrow;
1774: b->col = iscol;
1775: PetscObjectReference((PetscObject)isrow);
1776: PetscObjectReference((PetscObject)iscol);
1777: b->icol = isicol;
1779: PetscMalloc1(n + 1, &b->solve_work);
1780: /* In b structure: Free imax, ilen, old a, old j.
1781: Allocate bdiag, solve_work, new a, new j */
1782: b->maxnz = b->nz = bdiag[0] + 1;
1784: (fact)->info.factor_mallocs = reallocs;
1785: (fact)->info.fill_ratio_given = f;
1786: (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
1787: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
1788: if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1789: MatSeqAIJCheckInode_FactorLU(fact);
1790: return 0;
1791: }
1793: PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1794: {
1795: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b;
1796: IS isicol;
1797: const PetscInt *r, *ic;
1798: PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j;
1799: PetscInt *bi, *cols, nnz, *cols_lvl;
1800: PetscInt *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0;
1801: PetscInt i, levels, diagonal_fill;
1802: PetscBool col_identity, row_identity;
1803: PetscReal f;
1804: PetscInt nlnk, *lnk, *lnk_lvl = NULL;
1805: PetscBT lnkbt;
1806: PetscInt nzi, *bj, **bj_ptr, **bjlvl_ptr;
1807: PetscFreeSpaceList free_space = NULL, current_space = NULL;
1808: PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
1809: PetscBool missing;
1812: MatMissingDiagonal(A, &missing, &i);
1815: f = info->fill;
1816: levels = (PetscInt)info->levels;
1817: diagonal_fill = (PetscInt)info->diagonal_fill;
1819: ISInvertPermutation(iscol, PETSC_DECIDE, &isicol);
1821: ISIdentity(isrow, &row_identity);
1822: ISIdentity(iscol, &col_identity);
1823: if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */
1824: MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_TRUE);
1826: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
1827: if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
1828: fact->factortype = MAT_FACTOR_ILU;
1829: (fact)->info.factor_mallocs = 0;
1830: (fact)->info.fill_ratio_given = info->fill;
1831: (fact)->info.fill_ratio_needed = 1.0;
1833: b = (Mat_SeqAIJ *)(fact)->data;
1834: b->row = isrow;
1835: b->col = iscol;
1836: b->icol = isicol;
1837: PetscMalloc1((fact)->rmap->n + 1, &b->solve_work);
1838: PetscObjectReference((PetscObject)isrow);
1839: PetscObjectReference((PetscObject)iscol);
1840: return 0;
1841: }
1843: ISGetIndices(isrow, &r);
1844: ISGetIndices(isicol, &ic);
1846: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1847: PetscMalloc1(n + 1, &bi);
1848: PetscMalloc1(n + 1, &bdiag);
1849: bi[0] = bdiag[0] = 0;
1851: PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr);
1853: /* create a linked list for storing column indices of the active row */
1854: nlnk = n + 1;
1855: PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt);
1857: /* initial FreeSpace size is f*(ai[n]+1) */
1858: PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space);
1859: current_space = free_space;
1860: PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl);
1861: current_space_lvl = free_space_lvl;
1863: for (i = 0; i < n; i++) {
1864: nzi = 0;
1865: /* copy current row into linked list */
1866: nnz = ai[r[i] + 1] - ai[r[i]];
1868: cols = aj + ai[r[i]];
1869: lnk[i] = -1; /* marker to indicate if diagonal exists */
1870: PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt);
1871: nzi += nlnk;
1873: /* make sure diagonal entry is included */
1874: if (diagonal_fill && lnk[i] == -1) {
1875: fm = n;
1876: while (lnk[fm] < i) fm = lnk[fm];
1877: lnk[i] = lnk[fm]; /* insert diagonal into linked list */
1878: lnk[fm] = i;
1879: lnk_lvl[i] = 0;
1880: nzi++;
1881: dcount++;
1882: }
1884: /* add pivot rows into the active row */
1885: nzbd = 0;
1886: prow = lnk[n];
1887: while (prow < i) {
1888: nnz = bdiag[prow];
1889: cols = bj_ptr[prow] + nnz + 1;
1890: cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1891: nnz = bi[prow + 1] - bi[prow] - nnz - 1;
1892: PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow);
1893: nzi += nlnk;
1894: prow = lnk[prow];
1895: nzbd++;
1896: }
1897: bdiag[i] = nzbd;
1898: bi[i + 1] = bi[i] + nzi;
1900: /* if free space is not available, make more free space */
1901: if (current_space->local_remaining < nzi) {
1902: nnz = PetscIntMultTruncate(nzi, n - i); /* estimated and max additional space needed */
1903: PetscFreeSpaceGet(nnz, ¤t_space);
1904: PetscFreeSpaceGet(nnz, ¤t_space_lvl);
1905: reallocs++;
1906: }
1908: /* copy data into free_space and free_space_lvl, then initialize lnk */
1909: PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt);
1910: bj_ptr[i] = current_space->array;
1911: bjlvl_ptr[i] = current_space_lvl->array;
1913: /* make sure the active row i has diagonal entry */
1916: current_space->array += nzi;
1917: current_space->local_used += nzi;
1918: current_space->local_remaining -= nzi;
1919: current_space_lvl->array += nzi;
1920: current_space_lvl->local_used += nzi;
1921: current_space_lvl->local_remaining -= nzi;
1922: }
1924: ISRestoreIndices(isrow, &r);
1925: ISRestoreIndices(isicol, &ic);
1927: /* destroy list of free space and other temporary arrays */
1928: PetscMalloc1(bi[n] + 1, &bj);
1929: PetscFreeSpaceContiguous(&free_space, bj); /* copy free_space -> bj */
1930: PetscIncompleteLLDestroy(lnk, lnkbt);
1931: PetscFreeSpaceDestroy(free_space_lvl);
1932: PetscFree2(bj_ptr, bjlvl_ptr);
1934: #if defined(PETSC_USE_INFO)
1935: {
1936: PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
1937: PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af);
1938: PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af);
1939: PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af);
1940: PetscInfo(A, "for best performance.\n");
1941: if (diagonal_fill) PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount);
1942: }
1943: #endif
1945: /* put together the new matrix */
1946: MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL);
1947: b = (Mat_SeqAIJ *)(fact)->data;
1949: b->free_a = PETSC_TRUE;
1950: b->free_ij = PETSC_TRUE;
1951: b->singlemalloc = PETSC_FALSE;
1953: PetscMalloc1(bi[n], &b->a);
1954: b->j = bj;
1955: b->i = bi;
1956: for (i = 0; i < n; i++) bdiag[i] += bi[i];
1957: b->diag = bdiag;
1958: b->ilen = NULL;
1959: b->imax = NULL;
1960: b->row = isrow;
1961: b->col = iscol;
1962: PetscObjectReference((PetscObject)isrow);
1963: PetscObjectReference((PetscObject)iscol);
1964: b->icol = isicol;
1965: PetscMalloc1(n + 1, &b->solve_work);
1966: /* In b structure: Free imax, ilen, old a, old j.
1967: Allocate bdiag, solve_work, new a, new j */
1968: b->maxnz = b->nz = bi[n];
1970: (fact)->info.factor_mallocs = reallocs;
1971: (fact)->info.fill_ratio_given = f;
1972: (fact)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
1973: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
1974: if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
1975: return 0;
1976: }
1978: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info)
1979: {
1980: Mat C = B;
1981: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1982: Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)C->data;
1983: IS ip = b->row, iip = b->icol;
1984: const PetscInt *rip, *riip;
1985: PetscInt i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp;
1986: PetscInt *ai = a->i, *aj = a->j;
1987: PetscInt k, jmin, jmax, *c2r, *il, col, nexti, ili, nz;
1988: MatScalar *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi;
1989: PetscBool perm_identity;
1990: FactorShiftCtx sctx;
1991: PetscReal rs;
1992: MatScalar d, *v;
1994: /* MatPivotSetUp(): initialize shift context sctx */
1995: PetscMemzero(&sctx, sizeof(FactorShiftCtx));
1997: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
1998: sctx.shift_top = info->zeropivot;
1999: for (i = 0; i < mbs; i++) {
2000: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2001: d = (aa)[a->diag[i]];
2002: rs = -PetscAbsScalar(d) - PetscRealPart(d);
2003: v = aa + ai[i];
2004: nz = ai[i + 1] - ai[i];
2005: for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
2006: if (rs > sctx.shift_top) sctx.shift_top = rs;
2007: }
2008: sctx.shift_top *= 1.1;
2009: sctx.nshift_max = 5;
2010: sctx.shift_lo = 0.;
2011: sctx.shift_hi = 1.;
2012: }
2014: ISGetIndices(ip, &rip);
2015: ISGetIndices(iip, &riip);
2017: /* allocate working arrays
2018: c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
2019: il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
2020: */
2021: PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r);
2023: do {
2024: sctx.newshift = PETSC_FALSE;
2026: for (i = 0; i < mbs; i++) c2r[i] = mbs;
2027: if (mbs) il[0] = 0;
2029: for (k = 0; k < mbs; k++) {
2030: /* zero rtmp */
2031: nz = bi[k + 1] - bi[k];
2032: bjtmp = bj + bi[k];
2033: for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
2035: /* load in initial unfactored row */
2036: bval = ba + bi[k];
2037: jmin = ai[rip[k]];
2038: jmax = ai[rip[k] + 1];
2039: for (j = jmin; j < jmax; j++) {
2040: col = riip[aj[j]];
2041: if (col >= k) { /* only take upper triangular entry */
2042: rtmp[col] = aa[j];
2043: *bval++ = 0.0; /* for in-place factorization */
2044: }
2045: }
2046: /* shift the diagonal of the matrix: ZeropivotApply() */
2047: rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */
2049: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2050: dk = rtmp[k];
2051: i = c2r[k]; /* first row to be added to k_th row */
2053: while (i < k) {
2054: nexti = c2r[i]; /* next row to be added to k_th row */
2056: /* compute multiplier, update diag(k) and U(i,k) */
2057: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
2058: uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */
2059: dk += uikdi * ba[ili]; /* update diag[k] */
2060: ba[ili] = uikdi; /* -U(i,k) */
2062: /* add multiple of row i to k-th row */
2063: jmin = ili + 1;
2064: jmax = bi[i + 1];
2065: if (jmin < jmax) {
2066: for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
2067: /* update il and c2r for row i */
2068: il[i] = jmin;
2069: j = bj[jmin];
2070: c2r[i] = c2r[j];
2071: c2r[j] = i;
2072: }
2073: i = nexti;
2074: }
2076: /* copy data into U(k,:) */
2077: rs = 0.0;
2078: jmin = bi[k];
2079: jmax = bi[k + 1] - 1;
2080: if (jmin < jmax) {
2081: for (j = jmin; j < jmax; j++) {
2082: col = bj[j];
2083: ba[j] = rtmp[col];
2084: rs += PetscAbsScalar(ba[j]);
2085: }
2086: /* add the k-th row into il and c2r */
2087: il[k] = jmin;
2088: i = bj[jmin];
2089: c2r[k] = c2r[i];
2090: c2r[i] = k;
2091: }
2093: /* MatPivotCheck() */
2094: sctx.rs = rs;
2095: sctx.pv = dk;
2096: MatPivotCheck(B, A, info, &sctx, i);
2097: if (sctx.newshift) break;
2098: dk = sctx.pv;
2100: ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */
2101: }
2102: } while (sctx.newshift);
2104: PetscFree3(rtmp, il, c2r);
2105: ISRestoreIndices(ip, &rip);
2106: ISRestoreIndices(iip, &riip);
2108: ISIdentity(ip, &perm_identity);
2109: if (perm_identity) {
2110: B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2111: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2112: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
2113: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
2114: } else {
2115: B->ops->solve = MatSolve_SeqSBAIJ_1;
2116: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1;
2117: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1;
2118: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1;
2119: }
2121: C->assembled = PETSC_TRUE;
2122: C->preallocated = PETSC_TRUE;
2124: PetscLogFlops(C->rmap->n);
2126: /* MatPivotView() */
2127: if (sctx.nshift) {
2128: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2129: PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top);
2130: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2131: PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount);
2132: } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
2133: PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount);
2134: }
2135: }
2136: return 0;
2137: }
2139: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info)
2140: {
2141: Mat C = B;
2142: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
2143: Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)C->data;
2144: IS ip = b->row, iip = b->icol;
2145: const PetscInt *rip, *riip;
2146: PetscInt i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bcol, *bjtmp;
2147: PetscInt *ai = a->i, *aj = a->j;
2148: PetscInt k, jmin, jmax, *jl, *il, col, nexti, ili, nz;
2149: MatScalar *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi;
2150: PetscBool perm_identity;
2151: FactorShiftCtx sctx;
2152: PetscReal rs;
2153: MatScalar d, *v;
2155: /* MatPivotSetUp(): initialize shift context sctx */
2156: PetscMemzero(&sctx, sizeof(FactorShiftCtx));
2158: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2159: sctx.shift_top = info->zeropivot;
2160: for (i = 0; i < mbs; i++) {
2161: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2162: d = (aa)[a->diag[i]];
2163: rs = -PetscAbsScalar(d) - PetscRealPart(d);
2164: v = aa + ai[i];
2165: nz = ai[i + 1] - ai[i];
2166: for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
2167: if (rs > sctx.shift_top) sctx.shift_top = rs;
2168: }
2169: sctx.shift_top *= 1.1;
2170: sctx.nshift_max = 5;
2171: sctx.shift_lo = 0.;
2172: sctx.shift_hi = 1.;
2173: }
2175: ISGetIndices(ip, &rip);
2176: ISGetIndices(iip, &riip);
2178: /* initialization */
2179: PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl);
2181: do {
2182: sctx.newshift = PETSC_FALSE;
2184: for (i = 0; i < mbs; i++) jl[i] = mbs;
2185: il[0] = 0;
2187: for (k = 0; k < mbs; k++) {
2188: /* zero rtmp */
2189: nz = bi[k + 1] - bi[k];
2190: bjtmp = bj + bi[k];
2191: for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
2193: bval = ba + bi[k];
2194: /* initialize k-th row by the perm[k]-th row of A */
2195: jmin = ai[rip[k]];
2196: jmax = ai[rip[k] + 1];
2197: for (j = jmin; j < jmax; j++) {
2198: col = riip[aj[j]];
2199: if (col >= k) { /* only take upper triangular entry */
2200: rtmp[col] = aa[j];
2201: *bval++ = 0.0; /* for in-place factorization */
2202: }
2203: }
2204: /* shift the diagonal of the matrix */
2205: if (sctx.nshift) rtmp[k] += sctx.shift_amount;
2207: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2208: dk = rtmp[k];
2209: i = jl[k]; /* first row to be added to k_th row */
2211: while (i < k) {
2212: nexti = jl[i]; /* next row to be added to k_th row */
2214: /* compute multiplier, update diag(k) and U(i,k) */
2215: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
2216: uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */
2217: dk += uikdi * ba[ili];
2218: ba[ili] = uikdi; /* -U(i,k) */
2220: /* add multiple of row i to k-th row */
2221: jmin = ili + 1;
2222: jmax = bi[i + 1];
2223: if (jmin < jmax) {
2224: for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
2225: /* update il and jl for row i */
2226: il[i] = jmin;
2227: j = bj[jmin];
2228: jl[i] = jl[j];
2229: jl[j] = i;
2230: }
2231: i = nexti;
2232: }
2234: /* shift the diagonals when zero pivot is detected */
2235: /* compute rs=sum of abs(off-diagonal) */
2236: rs = 0.0;
2237: jmin = bi[k] + 1;
2238: nz = bi[k + 1] - jmin;
2239: bcol = bj + jmin;
2240: for (j = 0; j < nz; j++) rs += PetscAbsScalar(rtmp[bcol[j]]);
2242: sctx.rs = rs;
2243: sctx.pv = dk;
2244: MatPivotCheck(B, A, info, &sctx, k);
2245: if (sctx.newshift) break;
2246: dk = sctx.pv;
2248: /* copy data into U(k,:) */
2249: ba[bi[k]] = 1.0 / dk; /* U(k,k) */
2250: jmin = bi[k] + 1;
2251: jmax = bi[k + 1];
2252: if (jmin < jmax) {
2253: for (j = jmin; j < jmax; j++) {
2254: col = bj[j];
2255: ba[j] = rtmp[col];
2256: }
2257: /* add the k-th row into il and jl */
2258: il[k] = jmin;
2259: i = bj[jmin];
2260: jl[k] = jl[i];
2261: jl[i] = k;
2262: }
2263: }
2264: } while (sctx.newshift);
2266: PetscFree3(rtmp, il, jl);
2267: ISRestoreIndices(ip, &rip);
2268: ISRestoreIndices(iip, &riip);
2270: ISIdentity(ip, &perm_identity);
2271: if (perm_identity) {
2272: B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2273: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2274: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2275: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2276: } else {
2277: B->ops->solve = MatSolve_SeqSBAIJ_1_inplace;
2278: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
2279: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace;
2280: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace;
2281: }
2283: C->assembled = PETSC_TRUE;
2284: C->preallocated = PETSC_TRUE;
2286: PetscLogFlops(C->rmap->n);
2287: if (sctx.nshift) {
2288: if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2289: PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount);
2290: } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2291: PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount);
2292: }
2293: }
2294: return 0;
2295: }
2297: /*
2298: icc() under revised new data structure.
2299: Factored arrays bj and ba are stored as
2300: U(0,:),...,U(i,:),U(n-1,:)
2302: ui=fact->i is an array of size n+1, in which
2303: ui+
2304: ui[i]: points to 1st entry of U(i,:),i=0,...,n-1
2305: ui[n]: points to U(n-1,n-1)+1
2307: udiag=fact->diag is an array of size n,in which
2308: udiag[i]: points to diagonal of U(i,:), i=0,...,n-1
2310: U(i,:) contains udiag[i] as its last entry, i.e.,
2311: U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
2312: */
2314: PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2315: {
2316: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
2317: Mat_SeqSBAIJ *b;
2318: PetscBool perm_identity, missing;
2319: PetscInt reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag;
2320: const PetscInt *rip, *riip;
2321: PetscInt jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow;
2322: PetscInt nlnk, *lnk, *lnk_lvl = NULL, d;
2323: PetscInt ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr;
2324: PetscReal fill = info->fill, levels = info->levels;
2325: PetscFreeSpaceList free_space = NULL, current_space = NULL;
2326: PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
2327: PetscBT lnkbt;
2328: IS iperm;
2331: MatMissingDiagonal(A, &missing, &d);
2333: ISIdentity(perm, &perm_identity);
2334: ISInvertPermutation(perm, PETSC_DECIDE, &iperm);
2336: PetscMalloc1(am + 1, &ui);
2337: PetscMalloc1(am + 1, &udiag);
2338: ui[0] = 0;
2340: /* ICC(0) without matrix ordering: simply rearrange column indices */
2341: if (!levels && perm_identity) {
2342: for (i = 0; i < am; i++) {
2343: ncols = ai[i + 1] - a->diag[i];
2344: ui[i + 1] = ui[i] + ncols;
2345: udiag[i] = ui[i + 1] - 1; /* points to the last entry of U(i,:) */
2346: }
2347: PetscMalloc1(ui[am] + 1, &uj);
2348: cols = uj;
2349: for (i = 0; i < am; i++) {
2350: aj = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2351: ncols = ai[i + 1] - a->diag[i] - 1;
2352: for (j = 0; j < ncols; j++) *cols++ = aj[j];
2353: *cols++ = i; /* diagonal is located as the last entry of U(i,:) */
2354: }
2355: } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2356: ISGetIndices(iperm, &riip);
2357: ISGetIndices(perm, &rip);
2359: /* initialization */
2360: PetscMalloc1(am + 1, &ajtmp);
2362: /* jl: linked list for storing indices of the pivot rows
2363: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2364: PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il);
2365: for (i = 0; i < am; i++) {
2366: jl[i] = am;
2367: il[i] = 0;
2368: }
2370: /* create and initialize a linked list for storing column indices of the active row k */
2371: nlnk = am + 1;
2372: PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt);
2374: /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2375: PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space);
2376: current_space = free_space;
2377: PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space_lvl);
2378: current_space_lvl = free_space_lvl;
2380: for (k = 0; k < am; k++) { /* for each active row k */
2381: /* initialize lnk by the column indices of row rip[k] of A */
2382: nzk = 0;
2383: ncols = ai[rip[k] + 1] - ai[rip[k]];
2385: ncols_upper = 0;
2386: for (j = 0; j < ncols; j++) {
2387: i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2388: if (riip[i] >= k) { /* only take upper triangular entry */
2389: ajtmp[ncols_upper] = i;
2390: ncols_upper++;
2391: }
2392: }
2393: PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt);
2394: nzk += nlnk;
2396: /* update lnk by computing fill-in for each pivot row to be merged in */
2397: prow = jl[k]; /* 1st pivot row */
2399: while (prow < k) {
2400: nextprow = jl[prow];
2402: /* merge prow into k-th row */
2403: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2404: jmax = ui[prow + 1];
2405: ncols = jmax - jmin;
2406: i = jmin - ui[prow];
2407: cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2408: uj = uj_lvl_ptr[prow] + i; /* levels of cols */
2409: j = *(uj - 1);
2410: PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j);
2411: nzk += nlnk;
2413: /* update il and jl for prow */
2414: if (jmin < jmax) {
2415: il[prow] = jmin;
2416: j = *cols;
2417: jl[prow] = jl[j];
2418: jl[j] = prow;
2419: }
2420: prow = nextprow;
2421: }
2423: /* if free space is not available, make more free space */
2424: if (current_space->local_remaining < nzk) {
2425: i = am - k + 1; /* num of unfactored rows */
2426: i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2427: PetscFreeSpaceGet(i, ¤t_space);
2428: PetscFreeSpaceGet(i, ¤t_space_lvl);
2429: reallocs++;
2430: }
2432: /* copy data into free_space and free_space_lvl, then initialize lnk */
2434: PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt);
2436: /* add the k-th row into il and jl */
2437: if (nzk > 1) {
2438: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2439: jl[k] = jl[i];
2440: jl[i] = k;
2441: il[k] = ui[k] + 1;
2442: }
2443: uj_ptr[k] = current_space->array;
2444: uj_lvl_ptr[k] = current_space_lvl->array;
2446: current_space->array += nzk;
2447: current_space->local_used += nzk;
2448: current_space->local_remaining -= nzk;
2450: current_space_lvl->array += nzk;
2451: current_space_lvl->local_used += nzk;
2452: current_space_lvl->local_remaining -= nzk;
2454: ui[k + 1] = ui[k] + nzk;
2455: }
2457: ISRestoreIndices(perm, &rip);
2458: ISRestoreIndices(iperm, &riip);
2459: PetscFree4(uj_ptr, uj_lvl_ptr, jl, il);
2460: PetscFree(ajtmp);
2462: /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2463: PetscMalloc1(ui[am] + 1, &uj);
2464: PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag); /* store matrix factor */
2465: PetscIncompleteLLDestroy(lnk, lnkbt);
2466: PetscFreeSpaceDestroy(free_space_lvl);
2468: } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2470: /* put together the new matrix in MATSEQSBAIJ format */
2471: b = (Mat_SeqSBAIJ *)(fact)->data;
2472: b->singlemalloc = PETSC_FALSE;
2474: PetscMalloc1(ui[am] + 1, &b->a);
2476: b->j = uj;
2477: b->i = ui;
2478: b->diag = udiag;
2479: b->free_diag = PETSC_TRUE;
2480: b->ilen = NULL;
2481: b->imax = NULL;
2482: b->row = perm;
2483: b->col = perm;
2484: PetscObjectReference((PetscObject)perm);
2485: PetscObjectReference((PetscObject)perm);
2486: b->icol = iperm;
2487: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2489: PetscMalloc1(am + 1, &b->solve_work);
2491: b->maxnz = b->nz = ui[am];
2492: b->free_a = PETSC_TRUE;
2493: b->free_ij = PETSC_TRUE;
2495: fact->info.factor_mallocs = reallocs;
2496: fact->info.fill_ratio_given = fill;
2497: if (ai[am] != 0) {
2498: /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2499: fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
2500: } else {
2501: fact->info.fill_ratio_needed = 0.0;
2502: }
2503: #if defined(PETSC_USE_INFO)
2504: if (ai[am] != 0) {
2505: PetscReal af = fact->info.fill_ratio_needed;
2506: PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af);
2507: PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af);
2508: PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af);
2509: } else {
2510: PetscInfo(A, "Empty matrix\n");
2511: }
2512: #endif
2513: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2514: return 0;
2515: }
2517: PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2518: {
2519: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
2520: Mat_SeqSBAIJ *b;
2521: PetscBool perm_identity, missing;
2522: PetscInt reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag;
2523: const PetscInt *rip, *riip;
2524: PetscInt jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow;
2525: PetscInt nlnk, *lnk, *lnk_lvl = NULL, d;
2526: PetscInt ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr;
2527: PetscReal fill = info->fill, levels = info->levels;
2528: PetscFreeSpaceList free_space = NULL, current_space = NULL;
2529: PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
2530: PetscBT lnkbt;
2531: IS iperm;
2534: MatMissingDiagonal(A, &missing, &d);
2536: ISIdentity(perm, &perm_identity);
2537: ISInvertPermutation(perm, PETSC_DECIDE, &iperm);
2539: PetscMalloc1(am + 1, &ui);
2540: PetscMalloc1(am + 1, &udiag);
2541: ui[0] = 0;
2543: /* ICC(0) without matrix ordering: simply copies fill pattern */
2544: if (!levels && perm_identity) {
2545: for (i = 0; i < am; i++) {
2546: ui[i + 1] = ui[i] + ai[i + 1] - a->diag[i];
2547: udiag[i] = ui[i];
2548: }
2549: PetscMalloc1(ui[am] + 1, &uj);
2550: cols = uj;
2551: for (i = 0; i < am; i++) {
2552: aj = a->j + a->diag[i];
2553: ncols = ui[i + 1] - ui[i];
2554: for (j = 0; j < ncols; j++) *cols++ = *aj++;
2555: }
2556: } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2557: ISGetIndices(iperm, &riip);
2558: ISGetIndices(perm, &rip);
2560: /* initialization */
2561: PetscMalloc1(am + 1, &ajtmp);
2563: /* jl: linked list for storing indices of the pivot rows
2564: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2565: PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il);
2566: for (i = 0; i < am; i++) {
2567: jl[i] = am;
2568: il[i] = 0;
2569: }
2571: /* create and initialize a linked list for storing column indices of the active row k */
2572: nlnk = am + 1;
2573: PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt);
2575: /* initial FreeSpace size is fill*(ai[am]+1) */
2576: PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space);
2577: current_space = free_space;
2578: PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space_lvl);
2579: current_space_lvl = free_space_lvl;
2581: for (k = 0; k < am; k++) { /* for each active row k */
2582: /* initialize lnk by the column indices of row rip[k] of A */
2583: nzk = 0;
2584: ncols = ai[rip[k] + 1] - ai[rip[k]];
2586: ncols_upper = 0;
2587: for (j = 0; j < ncols; j++) {
2588: i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2589: if (riip[i] >= k) { /* only take upper triangular entry */
2590: ajtmp[ncols_upper] = i;
2591: ncols_upper++;
2592: }
2593: }
2594: PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt);
2595: nzk += nlnk;
2597: /* update lnk by computing fill-in for each pivot row to be merged in */
2598: prow = jl[k]; /* 1st pivot row */
2600: while (prow < k) {
2601: nextprow = jl[prow];
2603: /* merge prow into k-th row */
2604: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2605: jmax = ui[prow + 1];
2606: ncols = jmax - jmin;
2607: i = jmin - ui[prow];
2608: cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2609: uj = uj_lvl_ptr[prow] + i; /* levels of cols */
2610: j = *(uj - 1);
2611: PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j);
2612: nzk += nlnk;
2614: /* update il and jl for prow */
2615: if (jmin < jmax) {
2616: il[prow] = jmin;
2617: j = *cols;
2618: jl[prow] = jl[j];
2619: jl[j] = prow;
2620: }
2621: prow = nextprow;
2622: }
2624: /* if free space is not available, make more free space */
2625: if (current_space->local_remaining < nzk) {
2626: i = am - k + 1; /* num of unfactored rows */
2627: i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2628: PetscFreeSpaceGet(i, ¤t_space);
2629: PetscFreeSpaceGet(i, ¤t_space_lvl);
2630: reallocs++;
2631: }
2633: /* copy data into free_space and free_space_lvl, then initialize lnk */
2635: PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt);
2637: /* add the k-th row into il and jl */
2638: if (nzk > 1) {
2639: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2640: jl[k] = jl[i];
2641: jl[i] = k;
2642: il[k] = ui[k] + 1;
2643: }
2644: uj_ptr[k] = current_space->array;
2645: uj_lvl_ptr[k] = current_space_lvl->array;
2647: current_space->array += nzk;
2648: current_space->local_used += nzk;
2649: current_space->local_remaining -= nzk;
2651: current_space_lvl->array += nzk;
2652: current_space_lvl->local_used += nzk;
2653: current_space_lvl->local_remaining -= nzk;
2655: ui[k + 1] = ui[k] + nzk;
2656: }
2658: #if defined(PETSC_USE_INFO)
2659: if (ai[am] != 0) {
2660: PetscReal af = (PetscReal)ui[am] / ((PetscReal)ai[am]);
2661: PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af);
2662: PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af);
2663: PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af);
2664: } else {
2665: PetscInfo(A, "Empty matrix\n");
2666: }
2667: #endif
2669: ISRestoreIndices(perm, &rip);
2670: ISRestoreIndices(iperm, &riip);
2671: PetscFree4(uj_ptr, uj_lvl_ptr, jl, il);
2672: PetscFree(ajtmp);
2674: /* destroy list of free space and other temporary array(s) */
2675: PetscMalloc1(ui[am] + 1, &uj);
2676: PetscFreeSpaceContiguous(&free_space, uj);
2677: PetscIncompleteLLDestroy(lnk, lnkbt);
2678: PetscFreeSpaceDestroy(free_space_lvl);
2680: } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2682: /* put together the new matrix in MATSEQSBAIJ format */
2684: b = (Mat_SeqSBAIJ *)fact->data;
2685: b->singlemalloc = PETSC_FALSE;
2687: PetscMalloc1(ui[am] + 1, &b->a);
2689: b->j = uj;
2690: b->i = ui;
2691: b->diag = udiag;
2692: b->free_diag = PETSC_TRUE;
2693: b->ilen = NULL;
2694: b->imax = NULL;
2695: b->row = perm;
2696: b->col = perm;
2698: PetscObjectReference((PetscObject)perm);
2699: PetscObjectReference((PetscObject)perm);
2701: b->icol = iperm;
2702: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2703: PetscMalloc1(am + 1, &b->solve_work);
2704: b->maxnz = b->nz = ui[am];
2705: b->free_a = PETSC_TRUE;
2706: b->free_ij = PETSC_TRUE;
2708: fact->info.factor_mallocs = reallocs;
2709: fact->info.fill_ratio_given = fill;
2710: if (ai[am] != 0) {
2711: fact->info.fill_ratio_needed = ((PetscReal)ui[am]) / ((PetscReal)ai[am]);
2712: } else {
2713: fact->info.fill_ratio_needed = 0.0;
2714: }
2715: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
2716: return 0;
2717: }
2719: PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2720: {
2721: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
2722: Mat_SeqSBAIJ *b;
2723: PetscBool perm_identity, missing;
2724: PetscReal fill = info->fill;
2725: const PetscInt *rip, *riip;
2726: PetscInt i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow;
2727: PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
2728: PetscInt nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr, *udiag;
2729: PetscFreeSpaceList free_space = NULL, current_space = NULL;
2730: PetscBT lnkbt;
2731: IS iperm;
2734: MatMissingDiagonal(A, &missing, &i);
2737: /* check whether perm is the identity mapping */
2738: ISIdentity(perm, &perm_identity);
2739: ISInvertPermutation(perm, PETSC_DECIDE, &iperm);
2740: ISGetIndices(iperm, &riip);
2741: ISGetIndices(perm, &rip);
2743: /* initialization */
2744: PetscMalloc1(am + 1, &ui);
2745: PetscMalloc1(am + 1, &udiag);
2746: ui[0] = 0;
2748: /* jl: linked list for storing indices of the pivot rows
2749: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2750: PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols);
2751: for (i = 0; i < am; i++) {
2752: jl[i] = am;
2753: il[i] = 0;
2754: }
2756: /* create and initialize a linked list for storing column indices of the active row k */
2757: nlnk = am + 1;
2758: PetscLLCreate(am, am, nlnk, lnk, lnkbt);
2760: /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2761: PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space);
2762: current_space = free_space;
2764: for (k = 0; k < am; k++) { /* for each active row k */
2765: /* initialize lnk by the column indices of row rip[k] of A */
2766: nzk = 0;
2767: ncols = ai[rip[k] + 1] - ai[rip[k]];
2769: ncols_upper = 0;
2770: for (j = 0; j < ncols; j++) {
2771: i = riip[*(aj + ai[rip[k]] + j)];
2772: if (i >= k) { /* only take upper triangular entry */
2773: cols[ncols_upper] = i;
2774: ncols_upper++;
2775: }
2776: }
2777: PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt);
2778: nzk += nlnk;
2780: /* update lnk by computing fill-in for each pivot row to be merged in */
2781: prow = jl[k]; /* 1st pivot row */
2783: while (prow < k) {
2784: nextprow = jl[prow];
2785: /* merge prow into k-th row */
2786: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2787: jmax = ui[prow + 1];
2788: ncols = jmax - jmin;
2789: uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2790: PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt);
2791: nzk += nlnk;
2793: /* update il and jl for prow */
2794: if (jmin < jmax) {
2795: il[prow] = jmin;
2796: j = *uj_ptr;
2797: jl[prow] = jl[j];
2798: jl[j] = prow;
2799: }
2800: prow = nextprow;
2801: }
2803: /* if free space is not available, make more free space */
2804: if (current_space->local_remaining < nzk) {
2805: i = am - k + 1; /* num of unfactored rows */
2806: i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2807: PetscFreeSpaceGet(i, ¤t_space);
2808: reallocs++;
2809: }
2811: /* copy data into free space, then initialize lnk */
2812: PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt);
2814: /* add the k-th row into il and jl */
2815: if (nzk > 1) {
2816: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2817: jl[k] = jl[i];
2818: jl[i] = k;
2819: il[k] = ui[k] + 1;
2820: }
2821: ui_ptr[k] = current_space->array;
2823: current_space->array += nzk;
2824: current_space->local_used += nzk;
2825: current_space->local_remaining -= nzk;
2827: ui[k + 1] = ui[k] + nzk;
2828: }
2830: ISRestoreIndices(perm, &rip);
2831: ISRestoreIndices(iperm, &riip);
2832: PetscFree4(ui_ptr, jl, il, cols);
2834: /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2835: PetscMalloc1(ui[am] + 1, &uj);
2836: PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag); /* store matrix factor */
2837: PetscLLDestroy(lnk, lnkbt);
2839: /* put together the new matrix in MATSEQSBAIJ format */
2841: b = (Mat_SeqSBAIJ *)fact->data;
2842: b->singlemalloc = PETSC_FALSE;
2843: b->free_a = PETSC_TRUE;
2844: b->free_ij = PETSC_TRUE;
2846: PetscMalloc1(ui[am] + 1, &b->a);
2848: b->j = uj;
2849: b->i = ui;
2850: b->diag = udiag;
2851: b->free_diag = PETSC_TRUE;
2852: b->ilen = NULL;
2853: b->imax = NULL;
2854: b->row = perm;
2855: b->col = perm;
2857: PetscObjectReference((PetscObject)perm);
2858: PetscObjectReference((PetscObject)perm);
2860: b->icol = iperm;
2861: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2863: PetscMalloc1(am + 1, &b->solve_work);
2865: b->maxnz = b->nz = ui[am];
2867: fact->info.factor_mallocs = reallocs;
2868: fact->info.fill_ratio_given = fill;
2869: if (ai[am] != 0) {
2870: /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2871: fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
2872: } else {
2873: fact->info.fill_ratio_needed = 0.0;
2874: }
2875: #if defined(PETSC_USE_INFO)
2876: if (ai[am] != 0) {
2877: PetscReal af = fact->info.fill_ratio_needed;
2878: PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af);
2879: PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af);
2880: PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af);
2881: } else {
2882: PetscInfo(A, "Empty matrix\n");
2883: }
2884: #endif
2885: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2886: return 0;
2887: }
2889: PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2890: {
2891: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
2892: Mat_SeqSBAIJ *b;
2893: PetscBool perm_identity, missing;
2894: PetscReal fill = info->fill;
2895: const PetscInt *rip, *riip;
2896: PetscInt i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow;
2897: PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
2898: PetscInt nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr;
2899: PetscFreeSpaceList free_space = NULL, current_space = NULL;
2900: PetscBT lnkbt;
2901: IS iperm;
2904: MatMissingDiagonal(A, &missing, &i);
2907: /* check whether perm is the identity mapping */
2908: ISIdentity(perm, &perm_identity);
2909: ISInvertPermutation(perm, PETSC_DECIDE, &iperm);
2910: ISGetIndices(iperm, &riip);
2911: ISGetIndices(perm, &rip);
2913: /* initialization */
2914: PetscMalloc1(am + 1, &ui);
2915: ui[0] = 0;
2917: /* jl: linked list for storing indices of the pivot rows
2918: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2919: PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols);
2920: for (i = 0; i < am; i++) {
2921: jl[i] = am;
2922: il[i] = 0;
2923: }
2925: /* create and initialize a linked list for storing column indices of the active row k */
2926: nlnk = am + 1;
2927: PetscLLCreate(am, am, nlnk, lnk, lnkbt);
2929: /* initial FreeSpace size is fill*(ai[am]+1) */
2930: PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space);
2931: current_space = free_space;
2933: for (k = 0; k < am; k++) { /* for each active row k */
2934: /* initialize lnk by the column indices of row rip[k] of A */
2935: nzk = 0;
2936: ncols = ai[rip[k] + 1] - ai[rip[k]];
2938: ncols_upper = 0;
2939: for (j = 0; j < ncols; j++) {
2940: i = riip[*(aj + ai[rip[k]] + j)];
2941: if (i >= k) { /* only take upper triangular entry */
2942: cols[ncols_upper] = i;
2943: ncols_upper++;
2944: }
2945: }
2946: PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt);
2947: nzk += nlnk;
2949: /* update lnk by computing fill-in for each pivot row to be merged in */
2950: prow = jl[k]; /* 1st pivot row */
2952: while (prow < k) {
2953: nextprow = jl[prow];
2954: /* merge prow into k-th row */
2955: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2956: jmax = ui[prow + 1];
2957: ncols = jmax - jmin;
2958: uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2959: PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt);
2960: nzk += nlnk;
2962: /* update il and jl for prow */
2963: if (jmin < jmax) {
2964: il[prow] = jmin;
2965: j = *uj_ptr;
2966: jl[prow] = jl[j];
2967: jl[j] = prow;
2968: }
2969: prow = nextprow;
2970: }
2972: /* if free space is not available, make more free space */
2973: if (current_space->local_remaining < nzk) {
2974: i = am - k + 1; /* num of unfactored rows */
2975: i = PetscMin(i * nzk, i * (i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2976: PetscFreeSpaceGet(i, ¤t_space);
2977: reallocs++;
2978: }
2980: /* copy data into free space, then initialize lnk */
2981: PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt);
2983: /* add the k-th row into il and jl */
2984: if (nzk - 1 > 0) {
2985: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2986: jl[k] = jl[i];
2987: jl[i] = k;
2988: il[k] = ui[k] + 1;
2989: }
2990: ui_ptr[k] = current_space->array;
2992: current_space->array += nzk;
2993: current_space->local_used += nzk;
2994: current_space->local_remaining -= nzk;
2996: ui[k + 1] = ui[k] + nzk;
2997: }
2999: #if defined(PETSC_USE_INFO)
3000: if (ai[am] != 0) {
3001: PetscReal af = (PetscReal)(ui[am]) / ((PetscReal)ai[am]);
3002: PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af);
3003: PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af);
3004: PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af);
3005: } else {
3006: PetscInfo(A, "Empty matrix\n");
3007: }
3008: #endif
3010: ISRestoreIndices(perm, &rip);
3011: ISRestoreIndices(iperm, &riip);
3012: PetscFree4(ui_ptr, jl, il, cols);
3014: /* destroy list of free space and other temporary array(s) */
3015: PetscMalloc1(ui[am] + 1, &uj);
3016: PetscFreeSpaceContiguous(&free_space, uj);
3017: PetscLLDestroy(lnk, lnkbt);
3019: /* put together the new matrix in MATSEQSBAIJ format */
3021: b = (Mat_SeqSBAIJ *)fact->data;
3022: b->singlemalloc = PETSC_FALSE;
3023: b->free_a = PETSC_TRUE;
3024: b->free_ij = PETSC_TRUE;
3026: PetscMalloc1(ui[am] + 1, &b->a);
3028: b->j = uj;
3029: b->i = ui;
3030: b->diag = NULL;
3031: b->ilen = NULL;
3032: b->imax = NULL;
3033: b->row = perm;
3034: b->col = perm;
3036: PetscObjectReference((PetscObject)perm);
3037: PetscObjectReference((PetscObject)perm);
3039: b->icol = iperm;
3040: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
3042: PetscMalloc1(am + 1, &b->solve_work);
3043: b->maxnz = b->nz = ui[am];
3045: fact->info.factor_mallocs = reallocs;
3046: fact->info.fill_ratio_given = fill;
3047: if (ai[am] != 0) {
3048: fact->info.fill_ratio_needed = ((PetscReal)ui[am]) / ((PetscReal)ai[am]);
3049: } else {
3050: fact->info.fill_ratio_needed = 0.0;
3051: }
3052: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
3053: return 0;
3054: }
3056: PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A, Vec bb, Vec xx)
3057: {
3058: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
3059: PetscInt n = A->rmap->n;
3060: const PetscInt *ai = a->i, *aj = a->j, *adiag = a->diag, *vi;
3061: PetscScalar *x, sum;
3062: const PetscScalar *b;
3063: const MatScalar *aa = a->a, *v;
3064: PetscInt i, nz;
3066: if (!n) return 0;
3068: VecGetArrayRead(bb, &b);
3069: VecGetArrayWrite(xx, &x);
3071: /* forward solve the lower triangular */
3072: x[0] = b[0];
3073: v = aa;
3074: vi = aj;
3075: for (i = 1; i < n; i++) {
3076: nz = ai[i + 1] - ai[i];
3077: sum = b[i];
3078: PetscSparseDenseMinusDot(sum, x, v, vi, nz);
3079: v += nz;
3080: vi += nz;
3081: x[i] = sum;
3082: }
3084: /* backward solve the upper triangular */
3085: for (i = n - 1; i >= 0; i--) {
3086: v = aa + adiag[i + 1] + 1;
3087: vi = aj + adiag[i + 1] + 1;
3088: nz = adiag[i] - adiag[i + 1] - 1;
3089: sum = x[i];
3090: PetscSparseDenseMinusDot(sum, x, v, vi, nz);
3091: x[i] = sum * v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
3092: }
3094: PetscLogFlops(2.0 * a->nz - A->cmap->n);
3095: VecRestoreArrayRead(bb, &b);
3096: VecRestoreArrayWrite(xx, &x);
3097: return 0;
3098: }
3100: PetscErrorCode MatSolve_SeqAIJ(Mat A, Vec bb, Vec xx)
3101: {
3102: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
3103: IS iscol = a->col, isrow = a->row;
3104: PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, nz;
3105: const PetscInt *rout, *cout, *r, *c;
3106: PetscScalar *x, *tmp, sum;
3107: const PetscScalar *b;
3108: const MatScalar *aa = a->a, *v;
3110: if (!n) return 0;
3112: VecGetArrayRead(bb, &b);
3113: VecGetArrayWrite(xx, &x);
3114: tmp = a->solve_work;
3116: ISGetIndices(isrow, &rout);
3117: r = rout;
3118: ISGetIndices(iscol, &cout);
3119: c = cout;
3121: /* forward solve the lower triangular */
3122: tmp[0] = b[r[0]];
3123: v = aa;
3124: vi = aj;
3125: for (i = 1; i < n; i++) {
3126: nz = ai[i + 1] - ai[i];
3127: sum = b[r[i]];
3128: PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
3129: tmp[i] = sum;
3130: v += nz;
3131: vi += nz;
3132: }
3134: /* backward solve the upper triangular */
3135: for (i = n - 1; i >= 0; i--) {
3136: v = aa + adiag[i + 1] + 1;
3137: vi = aj + adiag[i + 1] + 1;
3138: nz = adiag[i] - adiag[i + 1] - 1;
3139: sum = tmp[i];
3140: PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
3141: x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
3142: }
3144: ISRestoreIndices(isrow, &rout);
3145: ISRestoreIndices(iscol, &cout);
3146: VecRestoreArrayRead(bb, &b);
3147: VecRestoreArrayWrite(xx, &x);
3148: PetscLogFlops(2.0 * a->nz - A->cmap->n);
3149: return 0;
3150: }
3152: /*
3153: This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3154: */
3155: PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A, IS isrow, IS iscol, const MatFactorInfo *info, Mat *fact)
3156: {
3157: Mat B = *fact;
3158: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b;
3159: IS isicol;
3160: const PetscInt *r, *ic;
3161: PetscInt i, n = A->rmap->n, *ai = a->i, *aj = a->j, *ajtmp, *adiag;
3162: PetscInt *bi, *bj, *bdiag, *bdiag_rev;
3163: PetscInt row, nzi, nzi_bl, nzi_bu, *im, nzi_al, nzi_au;
3164: PetscInt nlnk, *lnk;
3165: PetscBT lnkbt;
3166: PetscBool row_identity, icol_identity;
3167: MatScalar *aatmp, *pv, *batmp, *ba, *rtmp, *pc, multiplier, *vtmp, diag_tmp;
3168: const PetscInt *ics;
3169: PetscInt j, nz, *pj, *bjtmp, k, ncut, *jtmp;
3170: PetscReal dt = info->dt, shift = info->shiftamount;
3171: PetscInt dtcount = (PetscInt)info->dtcount, nnz_max;
3172: PetscBool missing;
3174: if (dt == PETSC_DEFAULT) dt = 0.005;
3175: if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5 * a->rmax);
3177: /* ------- symbolic factorization, can be reused ---------*/
3178: MatMissingDiagonal(A, &missing, &i);
3180: adiag = a->diag;
3182: ISInvertPermutation(iscol, PETSC_DECIDE, &isicol);
3184: /* bdiag is location of diagonal in factor */
3185: PetscMalloc1(n + 1, &bdiag); /* becomes b->diag */
3186: PetscMalloc1(n + 1, &bdiag_rev); /* temporary */
3188: /* allocate row pointers bi */
3189: PetscMalloc1(2 * n + 2, &bi);
3191: /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
3192: if (dtcount > n - 1) dtcount = n - 1; /* diagonal is excluded */
3193: nnz_max = ai[n] + 2 * n * dtcount + 2;
3195: PetscMalloc1(nnz_max + 1, &bj);
3196: PetscMalloc1(nnz_max + 1, &ba);
3198: /* put together the new matrix */
3199: MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL);
3200: b = (Mat_SeqAIJ *)B->data;
3202: b->free_a = PETSC_TRUE;
3203: b->free_ij = PETSC_TRUE;
3204: b->singlemalloc = PETSC_FALSE;
3206: b->a = ba;
3207: b->j = bj;
3208: b->i = bi;
3209: b->diag = bdiag;
3210: b->ilen = NULL;
3211: b->imax = NULL;
3212: b->row = isrow;
3213: b->col = iscol;
3214: PetscObjectReference((PetscObject)isrow);
3215: PetscObjectReference((PetscObject)iscol);
3216: b->icol = isicol;
3218: PetscMalloc1(n + 1, &b->solve_work);
3219: b->maxnz = nnz_max;
3221: B->factortype = MAT_FACTOR_ILUDT;
3222: B->info.factor_mallocs = 0;
3223: B->info.fill_ratio_given = ((PetscReal)nnz_max) / ((PetscReal)ai[n]);
3224: /* ------- end of symbolic factorization ---------*/
3226: ISGetIndices(isrow, &r);
3227: ISGetIndices(isicol, &ic);
3228: ics = ic;
3230: /* linked list for storing column indices of the active row */
3231: nlnk = n + 1;
3232: PetscLLCreate(n, n, nlnk, lnk, lnkbt);
3234: /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
3235: PetscMalloc2(n, &im, n, &jtmp);
3236: /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
3237: PetscMalloc2(n, &rtmp, n, &vtmp);
3238: PetscArrayzero(rtmp, n);
3240: bi[0] = 0;
3241: bdiag[0] = nnz_max - 1; /* location of diag[0] in factor B */
3242: bdiag_rev[n] = bdiag[0];
3243: bi[2 * n + 1] = bdiag[0] + 1; /* endof bj and ba array */
3244: for (i = 0; i < n; i++) {
3245: /* copy initial fill into linked list */
3246: nzi = ai[r[i] + 1] - ai[r[i]];
3248: nzi_al = adiag[r[i]] - ai[r[i]];
3249: nzi_au = ai[r[i] + 1] - adiag[r[i]] - 1;
3250: ajtmp = aj + ai[r[i]];
3251: PetscLLAddPerm(nzi, ajtmp, ic, n, &nlnk, lnk, lnkbt);
3253: /* load in initial (unfactored row) */
3254: aatmp = a->a + ai[r[i]];
3255: for (j = 0; j < nzi; j++) rtmp[ics[*ajtmp++]] = *aatmp++;
3257: /* add pivot rows into linked list */
3258: row = lnk[n];
3259: while (row < i) {
3260: nzi_bl = bi[row + 1] - bi[row] + 1;
3261: bjtmp = bj + bdiag[row + 1] + 1; /* points to 1st column next to the diagonal in U */
3262: PetscLLAddSortedLU(bjtmp, row, &nlnk, lnk, lnkbt, i, nzi_bl, im);
3263: nzi += nlnk;
3264: row = lnk[row];
3265: }
3267: /* copy data from lnk into jtmp, then initialize lnk */
3268: PetscLLClean(n, n, nzi, lnk, jtmp, lnkbt);
3270: /* numerical factorization */
3271: bjtmp = jtmp;
3272: row = *bjtmp++; /* 1st pivot row */
3273: while (row < i) {
3274: pc = rtmp + row;
3275: pv = ba + bdiag[row]; /* 1./(diag of the pivot row) */
3276: multiplier = (*pc) * (*pv);
3277: *pc = multiplier;
3278: if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */
3279: pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */
3280: pv = ba + bdiag[row + 1] + 1;
3281: nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */
3282: for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3283: PetscLogFlops(1 + 2.0 * nz);
3284: }
3285: row = *bjtmp++;
3286: }
3288: /* copy sparse rtmp into contiguous vtmp; separate L and U part */
3289: diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */
3290: nzi_bl = 0;
3291: j = 0;
3292: while (jtmp[j] < i) { /* Note: jtmp is sorted */
3293: vtmp[j] = rtmp[jtmp[j]];
3294: rtmp[jtmp[j]] = 0.0;
3295: nzi_bl++;
3296: j++;
3297: }
3298: nzi_bu = nzi - nzi_bl - 1;
3299: while (j < nzi) {
3300: vtmp[j] = rtmp[jtmp[j]];
3301: rtmp[jtmp[j]] = 0.0;
3302: j++;
3303: }
3305: bjtmp = bj + bi[i];
3306: batmp = ba + bi[i];
3307: /* apply level dropping rule to L part */
3308: ncut = nzi_al + dtcount;
3309: if (ncut < nzi_bl) {
3310: PetscSortSplit(ncut, nzi_bl, vtmp, jtmp);
3311: PetscSortIntWithScalarArray(ncut, jtmp, vtmp);
3312: } else {
3313: ncut = nzi_bl;
3314: }
3315: for (j = 0; j < ncut; j++) {
3316: bjtmp[j] = jtmp[j];
3317: batmp[j] = vtmp[j];
3318: }
3319: bi[i + 1] = bi[i] + ncut;
3320: nzi = ncut + 1;
3322: /* apply level dropping rule to U part */
3323: ncut = nzi_au + dtcount;
3324: if (ncut < nzi_bu) {
3325: PetscSortSplit(ncut, nzi_bu, vtmp + nzi_bl + 1, jtmp + nzi_bl + 1);
3326: PetscSortIntWithScalarArray(ncut, jtmp + nzi_bl + 1, vtmp + nzi_bl + 1);
3327: } else {
3328: ncut = nzi_bu;
3329: }
3330: nzi += ncut;
3332: /* mark bdiagonal */
3333: bdiag[i + 1] = bdiag[i] - (ncut + 1);
3334: bdiag_rev[n - i - 1] = bdiag[i + 1];
3335: bi[2 * n - i] = bi[2 * n - i + 1] - (ncut + 1);
3336: bjtmp = bj + bdiag[i];
3337: batmp = ba + bdiag[i];
3338: *bjtmp = i;
3339: *batmp = diag_tmp; /* rtmp[i]; */
3340: if (*batmp == 0.0) *batmp = dt + shift;
3341: *batmp = 1.0 / (*batmp); /* invert diagonal entries for simpler triangular solves */
3343: bjtmp = bj + bdiag[i + 1] + 1;
3344: batmp = ba + bdiag[i + 1] + 1;
3345: for (k = 0; k < ncut; k++) {
3346: bjtmp[k] = jtmp[nzi_bl + 1 + k];
3347: batmp[k] = vtmp[nzi_bl + 1 + k];
3348: }
3350: im[i] = nzi; /* used by PetscLLAddSortedLU() */
3351: } /* for (i=0; i<n; i++) */
3354: ISRestoreIndices(isrow, &r);
3355: ISRestoreIndices(isicol, &ic);
3357: PetscLLDestroy(lnk, lnkbt);
3358: PetscFree2(im, jtmp);
3359: PetscFree2(rtmp, vtmp);
3360: PetscFree(bdiag_rev);
3362: PetscLogFlops(B->cmap->n);
3363: b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n];
3365: ISIdentity(isrow, &row_identity);
3366: ISIdentity(isicol, &icol_identity);
3367: if (row_identity && icol_identity) {
3368: B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3369: } else {
3370: B->ops->solve = MatSolve_SeqAIJ;
3371: }
3373: B->ops->solveadd = NULL;
3374: B->ops->solvetranspose = NULL;
3375: B->ops->solvetransposeadd = NULL;
3376: B->ops->matsolve = NULL;
3377: B->assembled = PETSC_TRUE;
3378: B->preallocated = PETSC_TRUE;
3379: return 0;
3380: }
3382: /* a wrapper of MatILUDTFactor_SeqAIJ() */
3383: /*
3384: This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3385: */
3387: PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS row, IS col, const MatFactorInfo *info)
3388: {
3389: MatILUDTFactor_SeqAIJ(A, row, col, info, &fact);
3390: return 0;
3391: }
3393: /*
3394: same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors
3395: - intend to replace existing MatLUFactorNumeric_SeqAIJ()
3396: */
3397: /*
3398: This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3399: */
3401: PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact, Mat A, const MatFactorInfo *info)
3402: {
3403: Mat C = fact;
3404: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
3405: IS isrow = b->row, isicol = b->icol;
3406: const PetscInt *r, *ic, *ics;
3407: PetscInt i, j, k, n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
3408: PetscInt *ajtmp, *bjtmp, nz, nzl, nzu, row, *bdiag = b->diag, *pj;
3409: MatScalar *rtmp, *pc, multiplier, *v, *pv, *aa = a->a;
3410: PetscReal dt = info->dt, shift = info->shiftamount;
3411: PetscBool row_identity, col_identity;
3413: ISGetIndices(isrow, &r);
3414: ISGetIndices(isicol, &ic);
3415: PetscMalloc1(n + 1, &rtmp);
3416: ics = ic;
3418: for (i = 0; i < n; i++) {
3419: /* initialize rtmp array */
3420: nzl = bi[i + 1] - bi[i]; /* num of nozeros in L(i,:) */
3421: bjtmp = bj + bi[i];
3422: for (j = 0; j < nzl; j++) rtmp[*bjtmp++] = 0.0;
3423: rtmp[i] = 0.0;
3424: nzu = bdiag[i] - bdiag[i + 1]; /* num of nozeros in U(i,:) */
3425: bjtmp = bj + bdiag[i + 1] + 1;
3426: for (j = 0; j < nzu; j++) rtmp[*bjtmp++] = 0.0;
3428: /* load in initial unfactored row of A */
3429: nz = ai[r[i] + 1] - ai[r[i]];
3430: ajtmp = aj + ai[r[i]];
3431: v = aa + ai[r[i]];
3432: for (j = 0; j < nz; j++) rtmp[ics[*ajtmp++]] = v[j];
3434: /* numerical factorization */
3435: bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */
3436: nzl = bi[i + 1] - bi[i]; /* num of entries in L(i,:) */
3437: k = 0;
3438: while (k < nzl) {
3439: row = *bjtmp++;
3440: pc = rtmp + row;
3441: pv = b->a + bdiag[row]; /* 1./(diag of the pivot row) */
3442: multiplier = (*pc) * (*pv);
3443: *pc = multiplier;
3444: if (PetscAbsScalar(multiplier) > dt) {
3445: pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */
3446: pv = b->a + bdiag[row + 1] + 1;
3447: nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */
3448: for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3449: PetscLogFlops(1 + 2.0 * nz);
3450: }
3451: k++;
3452: }
3454: /* finished row so stick it into b->a */
3455: /* L-part */
3456: pv = b->a + bi[i];
3457: pj = bj + bi[i];
3458: nzl = bi[i + 1] - bi[i];
3459: for (j = 0; j < nzl; j++) pv[j] = rtmp[pj[j]];
3461: /* diagonal: invert diagonal entries for simpler triangular solves */
3462: if (rtmp[i] == 0.0) rtmp[i] = dt + shift;
3463: b->a[bdiag[i]] = 1.0 / rtmp[i];
3465: /* U-part */
3466: pv = b->a + bdiag[i + 1] + 1;
3467: pj = bj + bdiag[i + 1] + 1;
3468: nzu = bdiag[i] - bdiag[i + 1] - 1;
3469: for (j = 0; j < nzu; j++) pv[j] = rtmp[pj[j]];
3470: }
3472: PetscFree(rtmp);
3473: ISRestoreIndices(isicol, &ic);
3474: ISRestoreIndices(isrow, &r);
3476: ISIdentity(isrow, &row_identity);
3477: ISIdentity(isicol, &col_identity);
3478: if (row_identity && col_identity) {
3479: C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3480: } else {
3481: C->ops->solve = MatSolve_SeqAIJ;
3482: }
3483: C->ops->solveadd = NULL;
3484: C->ops->solvetranspose = NULL;
3485: C->ops->solvetransposeadd = NULL;
3486: C->ops->matsolve = NULL;
3487: C->assembled = PETSC_TRUE;
3488: C->preallocated = PETSC_TRUE;
3490: PetscLogFlops(C->cmap->n);
3491: return 0;
3492: }