Actual source code: baijfact3.c
2: /*
3: Factorization code for BAIJ format.
4: */
5: #include <../src/mat/impls/baij/seq/baij.h>
6: #include <petsc/private/kernels/blockinvert.h>
8: /*
9: This is used to set the numeric factorization for both LU and ILU symbolic factorization
10: */
11: PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat fact, PetscBool natural)
12: {
13: if (natural) {
14: switch (fact->rmap->bs) {
15: case 1:
16: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
17: break;
18: case 2:
19: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering;
20: break;
21: case 3:
22: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering;
23: break;
24: case 4:
25: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
26: break;
27: case 5:
28: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering;
29: break;
30: case 6:
31: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering;
32: break;
33: case 7:
34: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering;
35: break;
36: case 9:
37: #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
38: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_9_NaturalOrdering;
39: #else
40: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
41: #endif
42: break;
43: case 15:
44: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_15_NaturalOrdering;
45: break;
46: default:
47: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
48: break;
49: }
50: } else {
51: switch (fact->rmap->bs) {
52: case 1:
53: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
54: break;
55: case 2:
56: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
57: break;
58: case 3:
59: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
60: break;
61: case 4:
62: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
63: break;
64: case 5:
65: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
66: break;
67: case 6:
68: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
69: break;
70: case 7:
71: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
72: break;
73: default:
74: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
75: break;
76: }
77: }
78: return 0;
79: }
81: PetscErrorCode MatSeqBAIJSetNumericFactorization_inplace(Mat inA, PetscBool natural)
82: {
83: if (natural) {
84: switch (inA->rmap->bs) {
85: case 1:
86: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace;
87: break;
88: case 2:
89: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_inplace;
90: break;
91: case 3:
92: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering_inplace;
93: break;
94: case 4:
95: #if defined(PETSC_USE_REAL_MAT_SINGLE)
96: {
97: PetscBool sse_enabled_local;
98: PetscSSEIsEnabled(inA->comm, &sse_enabled_local, NULL);
99: if (sse_enabled_local) {
100: #if defined(PETSC_HAVE_SSE)
101: int i, *AJ = a->j, nz = a->nz, n = a->mbs;
102: if (n == (unsigned short)n) {
103: unsigned short *aj = (unsigned short *)AJ;
104: for (i = 0; i < nz; i++) aj[i] = (unsigned short)AJ[i];
106: inA->ops->setunfactored = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE_usj;
107: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE_usj;
109: PetscInfo(inA, "Using special SSE, in-place natural ordering, ushort j index factor BS=4\n");
110: } else {
111: /* Scale the column indices for easier indexing in MatSolve. */
112: /* for (i=0;i<nz;i++) { */
113: /* AJ[i] = AJ[i]*4; */
114: /* } */
115: inA->ops->setunfactored = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE;
116: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE;
118: PetscInfo(inA, "Using special SSE, in-place natural ordering, int j index factor BS=4\n");
119: }
120: #else
121: /* This should never be reached. If so, problem in PetscSSEIsEnabled. */
122: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "SSE Hardware unavailable");
123: #endif
124: } else {
125: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace;
126: }
127: }
128: #else
129: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace;
130: #endif
131: break;
132: case 5:
133: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering_inplace;
134: break;
135: case 6:
136: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering_inplace;
137: break;
138: case 7:
139: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering_inplace;
140: break;
141: default:
142: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace;
143: break;
144: }
145: } else {
146: switch (inA->rmap->bs) {
147: case 1:
148: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace;
149: break;
150: case 2:
151: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_inplace;
152: break;
153: case 3:
154: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_inplace;
155: break;
156: case 4:
157: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_inplace;
158: break;
159: case 5:
160: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_inplace;
161: break;
162: case 6:
163: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_inplace;
164: break;
165: case 7:
166: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_inplace;
167: break;
168: default:
169: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace;
170: break;
171: }
172: }
173: return 0;
174: }
176: /*
177: The symbolic factorization code is identical to that for AIJ format,
178: except for very small changes since this is now a SeqBAIJ datastructure.
179: NOT good code reuse.
180: */
181: #include <petscbt.h>
182: #include <../src/mat/utils/freespace.h>
184: PetscErrorCode MatLUFactorSymbolic_SeqBAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
185: {
186: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b;
187: PetscInt n = a->mbs, bs = A->rmap->bs, bs2 = a->bs2;
188: PetscBool row_identity, col_identity, both_identity;
189: IS isicol;
190: const PetscInt *r, *ic;
191: PetscInt i, *ai = a->i, *aj = a->j;
192: PetscInt *bi, *bj, *ajtmp;
193: PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
194: PetscReal f;
195: PetscInt nlnk, *lnk, k, **bi_ptr;
196: PetscFreeSpaceList free_space = NULL, current_space = NULL;
197: PetscBT lnkbt;
198: PetscBool missing;
201: MatMissingDiagonal(A, &missing, &i);
204: if (bs > 1) { /* check shifttype */
206: }
208: ISInvertPermutation(iscol, PETSC_DECIDE, &isicol);
209: ISGetIndices(isrow, &r);
210: ISGetIndices(isicol, &ic);
212: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
213: PetscMalloc1(n + 1, &bi);
214: PetscMalloc1(n + 1, &bdiag);
215: bi[0] = bdiag[0] = 0;
217: /* linked list for storing column indices of the active row */
218: nlnk = n + 1;
219: PetscLLCreate(n, n, nlnk, lnk, lnkbt);
221: PetscMalloc2(n + 1, &bi_ptr, n + 1, &im);
223: /* initial FreeSpace size is f*(ai[n]+1) */
224: f = info->fill;
225: PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space);
227: current_space = free_space;
229: for (i = 0; i < n; i++) {
230: /* copy previous fill into linked list */
231: nzi = 0;
232: nnz = ai[r[i] + 1] - ai[r[i]];
233: ajtmp = aj + ai[r[i]];
234: PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt);
235: nzi += nlnk;
237: /* add pivot rows into linked list */
238: row = lnk[n];
239: while (row < i) {
240: nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */
241: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
242: PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im);
243: nzi += nlnk;
244: row = lnk[row];
245: }
246: bi[i + 1] = bi[i] + nzi;
247: im[i] = nzi;
249: /* mark bdiag */
250: nzbd = 0;
251: nnz = nzi;
252: k = lnk[n];
253: while (nnz-- && k < i) {
254: nzbd++;
255: k = lnk[k];
256: }
257: bdiag[i] = nzbd; /* note : bdaig[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
259: /* if free space is not available, make more free space */
260: if (current_space->local_remaining < nzi) {
261: nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - i, nzi)); /* estimated and max additional space needed */
262: PetscFreeSpaceGet(nnz, ¤t_space);
263: reallocs++;
264: }
266: /* copy data into free space, then initialize lnk */
267: PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt);
269: bi_ptr[i] = current_space->array;
270: current_space->array += nzi;
271: current_space->local_used += nzi;
272: current_space->local_remaining -= nzi;
273: }
275: ISRestoreIndices(isrow, &r);
276: ISRestoreIndices(isicol, &ic);
278: /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
279: PetscMalloc1(bi[n] + 1, &bj);
280: PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag);
281: PetscLLDestroy(lnk, lnkbt);
282: PetscFree2(bi_ptr, im);
284: /* put together the new matrix */
285: MatSeqBAIJSetPreallocation(B, bs, MAT_SKIP_ALLOCATION, NULL);
286: b = (Mat_SeqBAIJ *)(B)->data;
288: b->free_a = PETSC_TRUE;
289: b->free_ij = PETSC_TRUE;
290: b->singlemalloc = PETSC_FALSE;
292: PetscMalloc1((bdiag[0] + 1) * bs2, &b->a);
293: b->j = bj;
294: b->i = bi;
295: b->diag = bdiag;
296: b->free_diag = PETSC_TRUE;
297: b->ilen = NULL;
298: b->imax = NULL;
299: b->row = isrow;
300: b->col = iscol;
301: b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;
303: PetscObjectReference((PetscObject)isrow);
304: PetscObjectReference((PetscObject)iscol);
305: b->icol = isicol;
306: PetscMalloc1(bs * n + bs, &b->solve_work);
308: b->maxnz = b->nz = bdiag[0] + 1;
310: B->factortype = MAT_FACTOR_LU;
311: B->info.factor_mallocs = reallocs;
312: B->info.fill_ratio_given = f;
314: if (ai[n] != 0) {
315: B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
316: } else {
317: B->info.fill_ratio_needed = 0.0;
318: }
319: #if defined(PETSC_USE_INFO)
320: if (ai[n] != 0) {
321: PetscReal af = B->info.fill_ratio_needed;
322: PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af);
323: PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af);
324: PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af);
325: PetscInfo(A, "for best performance.\n");
326: } else {
327: PetscInfo(A, "Empty matrix\n");
328: }
329: #endif
331: ISIdentity(isrow, &row_identity);
332: ISIdentity(iscol, &col_identity);
334: both_identity = (PetscBool)(row_identity && col_identity);
336: MatSeqBAIJSetNumericFactorization(B, both_identity);
337: return 0;
338: }
340: PetscErrorCode MatLUFactorSymbolic_SeqBAIJ_inplace(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
341: {
342: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b;
343: PetscInt n = a->mbs, bs = A->rmap->bs, bs2 = a->bs2;
344: PetscBool row_identity, col_identity, both_identity;
345: IS isicol;
346: const PetscInt *r, *ic;
347: PetscInt i, *ai = a->i, *aj = a->j;
348: PetscInt *bi, *bj, *ajtmp;
349: PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
350: PetscReal f;
351: PetscInt nlnk, *lnk, k, **bi_ptr;
352: PetscFreeSpaceList free_space = NULL, current_space = NULL;
353: PetscBT lnkbt;
354: PetscBool missing;
357: MatMissingDiagonal(A, &missing, &i);
360: ISInvertPermutation(iscol, PETSC_DECIDE, &isicol);
361: ISGetIndices(isrow, &r);
362: ISGetIndices(isicol, &ic);
364: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
365: PetscMalloc1(n + 1, &bi);
366: PetscMalloc1(n + 1, &bdiag);
368: bi[0] = bdiag[0] = 0;
370: /* linked list for storing column indices of the active row */
371: nlnk = n + 1;
372: PetscLLCreate(n, n, nlnk, lnk, lnkbt);
374: PetscMalloc2(n + 1, &bi_ptr, n + 1, &im);
376: /* initial FreeSpace size is f*(ai[n]+1) */
377: f = info->fill;
378: PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space);
379: current_space = free_space;
381: for (i = 0; i < n; i++) {
382: /* copy previous fill into linked list */
383: nzi = 0;
384: nnz = ai[r[i] + 1] - ai[r[i]];
385: ajtmp = aj + ai[r[i]];
386: PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt);
387: nzi += nlnk;
389: /* add pivot rows into linked list */
390: row = lnk[n];
391: while (row < i) {
392: nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
393: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
394: PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im);
395: nzi += nlnk;
396: row = lnk[row];
397: }
398: bi[i + 1] = bi[i] + nzi;
399: im[i] = nzi;
401: /* mark bdiag */
402: nzbd = 0;
403: nnz = nzi;
404: k = lnk[n];
405: while (nnz-- && k < i) {
406: nzbd++;
407: k = lnk[k];
408: }
409: bdiag[i] = bi[i] + nzbd;
411: /* if free space is not available, make more free space */
412: if (current_space->local_remaining < nzi) {
413: nnz = PetscIntMultTruncate(n - i, nzi); /* estimated and max additional space needed */
414: PetscFreeSpaceGet(nnz, ¤t_space);
415: reallocs++;
416: }
418: /* copy data into free space, then initialize lnk */
419: PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt);
421: bi_ptr[i] = current_space->array;
422: current_space->array += nzi;
423: current_space->local_used += nzi;
424: current_space->local_remaining -= nzi;
425: }
426: #if defined(PETSC_USE_INFO)
427: if (ai[n] != 0) {
428: PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
429: PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af);
430: PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af);
431: PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af);
432: PetscInfo(A, "for best performance.\n");
433: } else {
434: PetscInfo(A, "Empty matrix\n");
435: }
436: #endif
438: ISRestoreIndices(isrow, &r);
439: ISRestoreIndices(isicol, &ic);
441: /* destroy list of free space and other temporary array(s) */
442: PetscMalloc1(bi[n] + 1, &bj);
443: PetscFreeSpaceContiguous(&free_space, bj);
444: PetscLLDestroy(lnk, lnkbt);
445: PetscFree2(bi_ptr, im);
447: /* put together the new matrix */
448: MatSeqBAIJSetPreallocation(B, bs, MAT_SKIP_ALLOCATION, NULL);
449: b = (Mat_SeqBAIJ *)(B)->data;
451: b->free_a = PETSC_TRUE;
452: b->free_ij = PETSC_TRUE;
453: b->singlemalloc = PETSC_FALSE;
455: PetscMalloc1((bi[n] + 1) * bs2, &b->a);
456: b->j = bj;
457: b->i = bi;
458: b->diag = bdiag;
459: b->free_diag = PETSC_TRUE;
460: b->ilen = NULL;
461: b->imax = NULL;
462: b->row = isrow;
463: b->col = iscol;
464: b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;
466: PetscObjectReference((PetscObject)isrow);
467: PetscObjectReference((PetscObject)iscol);
468: b->icol = isicol;
470: PetscMalloc1(bs * n + bs, &b->solve_work);
472: b->maxnz = b->nz = bi[n];
474: (B)->factortype = MAT_FACTOR_LU;
475: (B)->info.factor_mallocs = reallocs;
476: (B)->info.fill_ratio_given = f;
478: if (ai[n] != 0) {
479: (B)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
480: } else {
481: (B)->info.fill_ratio_needed = 0.0;
482: }
484: ISIdentity(isrow, &row_identity);
485: ISIdentity(iscol, &col_identity);
487: both_identity = (PetscBool)(row_identity && col_identity);
489: MatSeqBAIJSetNumericFactorization_inplace(B, both_identity);
490: return 0;
491: }