Actual source code: dgefa2.c
2: /*
3: Inverts 2 by 2 matrix using gaussian elimination with partial pivoting.
5: Used by the sparse factorization routines in
6: src/mat/impls/baij/seq
8: This is a combination of the Linpack routines
9: dgefa() and dgedi() specialized for a size of 2.
11: */
12: #include <petscsys.h>
14: PETSC_EXTERN PetscErrorCode PetscKernel_A_gets_inverse_A_2(MatScalar *a, PetscReal shift, PetscBool allowzeropivot, PetscBool *zeropivotdetected)
15: {
16: PetscInt i__2, i__3, kp1, j, k, l, ll, i, ipvt[2], k3;
17: PetscInt k4, j3;
18: MatScalar *aa, *ax, *ay, work[4], stmp;
19: MatReal tmp, max;
21: if (zeropivotdetected) *zeropivotdetected = PETSC_FALSE;
22: shift = .25 * shift * (1.e-12 + PetscAbsScalar(a[0]) + PetscAbsScalar(a[3]));
24: /* Parameter adjustments */
25: a -= 3;
27: k = 1;
28: kp1 = k + 1;
29: k3 = 2 * k;
30: k4 = k3 + k;
32: /* find l = pivot index */
33: i__2 = 3 - k;
34: aa = &a[k4];
35: max = PetscAbsScalar(aa[0]);
36: l = 1;
37: for (ll = 1; ll < i__2; ll++) {
38: tmp = PetscAbsScalar(aa[ll]);
39: if (tmp > max) {
40: max = tmp;
41: l = ll + 1;
42: }
43: }
44: l += k - 1;
45: ipvt[k - 1] = l;
47: if (a[l + k3] == 0.0) {
48: if (shift == 0.0) {
49: if (allowzeropivot) {
50: PetscInfo(NULL, "Zero pivot, row %" PetscInt_FMT "\n", k - 1);
51: if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE;
52: } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT, k - 1);
53: } else {
54: a[l + k3] = shift;
55: }
56: }
58: /* interchange if necessary */
59: if (l != k) {
60: stmp = a[l + k3];
61: a[l + k3] = a[k4];
62: a[k4] = stmp;
63: }
65: /* compute multipliers */
66: stmp = -1. / a[k4];
67: i__2 = 2 - k;
68: aa = &a[1 + k4];
69: for (ll = 0; ll < i__2; ll++) aa[ll] *= stmp;
71: /* row elimination with column indexing */
72: ax = &a[k4 + 1];
73: for (j = kp1; j <= 2; ++j) {
74: j3 = 2 * j;
75: stmp = a[l + j3];
76: if (l != k) {
77: a[l + j3] = a[k + j3];
78: a[k + j3] = stmp;
79: }
81: i__3 = 2 - k;
82: ay = &a[1 + k + j3];
83: for (ll = 0; ll < i__3; ll++) ay[ll] += stmp * ax[ll];
84: }
86: ipvt[1] = 2;
87: if (a[6] == 0.0) {
88: if (PetscLikely(allowzeropivot)) {
89: PetscInfo(NULL, "Zero pivot, row 1\n");
90: if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE;
91: } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row 1");
92: }
94: /* Now form the inverse */
95: /* compute inverse(u) */
96: for (k = 1; k <= 2; ++k) {
97: k3 = 2 * k;
98: k4 = k3 + k;
99: a[k4] = 1.0 / a[k4];
100: stmp = -a[k4];
101: i__2 = k - 1;
102: aa = &a[k3 + 1];
103: for (ll = 0; ll < i__2; ll++) aa[ll] *= stmp;
104: kp1 = k + 1;
105: if (2 < kp1) continue;
106: ax = aa;
107: for (j = kp1; j <= 2; ++j) {
108: j3 = 2 * j;
109: stmp = a[k + j3];
110: a[k + j3] = 0.0;
111: ay = &a[j3 + 1];
112: for (ll = 0; ll < k; ll++) ay[ll] += stmp * ax[ll];
113: }
114: }
116: /* form inverse(u)*inverse(l) */
117: k = 1;
118: k3 = 2 * k;
119: kp1 = k + 1;
120: aa = a + k3;
121: for (i = kp1; i <= 2; ++i) {
122: work[i - 1] = aa[i];
123: aa[i] = 0.0;
124: }
125: for (j = kp1; j <= 2; ++j) {
126: stmp = work[j - 1];
127: ax = &a[2 * j + 1];
128: ay = &a[k3 + 1];
129: ay[0] += stmp * ax[0];
130: ay[1] += stmp * ax[1];
131: }
132: l = ipvt[k - 1];
133: if (l != k) {
134: ax = &a[k3 + 1];
135: ay = &a[2 * l + 1];
136: stmp = ax[0];
137: ax[0] = ay[0];
138: ay[0] = stmp;
139: stmp = ax[1];
140: ax[1] = ay[1];
141: ay[1] = stmp;
142: }
143: return 0;
144: }
146: /* gaussian elimination with partial pivoting */
147: PETSC_EXTERN PetscErrorCode PetscKernel_A_gets_inverse_A_9(MatScalar *a, PetscReal shift, PetscBool allowzeropivot, PetscBool *zeropivotdetected)
148: {
149: PetscInt i__2, i__3, kp1, j, k, l, ll, i, ipvt[9], kb, k3;
150: PetscInt k4, j3;
151: MatScalar *aa, *ax, *ay, work[81], stmp;
152: MatReal tmp, max;
154: if (zeropivotdetected) *zeropivotdetected = PETSC_FALSE;
156: /* Parameter adjustments */
157: a -= 10;
159: for (k = 1; k <= 8; ++k) {
160: kp1 = k + 1;
161: k3 = 9 * k;
162: k4 = k3 + k;
164: /* find l = pivot index */
165: i__2 = 10 - k;
166: aa = &a[k4];
167: max = PetscAbsScalar(aa[0]);
168: l = 1;
169: for (ll = 1; ll < i__2; ll++) {
170: tmp = PetscAbsScalar(aa[ll]);
171: if (tmp > max) {
172: max = tmp;
173: l = ll + 1;
174: }
175: }
176: l += k - 1;
177: ipvt[k - 1] = l;
179: if (a[l + k3] == 0.0) {
180: if (shift == 0.0) {
181: if (allowzeropivot) {
182: PetscInfo(NULL, "Zero pivot, row %" PetscInt_FMT "\n", k - 1);
183: if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE;
184: } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT, k - 1);
185: } else {
186: a[l + k3] = shift;
187: }
188: }
190: /* interchange if necessary */
191: if (l != k) {
192: stmp = a[l + k3];
193: a[l + k3] = a[k4];
194: a[k4] = stmp;
195: }
197: /* compute multipliers */
198: stmp = -1. / a[k4];
199: i__2 = 9 - k;
200: aa = &a[1 + k4];
201: for (ll = 0; ll < i__2; ll++) aa[ll] *= stmp;
203: /* row elimination with column indexing */
204: ax = &a[k4 + 1];
205: for (j = kp1; j <= 9; ++j) {
206: j3 = 9 * j;
207: stmp = a[l + j3];
208: if (l != k) {
209: a[l + j3] = a[k + j3];
210: a[k + j3] = stmp;
211: }
213: i__3 = 9 - k;
214: ay = &a[1 + k + j3];
215: for (ll = 0; ll < i__3; ll++) ay[ll] += stmp * ax[ll];
216: }
217: }
218: ipvt[8] = 9;
219: if (a[90] == 0.0) {
220: if (PetscLikely(allowzeropivot)) {
221: PetscInfo(NULL, "Zero pivot, row 8\n");
222: if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE;
223: } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row 8");
224: }
226: /* Now form the inverse */
227: /* compute inverse(u) */
228: for (k = 1; k <= 9; ++k) {
229: k3 = 9 * k;
230: k4 = k3 + k;
231: a[k4] = 1.0 / a[k4];
232: stmp = -a[k4];
233: i__2 = k - 1;
234: aa = &a[k3 + 1];
235: for (ll = 0; ll < i__2; ll++) aa[ll] *= stmp;
236: kp1 = k + 1;
237: if (9 < kp1) continue;
238: ax = aa;
239: for (j = kp1; j <= 9; ++j) {
240: j3 = 9 * j;
241: stmp = a[k + j3];
242: a[k + j3] = 0.0;
243: ay = &a[j3 + 1];
244: for (ll = 0; ll < k; ll++) ay[ll] += stmp * ax[ll];
245: }
246: }
248: /* form inverse(u)*inverse(l) */
249: for (kb = 1; kb <= 8; ++kb) {
250: k = 9 - kb;
251: k3 = 9 * k;
252: kp1 = k + 1;
253: aa = a + k3;
254: for (i = kp1; i <= 9; ++i) {
255: work[i - 1] = aa[i];
256: aa[i] = 0.0;
257: }
258: for (j = kp1; j <= 9; ++j) {
259: stmp = work[j - 1];
260: ax = &a[9 * j + 1];
261: ay = &a[k3 + 1];
262: ay[0] += stmp * ax[0];
263: ay[1] += stmp * ax[1];
264: ay[2] += stmp * ax[2];
265: ay[3] += stmp * ax[3];
266: ay[4] += stmp * ax[4];
267: ay[5] += stmp * ax[5];
268: ay[6] += stmp * ax[6];
269: ay[7] += stmp * ax[7];
270: ay[8] += stmp * ax[8];
271: }
272: l = ipvt[k - 1];
273: if (l != k) {
274: ax = &a[k3 + 1];
275: ay = &a[9 * l + 1];
276: stmp = ax[0];
277: ax[0] = ay[0];
278: ay[0] = stmp;
279: stmp = ax[1];
280: ax[1] = ay[1];
281: ay[1] = stmp;
282: stmp = ax[2];
283: ax[2] = ay[2];
284: ay[2] = stmp;
285: stmp = ax[3];
286: ax[3] = ay[3];
287: ay[3] = stmp;
288: stmp = ax[4];
289: ax[4] = ay[4];
290: ay[4] = stmp;
291: stmp = ax[5];
292: ax[5] = ay[5];
293: ay[5] = stmp;
294: stmp = ax[6];
295: ax[6] = ay[6];
296: ay[6] = stmp;
297: stmp = ax[7];
298: ax[7] = ay[7];
299: ay[7] = stmp;
300: stmp = ax[8];
301: ax[8] = ay[8];
302: ay[8] = stmp;
303: }
304: }
305: return 0;
306: }
308: /*
309: Inverts 15 by 15 matrix using gaussian elimination with partial pivoting.
311: Used by the sparse factorization routines in
312: src/mat/impls/baij/seq
314: This is a combination of the Linpack routines
315: dgefa() and dgedi() specialized for a size of 15.
317: */
319: PETSC_EXTERN PetscErrorCode PetscKernel_A_gets_inverse_A_15(MatScalar *a, PetscInt *ipvt, MatScalar *work, PetscReal shift, PetscBool allowzeropivot, PetscBool *zeropivotdetected)
320: {
321: PetscInt i__2, i__3, kp1, j, k, l, ll, i, kb, k3;
322: PetscInt k4, j3;
323: MatScalar *aa, *ax, *ay, stmp;
324: MatReal tmp, max;
326: if (zeropivotdetected) *zeropivotdetected = PETSC_FALSE;
328: /* Parameter adjustments */
329: a -= 16;
331: for (k = 1; k <= 14; ++k) {
332: kp1 = k + 1;
333: k3 = 15 * k;
334: k4 = k3 + k;
336: /* find l = pivot index */
337: i__2 = 16 - k;
338: aa = &a[k4];
339: max = PetscAbsScalar(aa[0]);
340: l = 1;
341: for (ll = 1; ll < i__2; ll++) {
342: tmp = PetscAbsScalar(aa[ll]);
343: if (tmp > max) {
344: max = tmp;
345: l = ll + 1;
346: }
347: }
348: l += k - 1;
349: ipvt[k - 1] = l;
351: if (a[l + k3] == 0.0) {
352: if (shift == 0.0) {
353: if (allowzeropivot) {
354: PetscInfo(NULL, "Zero pivot, row %" PetscInt_FMT "\n", k - 1);
355: if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE;
356: } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT, k - 1);
357: } else {
358: a[l + k3] = shift;
359: }
360: }
362: /* interchange if necessary */
363: if (l != k) {
364: stmp = a[l + k3];
365: a[l + k3] = a[k4];
366: a[k4] = stmp;
367: }
369: /* compute multipliers */
370: stmp = -1. / a[k4];
371: i__2 = 15 - k;
372: aa = &a[1 + k4];
373: for (ll = 0; ll < i__2; ll++) aa[ll] *= stmp;
375: /* row elimination with column indexing */
376: ax = &a[k4 + 1];
377: for (j = kp1; j <= 15; ++j) {
378: j3 = 15 * j;
379: stmp = a[l + j3];
380: if (l != k) {
381: a[l + j3] = a[k + j3];
382: a[k + j3] = stmp;
383: }
385: i__3 = 15 - k;
386: ay = &a[1 + k + j3];
387: for (ll = 0; ll < i__3; ll++) ay[ll] += stmp * ax[ll];
388: }
389: }
390: ipvt[14] = 15;
391: if (a[240] == 0.0) {
392: if (PetscLikely(allowzeropivot)) {
393: PetscInfo(NULL, "Zero pivot, row 14\n");
394: if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE;
395: } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row 14");
396: }
398: /* Now form the inverse */
399: /* compute inverse(u) */
400: for (k = 1; k <= 15; ++k) {
401: k3 = 15 * k;
402: k4 = k3 + k;
403: a[k4] = 1.0 / a[k4];
404: stmp = -a[k4];
405: i__2 = k - 1;
406: aa = &a[k3 + 1];
407: for (ll = 0; ll < i__2; ll++) aa[ll] *= stmp;
408: kp1 = k + 1;
409: if (15 < kp1) continue;
410: ax = aa;
411: for (j = kp1; j <= 15; ++j) {
412: j3 = 15 * j;
413: stmp = a[k + j3];
414: a[k + j3] = 0.0;
415: ay = &a[j3 + 1];
416: for (ll = 0; ll < k; ll++) ay[ll] += stmp * ax[ll];
417: }
418: }
420: /* form inverse(u)*inverse(l) */
421: for (kb = 1; kb <= 14; ++kb) {
422: k = 15 - kb;
423: k3 = 15 * k;
424: kp1 = k + 1;
425: aa = a + k3;
426: for (i = kp1; i <= 15; ++i) {
427: work[i - 1] = aa[i];
428: aa[i] = 0.0;
429: }
430: for (j = kp1; j <= 15; ++j) {
431: stmp = work[j - 1];
432: ax = &a[15 * j + 1];
433: ay = &a[k3 + 1];
434: ay[0] += stmp * ax[0];
435: ay[1] += stmp * ax[1];
436: ay[2] += stmp * ax[2];
437: ay[3] += stmp * ax[3];
438: ay[4] += stmp * ax[4];
439: ay[5] += stmp * ax[5];
440: ay[6] += stmp * ax[6];
441: ay[7] += stmp * ax[7];
442: ay[8] += stmp * ax[8];
443: ay[9] += stmp * ax[9];
444: ay[10] += stmp * ax[10];
445: ay[11] += stmp * ax[11];
446: ay[12] += stmp * ax[12];
447: ay[13] += stmp * ax[13];
448: ay[14] += stmp * ax[14];
449: }
450: l = ipvt[k - 1];
451: if (l != k) {
452: ax = &a[k3 + 1];
453: ay = &a[15 * l + 1];
454: stmp = ax[0];
455: ax[0] = ay[0];
456: ay[0] = stmp;
457: stmp = ax[1];
458: ax[1] = ay[1];
459: ay[1] = stmp;
460: stmp = ax[2];
461: ax[2] = ay[2];
462: ay[2] = stmp;
463: stmp = ax[3];
464: ax[3] = ay[3];
465: ay[3] = stmp;
466: stmp = ax[4];
467: ax[4] = ay[4];
468: ay[4] = stmp;
469: stmp = ax[5];
470: ax[5] = ay[5];
471: ay[5] = stmp;
472: stmp = ax[6];
473: ax[6] = ay[6];
474: ay[6] = stmp;
475: stmp = ax[7];
476: ax[7] = ay[7];
477: ay[7] = stmp;
478: stmp = ax[8];
479: ax[8] = ay[8];
480: ay[8] = stmp;
481: stmp = ax[9];
482: ax[9] = ay[9];
483: ay[9] = stmp;
484: stmp = ax[10];
485: ax[10] = ay[10];
486: ay[10] = stmp;
487: stmp = ax[11];
488: ax[11] = ay[11];
489: ay[11] = stmp;
490: stmp = ax[12];
491: ax[12] = ay[12];
492: ay[12] = stmp;
493: stmp = ax[13];
494: ax[13] = ay[13];
495: ay[13] = stmp;
496: stmp = ax[14];
497: ax[14] = ay[14];
498: ay[14] = stmp;
499: }
500: }
501: return 0;
502: }