Actual source code: spbas_cholesky.h
2: /*
3: spbas_cholesky_row_alloc:
4: in the data arrays, find a place where another row may be stored.
5: Return PETSC_ERR_MEM if there is insufficient space to store the
6: row, so garbage collection and/or re-allocation may be done.
7: */
8: PetscBool spbas_cholesky_row_alloc(spbas_matrix retval, PetscInt k, PetscInt r_nnz, PetscInt *n_alloc_used)
9: {
10: retval.icols[k] = &retval.alloc_icol[*n_alloc_used];
11: retval.values[k] = &retval.alloc_val[*n_alloc_used];
12: *n_alloc_used += r_nnz;
13: return (*n_alloc_used > retval.n_alloc_icol) ? PETSC_FALSE : PETSC_TRUE;
14: }
16: /*
17: spbas_cholesky_garbage_collect:
18: move the rows which have been calculated so far, as well as
19: those currently under construction, to the front of the
20: array, while putting them in the proper order.
21: When it seems necessary, enlarge the current arrays.
23: NB: re-allocation is being simulated using
24: PetscMalloc, memcpy, PetscFree, because
25: PetscRealloc does not seem to exist.
27: */
28: PetscErrorCode spbas_cholesky_garbage_collect(spbas_matrix *result, /* I/O: the Cholesky factor matrix being constructed.
29: Only the storage, not the contents of this matrix is changed in this function */
30: PetscInt i_row, /* I : Number of rows for which the final contents are known */
31: PetscInt *n_row_alloc_ok, /* I/O: Number of rows which are already in their final
32: places in the arrays: they need not be moved any more */
33: PetscInt *n_alloc_used, /* I/O: */
34: PetscInt *max_row_nnz) /* I : Over-estimate of the number of nonzeros needed to store each row */
35: {
36: /* PSEUDO-CODE:
37: 1. Choose the appropriate size for the arrays
38: 2. Rescue the arrays which would be lost during garbage collection
39: 3. Reallocate and correct administration
40: 4. Move all arrays so that they are in proper order */
42: PetscInt i, j;
43: PetscInt nrows = result->nrows;
44: PetscInt n_alloc_ok = 0;
45: PetscInt n_alloc_ok_max = 0;
46: PetscInt need_already = 0;
47: PetscInt max_need_extra = 0;
48: PetscInt n_alloc_max, n_alloc_est, n_alloc;
49: PetscInt n_alloc_now = result->n_alloc_icol;
50: PetscInt *alloc_icol_old = result->alloc_icol;
51: PetscScalar *alloc_val_old = result->alloc_val;
52: PetscInt *icol_rescue;
53: PetscScalar *val_rescue;
54: PetscInt n_rescue;
55: PetscInt i_here, i_last, n_copy;
56: const PetscReal xtra_perc = 20;
58: /*********************************************************
59: 1. Choose appropriate array size
60: Count number of rows and memory usage which is already final */
61: for (i = 0; i < i_row; i++) {
62: n_alloc_ok += result->row_nnz[i];
63: n_alloc_ok_max += max_row_nnz[i];
64: }
66: /* Count currently needed memory usage and future memory requirements
67: (max, predicted)*/
68: for (i = i_row; i < nrows; i++) {
69: if (!result->row_nnz[i]) {
70: max_need_extra += max_row_nnz[i];
71: } else {
72: need_already += max_row_nnz[i];
73: }
74: }
76: /* Make maximal and realistic memory requirement estimates */
77: n_alloc_max = n_alloc_ok + need_already + max_need_extra;
78: n_alloc_est = n_alloc_ok + need_already + (int)(((PetscReal)max_need_extra) * ((PetscReal)n_alloc_ok) / ((PetscReal)n_alloc_ok_max));
80: /* Choose array sizes */
81: if (n_alloc_max == n_alloc_est) n_alloc = n_alloc_max;
82: else if (n_alloc_now >= n_alloc_est) n_alloc = n_alloc_now;
83: else if (n_alloc_max < n_alloc_est * (1 + xtra_perc / 100.0)) n_alloc = n_alloc_max;
84: else n_alloc = (int)(n_alloc_est * (1 + xtra_perc / 100.0));
86: /* If new estimate is less than what we already have,
87: don't reallocate, just garbage-collect */
88: if (n_alloc_max != n_alloc_est && n_alloc < result->n_alloc_icol) n_alloc = result->n_alloc_icol;
90: /* Motivate dimension choice */
91: PetscInfo(NULL, " Allocating %" PetscInt_FMT " nonzeros: ", n_alloc);
92: if (n_alloc_max == n_alloc_est) {
93: PetscInfo(NULL, "this is the correct size\n");
94: } else if (n_alloc_now >= n_alloc_est) {
95: PetscInfo(NULL, "the current size, which seems enough\n");
96: } else if (n_alloc_max < n_alloc_est * (1 + xtra_perc / 100.0)) {
97: PetscInfo(NULL, "the maximum estimate\n");
98: } else {
99: PetscInfo(NULL, "%6.2f %% more than the estimate\n", (double)xtra_perc);
100: }
102: /**********************************************************
103: 2. Rescue arrays which would be lost
104: Count how many rows and nonzeros will have to be rescued
105: when moving all arrays in place */
106: n_rescue = 0;
107: if (*n_row_alloc_ok == 0) *n_alloc_used = 0;
108: else {
109: i = *n_row_alloc_ok - 1;
111: *n_alloc_used = (result->icols[i] - result->alloc_icol) + result->row_nnz[i];
112: }
114: for (i = *n_row_alloc_ok; i < nrows; i++) {
115: i_here = result->icols[i] - result->alloc_icol;
116: i_last = i_here + result->row_nnz[i];
117: if (result->row_nnz[i] > 0) {
118: if (*n_alloc_used > i_here || i_last > n_alloc) n_rescue += result->row_nnz[i];
120: if (i < i_row) *n_alloc_used += result->row_nnz[i];
121: else *n_alloc_used += max_row_nnz[i];
122: }
123: }
125: /* Allocate rescue arrays */
126: PetscMalloc1(n_rescue, &icol_rescue);
127: PetscMalloc1(n_rescue, &val_rescue);
129: /* Rescue the arrays which need rescuing */
130: n_rescue = 0;
131: if (*n_row_alloc_ok == 0) *n_alloc_used = 0;
132: else {
133: i = *n_row_alloc_ok - 1;
134: *n_alloc_used = (result->icols[i] - result->alloc_icol) + result->row_nnz[i];
135: }
137: for (i = *n_row_alloc_ok; i < nrows; i++) {
138: i_here = result->icols[i] - result->alloc_icol;
139: i_last = i_here + result->row_nnz[i];
140: if (result->row_nnz[i] > 0) {
141: if (*n_alloc_used > i_here || i_last > n_alloc) {
142: PetscArraycpy(&icol_rescue[n_rescue], result->icols[i], result->row_nnz[i]);
143: PetscArraycpy(&val_rescue[n_rescue], result->values[i], result->row_nnz[i]);
144: n_rescue += result->row_nnz[i];
145: }
147: if (i < i_row) *n_alloc_used += result->row_nnz[i];
148: else *n_alloc_used += max_row_nnz[i];
149: }
150: }
152: /**********************************************************
153: 3. Reallocate and correct administration */
155: if (n_alloc != result->n_alloc_icol) {
156: n_copy = PetscMin(n_alloc, result->n_alloc_icol);
158: /* PETSC knows no REALLOC, so we'll REALLOC ourselves.
160: Allocate new icol-data, copy old contents */
161: PetscMalloc1(n_alloc, &result->alloc_icol);
162: PetscArraycpy(result->alloc_icol, alloc_icol_old, n_copy);
164: /* Update administration, Reset pointers to new arrays */
165: result->n_alloc_icol = n_alloc;
166: for (i = 0; i < nrows; i++) {
167: result->icols[i] = result->alloc_icol + (result->icols[i] - alloc_icol_old);
168: result->values[i] = result->alloc_val + (result->icols[i] - result->alloc_icol);
169: }
171: /* Delete old array */
172: PetscFree(alloc_icol_old);
174: /* Allocate new value-data, copy old contents */
175: PetscMalloc1(n_alloc, &result->alloc_val);
176: PetscArraycpy(result->alloc_val, alloc_val_old, n_copy);
178: /* Update administration, Reset pointers to new arrays */
179: result->n_alloc_val = n_alloc;
180: for (i = 0; i < nrows; i++) result->values[i] = result->alloc_val + (result->icols[i] - result->alloc_icol);
182: /* Delete old array */
183: PetscFree(alloc_val_old);
184: }
186: /*********************************************************
187: 4. Copy all the arrays to their proper places */
188: n_rescue = 0;
189: if (*n_row_alloc_ok == 0) *n_alloc_used = 0;
190: else {
191: i = *n_row_alloc_ok - 1;
193: *n_alloc_used = (result->icols[i] - result->alloc_icol) + result->row_nnz[i];
194: }
196: for (i = *n_row_alloc_ok; i < nrows; i++) {
197: i_here = result->icols[i] - result->alloc_icol;
198: i_last = i_here + result->row_nnz[i];
200: result->icols[i] = result->alloc_icol + *n_alloc_used;
201: result->values[i] = result->alloc_val + *n_alloc_used;
203: if (result->row_nnz[i] > 0) {
204: if (*n_alloc_used > i_here || i_last > n_alloc) {
205: PetscArraycpy(result->icols[i], &icol_rescue[n_rescue], result->row_nnz[i]);
206: PetscArraycpy(result->values[i], &val_rescue[n_rescue], result->row_nnz[i]);
208: n_rescue += result->row_nnz[i];
209: } else {
210: for (j = 0; j < result->row_nnz[i]; j++) {
211: result->icols[i][j] = result->alloc_icol[i_here + j];
212: result->values[i][j] = result->alloc_val[i_here + j];
213: }
214: }
215: if (i < i_row) *n_alloc_used += result->row_nnz[i];
216: else *n_alloc_used += max_row_nnz[i];
217: }
218: }
220: /* Delete the rescue arrays */
221: PetscFree(icol_rescue);
222: PetscFree(val_rescue);
224: *n_row_alloc_ok = i_row;
225: return 0;
226: }
228: /*
229: spbas_incomplete_cholesky:
230: incomplete Cholesky decomposition of a square, symmetric,
231: positive definite matrix.
233: In case negative diagonals are encountered, function returns
234: NEGATIVE_DIAGONAL. When this happens, call this function again
235: with a larger epsdiag_in, a less sparse pattern, and/or a smaller
236: droptol
237: */
238: PetscErrorCode spbas_incomplete_cholesky(Mat A, const PetscInt *rip, const PetscInt *riip, spbas_matrix pattern, PetscReal droptol, PetscReal epsdiag_in, spbas_matrix *matrix_L, PetscBool *success)
239: {
240: PetscInt jL;
241: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
242: PetscInt *ai = a->i, *aj = a->j;
243: MatScalar *aa = a->a;
244: PetscInt nrows, ncols;
245: PetscInt *max_row_nnz;
246: spbas_matrix retval;
247: PetscScalar *diag;
248: PetscScalar *val;
249: PetscScalar *lvec;
250: PetscScalar epsdiag;
251: PetscInt i, j, k;
252: const PetscBool do_values = PETSC_TRUE;
253: PetscInt *r1_icol;
254: PetscScalar *r1_val;
255: PetscInt *r_icol;
256: PetscInt r_nnz;
257: PetscScalar *r_val;
258: PetscInt *A_icol;
259: PetscInt A_nnz;
260: PetscScalar *A_val;
261: PetscInt *p_icol;
262: PetscInt p_nnz;
263: PetscInt n_row_alloc_ok = 0; /* number of rows which have been stored correctly in the matrix */
264: PetscInt n_alloc_used = 0; /* part of result->icols and result->values which is currently being used */
266: /* Convert the Manteuffel shift from 'fraction of average diagonal' to dimensioned value */
267: MatGetSize(A, &nrows, &ncols);
268: MatGetTrace(A, &epsdiag);
270: epsdiag *= epsdiag_in / nrows;
272: PetscInfo(NULL, " Dimensioned Manteuffel shift %g Drop tolerance %g\n", (double)PetscRealPart(epsdiag), (double)droptol);
274: if (droptol < 1e-10) droptol = 1e-10;
276: retval.nrows = nrows;
277: retval.ncols = nrows;
278: retval.nnz = pattern.nnz / 10;
279: retval.col_idx_type = SPBAS_COLUMN_NUMBERS;
280: retval.block_data = PETSC_TRUE;
282: spbas_allocate_pattern(&retval, do_values);
283: PetscArrayzero(retval.row_nnz, nrows);
284: spbas_allocate_data(&retval);
285: retval.nnz = 0;
287: PetscMalloc1(nrows, &diag);
288: PetscCalloc1(nrows, &val);
289: PetscCalloc1(nrows, &lvec);
290: PetscCalloc1(nrows, &max_row_nnz);
292: /* Count the nonzeros on transpose of pattern */
293: for (i = 0; i < nrows; i++) {
294: p_nnz = pattern.row_nnz[i];
295: p_icol = pattern.icols[i];
296: for (j = 0; j < p_nnz; j++) max_row_nnz[i + p_icol[j]]++;
297: }
299: /* Calculate rows of L */
300: for (i = 0; i < nrows; i++) {
301: p_nnz = pattern.row_nnz[i];
302: p_icol = pattern.icols[i];
304: r_nnz = retval.row_nnz[i];
305: r_icol = retval.icols[i];
306: r_val = retval.values[i];
307: A_nnz = ai[rip[i] + 1] - ai[rip[i]];
308: A_icol = &aj[ai[rip[i]]];
309: A_val = &aa[ai[rip[i]]];
311: /* Calculate val += A(i,i:n)'; */
312: for (j = 0; j < A_nnz; j++) {
313: k = riip[A_icol[j]];
314: if (k >= i) val[k] = A_val[j];
315: }
317: /* Add regularization */
318: val[i] += epsdiag;
320: /* Calculate lvec = diag(D(0:i-1)) * L(0:i-1,i);
321: val(i) = A(i,i) - L(0:i-1,i)' * lvec */
322: for (j = 0; j < r_nnz; j++) {
323: k = r_icol[j];
324: lvec[k] = diag[k] * r_val[j];
325: val[i] -= r_val[j] * lvec[k];
326: }
328: /* Calculate the new diagonal */
329: diag[i] = val[i];
330: if (PetscRealPart(diag[i]) < droptol) {
331: PetscInfo(NULL, "Error in spbas_incomplete_cholesky:\n");
332: PetscInfo(NULL, "Negative diagonal in row %" PetscInt_FMT "\n", i + 1);
334: /* Delete the whole matrix at once. */
335: spbas_delete(retval);
336: *success = PETSC_FALSE;
337: return 0;
338: }
340: /* If necessary, allocate arrays */
341: if (r_nnz == 0) {
342: PetscBool success = spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used);
344: r_icol = retval.icols[i];
345: r_val = retval.values[i];
346: }
348: /* Now, fill in */
349: r_icol[r_nnz] = i;
350: r_val[r_nnz] = 1.0;
351: r_nnz++;
352: retval.row_nnz[i]++;
354: retval.nnz += r_nnz;
356: /* Calculate
357: val(i+1:n) = (A(i,i+1:n)- L(0:i-1,i+1:n)' * lvec)/diag(i) */
358: for (j = 1; j < p_nnz; j++) {
359: k = i + p_icol[j];
360: r1_icol = retval.icols[k];
361: r1_val = retval.values[k];
362: for (jL = 0; jL < retval.row_nnz[k]; jL++) val[k] -= r1_val[jL] * lvec[r1_icol[jL]];
363: val[k] /= diag[i];
365: if (PetscAbsScalar(val[k]) > droptol || PetscAbsScalar(val[k]) < -droptol) {
366: /* If necessary, allocate arrays */
367: if (!retval.row_nnz[k]) {
368: PetscBool flag, success = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);
369: if (!success) {
370: spbas_cholesky_garbage_collect(&retval, i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);
371: flag = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);
373: r_icol = retval.icols[i];
374: }
375: }
377: retval.icols[k][retval.row_nnz[k]] = i;
378: retval.values[k][retval.row_nnz[k]] = val[k];
379: retval.row_nnz[k]++;
380: }
381: val[k] = 0;
382: }
384: /* Erase the values used in the work arrays */
385: for (j = 0; j < r_nnz; j++) lvec[r_icol[j]] = 0;
386: }
388: PetscFree(lvec);
389: PetscFree(val);
391: spbas_cholesky_garbage_collect(&retval, nrows, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);
392: PetscFree(max_row_nnz);
394: /* Place the inverse of the diagonals in the matrix */
395: for (i = 0; i < nrows; i++) {
396: r_nnz = retval.row_nnz[i];
398: retval.values[i][r_nnz - 1] = 1.0 / diag[i];
399: for (j = 0; j < r_nnz - 1; j++) retval.values[i][j] *= -1;
400: }
401: PetscFree(diag);
402: *matrix_L = retval;
403: *success = PETSC_TRUE;
404: return 0;
405: }