Actual source code: spbas_cholesky.h


  2: /*
  3:    spbas_cholesky_row_alloc:
  4:       in the data arrays, find a place where another row may be stored.
  5:       Return PETSC_ERR_MEM if there is insufficient space to store the
  6:       row, so garbage collection and/or re-allocation may be done.
  7: */
  8: PetscBool spbas_cholesky_row_alloc(spbas_matrix retval, PetscInt k, PetscInt r_nnz, PetscInt *n_alloc_used)
  9: {
 10:   retval.icols[k]  = &retval.alloc_icol[*n_alloc_used];
 11:   retval.values[k] = &retval.alloc_val[*n_alloc_used];
 12:   *n_alloc_used += r_nnz;
 13:   return (*n_alloc_used > retval.n_alloc_icol) ? PETSC_FALSE : PETSC_TRUE;
 14: }

 16: /*
 17:   spbas_cholesky_garbage_collect:
 18:      move the rows which have been calculated so far, as well as
 19:      those currently under construction, to the front of the
 20:      array, while putting them in the proper order.
 21:      When it seems necessary, enlarge the current arrays.

 23:      NB: re-allocation is being simulated using
 24:          PetscMalloc, memcpy, PetscFree, because
 25:          PetscRealloc does not seem to exist.

 27: */
 28: PetscErrorCode spbas_cholesky_garbage_collect(spbas_matrix *result,         /* I/O: the Cholesky factor matrix being constructed.
 29:                                                                                     Only the storage, not the contents of this matrix is changed in this function */
 30:                                               PetscInt      i_row,          /* I  : Number of rows for which the final contents are known */
 31:                                               PetscInt     *n_row_alloc_ok, /* I/O: Number of rows which are already in their final
 32:                                                                                     places in the arrays: they need not be moved any more */
 33:                                               PetscInt     *n_alloc_used,   /* I/O:  */
 34:                                               PetscInt     *max_row_nnz)        /* I  : Over-estimate of the number of nonzeros needed to store each row */
 35: {
 36:   /* PSEUDO-CODE:
 37:   1. Choose the appropriate size for the arrays
 38:   2. Rescue the arrays which would be lost during garbage collection
 39:   3. Reallocate and correct administration
 40:   4. Move all arrays so that they are in proper order */

 42:   PetscInt        i, j;
 43:   PetscInt        nrows          = result->nrows;
 44:   PetscInt        n_alloc_ok     = 0;
 45:   PetscInt        n_alloc_ok_max = 0;
 46:   PetscInt        need_already   = 0;
 47:   PetscInt        max_need_extra = 0;
 48:   PetscInt        n_alloc_max, n_alloc_est, n_alloc;
 49:   PetscInt        n_alloc_now    = result->n_alloc_icol;
 50:   PetscInt       *alloc_icol_old = result->alloc_icol;
 51:   PetscScalar    *alloc_val_old  = result->alloc_val;
 52:   PetscInt       *icol_rescue;
 53:   PetscScalar    *val_rescue;
 54:   PetscInt        n_rescue;
 55:   PetscInt        i_here, i_last, n_copy;
 56:   const PetscReal xtra_perc = 20;

 58:   /*********************************************************
 59:   1. Choose appropriate array size
 60:   Count number of rows and memory usage which is already final */
 61:   for (i = 0; i < i_row; i++) {
 62:     n_alloc_ok += result->row_nnz[i];
 63:     n_alloc_ok_max += max_row_nnz[i];
 64:   }

 66:   /* Count currently needed memory usage and future memory requirements
 67:     (max, predicted)*/
 68:   for (i = i_row; i < nrows; i++) {
 69:     if (!result->row_nnz[i]) {
 70:       max_need_extra += max_row_nnz[i];
 71:     } else {
 72:       need_already += max_row_nnz[i];
 73:     }
 74:   }

 76:   /* Make maximal and realistic memory requirement estimates */
 77:   n_alloc_max = n_alloc_ok + need_already + max_need_extra;
 78:   n_alloc_est = n_alloc_ok + need_already + (int)(((PetscReal)max_need_extra) * ((PetscReal)n_alloc_ok) / ((PetscReal)n_alloc_ok_max));

 80:   /* Choose array sizes */
 81:   if (n_alloc_max == n_alloc_est) n_alloc = n_alloc_max;
 82:   else if (n_alloc_now >= n_alloc_est) n_alloc = n_alloc_now;
 83:   else if (n_alloc_max < n_alloc_est * (1 + xtra_perc / 100.0)) n_alloc = n_alloc_max;
 84:   else n_alloc = (int)(n_alloc_est * (1 + xtra_perc / 100.0));

 86:   /* If new estimate is less than what we already have,
 87:     don't reallocate, just garbage-collect */
 88:   if (n_alloc_max != n_alloc_est && n_alloc < result->n_alloc_icol) n_alloc = result->n_alloc_icol;

 90:   /* Motivate dimension choice */
 91:   PetscInfo(NULL, "   Allocating %" PetscInt_FMT " nonzeros: ", n_alloc);
 92:   if (n_alloc_max == n_alloc_est) {
 93:     PetscInfo(NULL, "this is the correct size\n");
 94:   } else if (n_alloc_now >= n_alloc_est) {
 95:     PetscInfo(NULL, "the current size, which seems enough\n");
 96:   } else if (n_alloc_max < n_alloc_est * (1 + xtra_perc / 100.0)) {
 97:     PetscInfo(NULL, "the maximum estimate\n");
 98:   } else {
 99:     PetscInfo(NULL, "%6.2f %% more than the estimate\n", (double)xtra_perc);
100:   }

102:   /**********************************************************
103:   2. Rescue arrays which would be lost
104:   Count how many rows and nonzeros will have to be rescued
105:   when moving all arrays in place */
106:   n_rescue = 0;
107:   if (*n_row_alloc_ok == 0) *n_alloc_used = 0;
108:   else {
109:     i = *n_row_alloc_ok - 1;

111:     *n_alloc_used = (result->icols[i] - result->alloc_icol) + result->row_nnz[i];
112:   }

114:   for (i = *n_row_alloc_ok; i < nrows; i++) {
115:     i_here = result->icols[i] - result->alloc_icol;
116:     i_last = i_here + result->row_nnz[i];
117:     if (result->row_nnz[i] > 0) {
118:       if (*n_alloc_used > i_here || i_last > n_alloc) n_rescue += result->row_nnz[i];

120:       if (i < i_row) *n_alloc_used += result->row_nnz[i];
121:       else *n_alloc_used += max_row_nnz[i];
122:     }
123:   }

125:   /* Allocate rescue arrays */
126:   PetscMalloc1(n_rescue, &icol_rescue);
127:   PetscMalloc1(n_rescue, &val_rescue);

129:   /* Rescue the arrays which need rescuing */
130:   n_rescue = 0;
131:   if (*n_row_alloc_ok == 0) *n_alloc_used = 0;
132:   else {
133:     i             = *n_row_alloc_ok - 1;
134:     *n_alloc_used = (result->icols[i] - result->alloc_icol) + result->row_nnz[i];
135:   }

137:   for (i = *n_row_alloc_ok; i < nrows; i++) {
138:     i_here = result->icols[i] - result->alloc_icol;
139:     i_last = i_here + result->row_nnz[i];
140:     if (result->row_nnz[i] > 0) {
141:       if (*n_alloc_used > i_here || i_last > n_alloc) {
142:         PetscArraycpy(&icol_rescue[n_rescue], result->icols[i], result->row_nnz[i]);
143:         PetscArraycpy(&val_rescue[n_rescue], result->values[i], result->row_nnz[i]);
144:         n_rescue += result->row_nnz[i];
145:       }

147:       if (i < i_row) *n_alloc_used += result->row_nnz[i];
148:       else *n_alloc_used += max_row_nnz[i];
149:     }
150:   }

152:   /**********************************************************
153:   3. Reallocate and correct administration */

155:   if (n_alloc != result->n_alloc_icol) {
156:     n_copy = PetscMin(n_alloc, result->n_alloc_icol);

158:     /* PETSC knows no REALLOC, so we'll REALLOC ourselves.

160:         Allocate new icol-data, copy old contents */
161:     PetscMalloc1(n_alloc, &result->alloc_icol);
162:     PetscArraycpy(result->alloc_icol, alloc_icol_old, n_copy);

164:     /* Update administration, Reset pointers to new arrays  */
165:     result->n_alloc_icol = n_alloc;
166:     for (i = 0; i < nrows; i++) {
167:       result->icols[i]  = result->alloc_icol + (result->icols[i] - alloc_icol_old);
168:       result->values[i] = result->alloc_val + (result->icols[i] - result->alloc_icol);
169:     }

171:     /* Delete old array */
172:     PetscFree(alloc_icol_old);

174:     /* Allocate new value-data, copy old contents */
175:     PetscMalloc1(n_alloc, &result->alloc_val);
176:     PetscArraycpy(result->alloc_val, alloc_val_old, n_copy);

178:     /* Update administration, Reset pointers to new arrays  */
179:     result->n_alloc_val = n_alloc;
180:     for (i = 0; i < nrows; i++) result->values[i] = result->alloc_val + (result->icols[i] - result->alloc_icol);

182:     /* Delete old array */
183:     PetscFree(alloc_val_old);
184:   }

186:   /*********************************************************
187:   4. Copy all the arrays to their proper places */
188:   n_rescue = 0;
189:   if (*n_row_alloc_ok == 0) *n_alloc_used = 0;
190:   else {
191:     i = *n_row_alloc_ok - 1;

193:     *n_alloc_used = (result->icols[i] - result->alloc_icol) + result->row_nnz[i];
194:   }

196:   for (i = *n_row_alloc_ok; i < nrows; i++) {
197:     i_here = result->icols[i] - result->alloc_icol;
198:     i_last = i_here + result->row_nnz[i];

200:     result->icols[i]  = result->alloc_icol + *n_alloc_used;
201:     result->values[i] = result->alloc_val + *n_alloc_used;

203:     if (result->row_nnz[i] > 0) {
204:       if (*n_alloc_used > i_here || i_last > n_alloc) {
205:         PetscArraycpy(result->icols[i], &icol_rescue[n_rescue], result->row_nnz[i]);
206:         PetscArraycpy(result->values[i], &val_rescue[n_rescue], result->row_nnz[i]);

208:         n_rescue += result->row_nnz[i];
209:       } else {
210:         for (j = 0; j < result->row_nnz[i]; j++) {
211:           result->icols[i][j]  = result->alloc_icol[i_here + j];
212:           result->values[i][j] = result->alloc_val[i_here + j];
213:         }
214:       }
215:       if (i < i_row) *n_alloc_used += result->row_nnz[i];
216:       else *n_alloc_used += max_row_nnz[i];
217:     }
218:   }

220:   /* Delete the rescue arrays */
221:   PetscFree(icol_rescue);
222:   PetscFree(val_rescue);

224:   *n_row_alloc_ok = i_row;
225:   return 0;
226: }

228: /*
229:   spbas_incomplete_cholesky:
230:      incomplete Cholesky decomposition of a square, symmetric,
231:      positive definite matrix.

233:      In case negative diagonals are encountered, function returns
234:      NEGATIVE_DIAGONAL. When this happens, call this function again
235:      with a larger epsdiag_in, a less sparse pattern, and/or a smaller
236:      droptol
237: */
238: PetscErrorCode spbas_incomplete_cholesky(Mat A, const PetscInt *rip, const PetscInt *riip, spbas_matrix pattern, PetscReal droptol, PetscReal epsdiag_in, spbas_matrix *matrix_L, PetscBool *success)
239: {
240:   PetscInt        jL;
241:   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *)A->data;
242:   PetscInt       *ai = a->i, *aj = a->j;
243:   MatScalar      *aa = a->a;
244:   PetscInt        nrows, ncols;
245:   PetscInt       *max_row_nnz;
246:   spbas_matrix    retval;
247:   PetscScalar    *diag;
248:   PetscScalar    *val;
249:   PetscScalar    *lvec;
250:   PetscScalar     epsdiag;
251:   PetscInt        i, j, k;
252:   const PetscBool do_values = PETSC_TRUE;
253:   PetscInt       *r1_icol;
254:   PetscScalar    *r1_val;
255:   PetscInt       *r_icol;
256:   PetscInt        r_nnz;
257:   PetscScalar    *r_val;
258:   PetscInt       *A_icol;
259:   PetscInt        A_nnz;
260:   PetscScalar    *A_val;
261:   PetscInt       *p_icol;
262:   PetscInt        p_nnz;
263:   PetscInt        n_row_alloc_ok = 0; /* number of rows which have been stored   correctly in the matrix */
264:   PetscInt        n_alloc_used   = 0; /* part of result->icols and result->values   which is currently being used */

266:   /* Convert the Manteuffel shift from 'fraction of average diagonal' to   dimensioned value */
267:   MatGetSize(A, &nrows, &ncols);
268:   MatGetTrace(A, &epsdiag);

270:   epsdiag *= epsdiag_in / nrows;

272:   PetscInfo(NULL, "   Dimensioned Manteuffel shift %g Drop tolerance %g\n", (double)PetscRealPart(epsdiag), (double)droptol);

274:   if (droptol < 1e-10) droptol = 1e-10;

276:   retval.nrows        = nrows;
277:   retval.ncols        = nrows;
278:   retval.nnz          = pattern.nnz / 10;
279:   retval.col_idx_type = SPBAS_COLUMN_NUMBERS;
280:   retval.block_data   = PETSC_TRUE;

282:   spbas_allocate_pattern(&retval, do_values);
283:   PetscArrayzero(retval.row_nnz, nrows);
284:   spbas_allocate_data(&retval);
285:   retval.nnz = 0;

287:   PetscMalloc1(nrows, &diag);
288:   PetscCalloc1(nrows, &val);
289:   PetscCalloc1(nrows, &lvec);
290:   PetscCalloc1(nrows, &max_row_nnz);

292:   /* Count the nonzeros on transpose of pattern */
293:   for (i = 0; i < nrows; i++) {
294:     p_nnz  = pattern.row_nnz[i];
295:     p_icol = pattern.icols[i];
296:     for (j = 0; j < p_nnz; j++) max_row_nnz[i + p_icol[j]]++;
297:   }

299:   /* Calculate rows of L */
300:   for (i = 0; i < nrows; i++) {
301:     p_nnz  = pattern.row_nnz[i];
302:     p_icol = pattern.icols[i];

304:     r_nnz  = retval.row_nnz[i];
305:     r_icol = retval.icols[i];
306:     r_val  = retval.values[i];
307:     A_nnz  = ai[rip[i] + 1] - ai[rip[i]];
308:     A_icol = &aj[ai[rip[i]]];
309:     A_val  = &aa[ai[rip[i]]];

311:     /* Calculate  val += A(i,i:n)'; */
312:     for (j = 0; j < A_nnz; j++) {
313:       k = riip[A_icol[j]];
314:       if (k >= i) val[k] = A_val[j];
315:     }

317:     /*  Add regularization */
318:     val[i] += epsdiag;

320:     /* Calculate lvec   = diag(D(0:i-1)) * L(0:i-1,i);
321:         val(i) = A(i,i) - L(0:i-1,i)' * lvec */
322:     for (j = 0; j < r_nnz; j++) {
323:       k       = r_icol[j];
324:       lvec[k] = diag[k] * r_val[j];
325:       val[i] -= r_val[j] * lvec[k];
326:     }

328:     /* Calculate the new diagonal */
329:     diag[i] = val[i];
330:     if (PetscRealPart(diag[i]) < droptol) {
331:       PetscInfo(NULL, "Error in spbas_incomplete_cholesky:\n");
332:       PetscInfo(NULL, "Negative diagonal in row %" PetscInt_FMT "\n", i + 1);

334:       /* Delete the whole matrix at once. */
335:       spbas_delete(retval);
336:       *success = PETSC_FALSE;
337:       return 0;
338:     }

340:     /* If necessary, allocate arrays */
341:     if (r_nnz == 0) {
342:       PetscBool success = spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used);
344:       r_icol = retval.icols[i];
345:       r_val  = retval.values[i];
346:     }

348:     /* Now, fill in */
349:     r_icol[r_nnz] = i;
350:     r_val[r_nnz]  = 1.0;
351:     r_nnz++;
352:     retval.row_nnz[i]++;

354:     retval.nnz += r_nnz;

356:     /* Calculate
357:         val(i+1:n) = (A(i,i+1:n)- L(0:i-1,i+1:n)' * lvec)/diag(i) */
358:     for (j = 1; j < p_nnz; j++) {
359:       k       = i + p_icol[j];
360:       r1_icol = retval.icols[k];
361:       r1_val  = retval.values[k];
362:       for (jL = 0; jL < retval.row_nnz[k]; jL++) val[k] -= r1_val[jL] * lvec[r1_icol[jL]];
363:       val[k] /= diag[i];

365:       if (PetscAbsScalar(val[k]) > droptol || PetscAbsScalar(val[k]) < -droptol) {
366:         /* If necessary, allocate arrays */
367:         if (!retval.row_nnz[k]) {
368:           PetscBool flag, success = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);
369:           if (!success) {
370:             spbas_cholesky_garbage_collect(&retval, i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);
371:             flag = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);
373:             r_icol = retval.icols[i];
374:           }
375:         }

377:         retval.icols[k][retval.row_nnz[k]]  = i;
378:         retval.values[k][retval.row_nnz[k]] = val[k];
379:         retval.row_nnz[k]++;
380:       }
381:       val[k] = 0;
382:     }

384:     /* Erase the values used in the work arrays */
385:     for (j = 0; j < r_nnz; j++) lvec[r_icol[j]] = 0;
386:   }

388:   PetscFree(lvec);
389:   PetscFree(val);

391:   spbas_cholesky_garbage_collect(&retval, nrows, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);
392:   PetscFree(max_row_nnz);

394:   /* Place the inverse of the diagonals in the matrix */
395:   for (i = 0; i < nrows; i++) {
396:     r_nnz = retval.row_nnz[i];

398:     retval.values[i][r_nnz - 1] = 1.0 / diag[i];
399:     for (j = 0; j < r_nnz - 1; j++) retval.values[i][j] *= -1;
400:   }
401:   PetscFree(diag);
402:   *matrix_L = retval;
403:   *success  = PETSC_TRUE;
404:   return 0;
405: }