Actual source code: ispai.c


  2: /*
  3:    3/99 Modified by Stephen Barnard to support SPAI version 3.0
  4: */

  6: /*
  7:       Provides an interface to the SPAI Sparse Approximate Inverse Preconditioner
  8:    Code written by Stephen Barnard.

 10:       Note: there is some BAD memory bleeding below!

 12:       This code needs work

 14:    1) get rid of all memory bleeding
 15:    2) fix PETSc/interface so that it gets if the matrix is symmetric from the matrix
 16:       rather than having the sp flag for PC_SPAI
 17:    3) fix to set the block size based on the matrix block size

 19: */
 20: #define PETSC_SKIP_COMPLEX /* since spai uses I which conflicts with some complex implementations */

 22: #include <petsc/private/pcimpl.h>
 23: #include <../src/ksp/pc/impls/spai/petscspai.h>

 25: /*
 26:     These are the SPAI include files
 27: */
 28: EXTERN_C_BEGIN
 29: #define SPAI_USE_MPI /* required for setting SPAI_Comm correctly in basics.h */
 30: #include <spai.h>
 31: #include <matrix.h>
 32: EXTERN_C_END

 34: extern PetscErrorCode ConvertMatToMatrix(MPI_Comm, Mat, Mat, matrix **);
 35: extern PetscErrorCode ConvertMatrixToMat(MPI_Comm, matrix *, Mat *);
 36: extern PetscErrorCode ConvertVectorToVec(MPI_Comm, vector *, Vec *);
 37: extern PetscErrorCode MM_to_PETSC(char *, char *, char *);

 39: typedef struct {
 40:   matrix *B;  /* matrix in SPAI format */
 41:   matrix *BT; /* transpose of matrix in SPAI format */
 42:   matrix *M;  /* the approximate inverse in SPAI format */

 44:   Mat PM; /* the approximate inverse PETSc format */

 46:   double epsilon;    /* tolerance */
 47:   int    nbsteps;    /* max number of "improvement" steps per line */
 48:   int    max;        /* max dimensions of is_I, q, etc. */
 49:   int    maxnew;     /* max number of new entries per step */
 50:   int    block_size; /* constant block size */
 51:   int    cache_size; /* one of (1,2,3,4,5,6) indicting size of cache */
 52:   int    verbose;    /* SPAI prints timing and statistics */

 54:   int      sp;        /* symmetric nonzero pattern */
 55:   MPI_Comm comm_spai; /* communicator to be used with spai */
 56: } PC_SPAI;

 58: static PetscErrorCode PCSetUp_SPAI(PC pc)
 59: {
 60:   PC_SPAI *ispai = (PC_SPAI *)pc->data;
 61:   Mat      AT;

 63:   init_SPAI();

 65:   if (ispai->sp) {
 66:     ConvertMatToMatrix(ispai->comm_spai, pc->pmat, pc->pmat, &ispai->B);
 67:   } else {
 68:     /* Use the transpose to get the column nonzero structure. */
 69:     MatTranspose(pc->pmat, MAT_INITIAL_MATRIX, &AT);
 70:     ConvertMatToMatrix(ispai->comm_spai, pc->pmat, AT, &ispai->B);
 71:     MatDestroy(&AT);
 72:   }

 74:   /* Destroy the transpose */
 75:   /* Don't know how to do it. PETSc developers? */

 77:   /* construct SPAI preconditioner */
 78:   /* FILE *messages */    /* file for warning messages */
 79:   /* double epsilon */    /* tolerance */
 80:   /* int nbsteps */       /* max number of "improvement" steps per line */
 81:   /* int max */           /* max dimensions of is_I, q, etc. */
 82:   /* int maxnew */        /* max number of new entries per step */
 83:   /* int block_size */    /* block_size == 1 specifies scalar elements
 84:                               block_size == n specifies nxn constant-block elements
 85:                               block_size == 0 specifies variable-block elements */
 86:   /* int cache_size */    /* one of (1,2,3,4,5,6) indicting size of cache. cache_size == 0 indicates no caching */
 87:   /* int    verbose    */ /* verbose == 0 specifies that SPAI is silent
 88:                               verbose == 1 prints timing and matrix statistics */

 90:   bspai(ispai->B, &ispai->M, stdout, ispai->epsilon, ispai->nbsteps, ispai->max, ispai->maxnew, ispai->block_size, ispai->cache_size, ispai->verbose);

 92:   ConvertMatrixToMat(PetscObjectComm((PetscObject)pc), ispai->M, &ispai->PM);

 94:   /* free the SPAI matrices */
 95:   sp_free_matrix(ispai->B);
 96:   sp_free_matrix(ispai->M);
 97:   return 0;
 98: }

100: static PetscErrorCode PCApply_SPAI(PC pc, Vec xx, Vec y)
101: {
102:   PC_SPAI *ispai = (PC_SPAI *)pc->data;

104:   /* Now using PETSc's multiply */
105:   MatMult(ispai->PM, xx, y);
106:   return 0;
107: }

109: static PetscErrorCode PCMatApply_SPAI(PC pc, Mat X, Mat Y)
110: {
111:   PC_SPAI *ispai = (PC_SPAI *)pc->data;

113:   /* Now using PETSc's multiply */
114:   MatMatMult(ispai->PM, X, MAT_REUSE_MATRIX, PETSC_DEFAULT, &Y);
115:   return 0;
116: }

118: static PetscErrorCode PCDestroy_SPAI(PC pc)
119: {
120:   PC_SPAI *ispai = (PC_SPAI *)pc->data;

122:   MatDestroy(&ispai->PM);
123:   MPI_Comm_free(&(ispai->comm_spai));
124:   PetscFree(pc->data);
125:   PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetEpsilon_C", NULL);
126:   PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetNBSteps_C", NULL);
127:   PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMax_C", NULL);
128:   PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMaxNew_C", NULL);
129:   PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetBlockSize_C", NULL);
130:   PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetCacheSize_C", NULL);
131:   PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetVerbose_C", NULL);
132:   PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetSp_C", NULL);
133:   return 0;
134: }

136: static PetscErrorCode PCView_SPAI(PC pc, PetscViewer viewer)
137: {
138:   PC_SPAI  *ispai = (PC_SPAI *)pc->data;
139:   PetscBool iascii;

141:   PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii);
142:   if (iascii) {
143:     PetscViewerASCIIPrintf(viewer, "    epsilon %g\n", ispai->epsilon);
144:     PetscViewerASCIIPrintf(viewer, "    nbsteps %d\n", ispai->nbsteps);
145:     PetscViewerASCIIPrintf(viewer, "    max %d\n", ispai->max);
146:     PetscViewerASCIIPrintf(viewer, "    maxnew %d\n", ispai->maxnew);
147:     PetscViewerASCIIPrintf(viewer, "    block_size %d\n", ispai->block_size);
148:     PetscViewerASCIIPrintf(viewer, "    cache_size %d\n", ispai->cache_size);
149:     PetscViewerASCIIPrintf(viewer, "    verbose %d\n", ispai->verbose);
150:     PetscViewerASCIIPrintf(viewer, "    sp %d\n", ispai->sp);
151:   }
152:   return 0;
153: }

155: static PetscErrorCode PCSPAISetEpsilon_SPAI(PC pc, PetscReal epsilon1)
156: {
157:   PC_SPAI *ispai = (PC_SPAI *)pc->data;

159:   ispai->epsilon = (double)epsilon1;
160:   return 0;
161: }

163: static PetscErrorCode PCSPAISetNBSteps_SPAI(PC pc, PetscInt nbsteps1)
164: {
165:   PC_SPAI *ispai = (PC_SPAI *)pc->data;

167:   ispai->nbsteps = (int)nbsteps1;
168:   return 0;
169: }

171: /* added 1/7/99 g.h. */
172: static PetscErrorCode PCSPAISetMax_SPAI(PC pc, PetscInt max1)
173: {
174:   PC_SPAI *ispai = (PC_SPAI *)pc->data;

176:   ispai->max = (int)max1;
177:   return 0;
178: }

180: static PetscErrorCode PCSPAISetMaxNew_SPAI(PC pc, PetscInt maxnew1)
181: {
182:   PC_SPAI *ispai = (PC_SPAI *)pc->data;

184:   ispai->maxnew = (int)maxnew1;
185:   return 0;
186: }

188: static PetscErrorCode PCSPAISetBlockSize_SPAI(PC pc, PetscInt block_size1)
189: {
190:   PC_SPAI *ispai = (PC_SPAI *)pc->data;

192:   ispai->block_size = (int)block_size1;
193:   return 0;
194: }

196: static PetscErrorCode PCSPAISetCacheSize_SPAI(PC pc, PetscInt cache_size)
197: {
198:   PC_SPAI *ispai = (PC_SPAI *)pc->data;

200:   ispai->cache_size = (int)cache_size;
201:   return 0;
202: }

204: static PetscErrorCode PCSPAISetVerbose_SPAI(PC pc, PetscInt verbose)
205: {
206:   PC_SPAI *ispai = (PC_SPAI *)pc->data;

208:   ispai->verbose = (int)verbose;
209:   return 0;
210: }

212: static PetscErrorCode PCSPAISetSp_SPAI(PC pc, PetscInt sp)
213: {
214:   PC_SPAI *ispai = (PC_SPAI *)pc->data;

216:   ispai->sp = (int)sp;
217:   return 0;
218: }

220: /*@
221:   PCSPAISetEpsilon -- Set the tolerance for the `PCSPAI` preconditioner

223:   Input Parameters:
224: + pc - the preconditioner
225: - eps - epsilon (default .4)

227:   Note:
228:     Espilon must be between 0 and 1. It controls the
229:                  quality of the approximation of M to the inverse of
230:                  A. Higher values of epsilon lead to more work, more
231:                  fill, and usually better preconditioners. In many
232:                  cases the best choice of epsilon is the one that
233:                  divides the total solution time equally between the
234:                  preconditioner and the solver.

236:   Level: intermediate

238: .seealso: `PCSPAI`, `PCSetType()`
239:   @*/
240: PetscErrorCode PCSPAISetEpsilon(PC pc, PetscReal epsilon1)
241: {
242:   PetscTryMethod(pc, "PCSPAISetEpsilon_C", (PC, PetscReal), (pc, epsilon1));
243:   return 0;
244: }

246: /*@
247:   PCSPAISetNBSteps - set maximum number of improvement steps per row in
248:         the `PCSPAI` preconditioner

250:   Input Parameters:
251: + pc - the preconditioner
252: - n - number of steps (default 5)

254:   Note:
255:     `PCSPAI` constructs to approximation to every column of
256:                  the exact inverse of A in a series of improvement
257:                  steps. The quality of the approximation is determined
258:                  by epsilon. If an approximation achieving an accuracy
259:                  of epsilon is not obtained after ns steps, SPAI simply
260:                  uses the best approximation constructed so far.

262:   Level: intermediate

264: .seealso: `PCSPAI`, `PCSetType()`, `PCSPAISetMaxNew()`
265: @*/
266: PetscErrorCode PCSPAISetNBSteps(PC pc, PetscInt nbsteps1)
267: {
268:   PetscTryMethod(pc, "PCSPAISetNBSteps_C", (PC, PetscInt), (pc, nbsteps1));
269:   return 0;
270: }

272: /* added 1/7/99 g.h. */
273: /*@
274:   PCSPAISetMax - set the size of various working buffers in
275:         the `PCSPAI` preconditioner

277:   Input Parameters:
278: + pc - the preconditioner
279: - n - size (default is 5000)

281:   Level: intermediate

283: .seealso: `PCSPAI`, `PCSetType()`
284: @*/
285: PetscErrorCode PCSPAISetMax(PC pc, PetscInt max1)
286: {
287:   PetscTryMethod(pc, "PCSPAISetMax_C", (PC, PetscInt), (pc, max1));
288:   return 0;
289: }

291: /*@
292:   PCSPAISetMaxNew - set maximum number of new nonzero candidates per step
293:    in `PCSPAI` preconditioner

295:   Input Parameters:
296: + pc - the preconditioner
297: - n - maximum number (default 5)

299:   Level: intermediate

301: .seealso: `PCSPAI`, `PCSetType()`, `PCSPAISetNBSteps()`
302: @*/
303: PetscErrorCode PCSPAISetMaxNew(PC pc, PetscInt maxnew1)
304: {
305:   PetscTryMethod(pc, "PCSPAISetMaxNew_C", (PC, PetscInt), (pc, maxnew1));
306:   return 0;
307: }

309: /*@
310:   PCSPAISetBlockSize - set the block size for the `PCSPAI` preconditioner

312:   Input Parameters:
313: + pc - the preconditioner
314: - n - block size (default 1)

316:   Notes:
317:     A block
318:                  size of 1 treats A as a matrix of scalar elements. A
319:                  block size of s > 1 treats A as a matrix of sxs
320:                  blocks. A block size of 0 treats A as a matrix with
321:                  variable sized blocks, which are determined by
322:                  searching for dense square diagonal blocks in A.
323:                  This can be very effective for finite-element
324:                  matrices.

326:                  SPAI will convert A to block form, use a block
327:                  version of the preconditioner algorithm, and then
328:                  convert the result back to scalar form.

330:                  In many cases the a block-size parameter other than 1
331:                  can lead to very significant improvement in
332:                  performance.

334:   Level: intermediate

336: .seealso: `PCSPAI`, `PCSetType()`
337: @*/
338: PetscErrorCode PCSPAISetBlockSize(PC pc, PetscInt block_size1)
339: {
340:   PetscTryMethod(pc, "PCSPAISetBlockSize_C", (PC, PetscInt), (pc, block_size1));
341:   return 0;
342: }

344: /*@
345:   PCSPAISetCacheSize - specify cache size in the `PCSPAI` preconditioner

347:   Input Parameters:
348: + pc - the preconditioner
349: - n -  cache size {0,1,2,3,4,5} (default 5)

351:   Note:
352:     `PCSPAI` uses a hash table to cache messages and avoid
353:                  redundant communication. If suggest always using
354:                  5. This parameter is irrelevant in the serial
355:                  version.

357:   Level: intermediate

359: .seealso: `PCSPAI`, `PCSetType()`
360: @*/
361: PetscErrorCode PCSPAISetCacheSize(PC pc, PetscInt cache_size)
362: {
363:   PetscTryMethod(pc, "PCSPAISetCacheSize_C", (PC, PetscInt), (pc, cache_size));
364:   return 0;
365: }

367: /*@
368:   PCSPAISetVerbose - verbosity level for the `PCSPAI` preconditioner

370:   Input Parameters:
371: + pc - the preconditioner
372: - n - level (default 1)

374:   Note:
375:     print parameters, timings and matrix statistics

377:   Level: intermediate

379: .seealso: `PCSPAI`, `PCSetType()`
380: @*/
381: PetscErrorCode PCSPAISetVerbose(PC pc, PetscInt verbose)
382: {
383:   PetscTryMethod(pc, "PCSPAISetVerbose_C", (PC, PetscInt), (pc, verbose));
384:   return 0;
385: }

387: /*@
388:   PCSPAISetSp - specify a symmetric matrix sparsity pattern in the `PCSPAI` preconditioner

390:   Input Parameters:
391: + pc - the preconditioner
392: - n - 0 or 1

394:   Note:
395:     If A has a symmetric nonzero pattern use -sp 1 to
396:                  improve performance by eliminating some communication
397:                  in the parallel version. Even if A does not have a
398:                  symmetric nonzero pattern -sp 1 may well lead to good
399:                  results, but the code will not follow the published
400:                  SPAI algorithm exactly.

402:   Level: intermediate

404: .seealso: `PCSPAI`, `PCSetType()`
405: @*/
406: PetscErrorCode PCSPAISetSp(PC pc, PetscInt sp)
407: {
408:   PetscTryMethod(pc, "PCSPAISetSp_C", (PC, PetscInt), (pc, sp));
409:   return 0;
410: }

412: static PetscErrorCode PCSetFromOptions_SPAI(PC pc, PetscOptionItems *PetscOptionsObject)
413: {
414:   PC_SPAI  *ispai = (PC_SPAI *)pc->data;
415:   int       nbsteps1, max1, maxnew1, block_size1, cache_size, verbose, sp;
416:   double    epsilon1;
417:   PetscBool flg;

419:   PetscOptionsHeadBegin(PetscOptionsObject, "SPAI options");
420:   PetscOptionsReal("-pc_spai_epsilon", "", "PCSPAISetEpsilon", ispai->epsilon, &epsilon1, &flg);
421:   if (flg) PCSPAISetEpsilon(pc, epsilon1);
422:   PetscOptionsInt("-pc_spai_nbsteps", "", "PCSPAISetNBSteps", ispai->nbsteps, &nbsteps1, &flg);
423:   if (flg) PCSPAISetNBSteps(pc, nbsteps1);
424:   /* added 1/7/99 g.h. */
425:   PetscOptionsInt("-pc_spai_max", "", "PCSPAISetMax", ispai->max, &max1, &flg);
426:   if (flg) PCSPAISetMax(pc, max1);
427:   PetscOptionsInt("-pc_spai_maxnew", "", "PCSPAISetMaxNew", ispai->maxnew, &maxnew1, &flg);
428:   if (flg) PCSPAISetMaxNew(pc, maxnew1);
429:   PetscOptionsInt("-pc_spai_block_size", "", "PCSPAISetBlockSize", ispai->block_size, &block_size1, &flg);
430:   if (flg) PCSPAISetBlockSize(pc, block_size1);
431:   PetscOptionsInt("-pc_spai_cache_size", "", "PCSPAISetCacheSize", ispai->cache_size, &cache_size, &flg);
432:   if (flg) PCSPAISetCacheSize(pc, cache_size);
433:   PetscOptionsInt("-pc_spai_verbose", "", "PCSPAISetVerbose", ispai->verbose, &verbose, &flg);
434:   if (flg) PCSPAISetVerbose(pc, verbose);
435:   PetscOptionsInt("-pc_spai_sp", "", "PCSPAISetSp", ispai->sp, &sp, &flg);
436:   if (flg) PCSPAISetSp(pc, sp);
437:   PetscOptionsHeadEnd();
438:   return 0;
439: }

441: /*MC
442:    PCSPAI - Use the Sparse Approximate Inverse method

444:    Options Database Keys:
445: +  -pc_spai_epsilon <eps> - set tolerance
446: .  -pc_spai_nbstep <n> - set nbsteps
447: .  -pc_spai_max <m> - set max
448: .  -pc_spai_max_new <m> - set maxnew
449: .  -pc_spai_block_size <n> - set block size
450: .  -pc_spai_cache_size <n> - set cache size
451: .  -pc_spai_sp <m> - set sp
452: -  -pc_spai_set_verbose <true,false> - verbose output

454:    Level: beginner

456:    Note:
457:     This only works with `MATAIJ` matrices.

459:    References:
460:  . * -  Grote and Barnard (SIAM J. Sci. Comput.; vol 18, nr 3)

462: .seealso: `PCCreate()`, `PCSetType()`, `PCType`, `PC`,
463:           `PCSPAISetEpsilon()`, `PCSPAISetMax()`, `PCSPAISetMaxNew()`, `PCSPAISetBlockSize()`,
464:           `PCSPAISetVerbose()`, `PCSPAISetSp()`, `PCSPAISetNBSteps()`, `PCSPAISetCacheSize()`
465: M*/

467: PETSC_EXTERN PetscErrorCode PCCreate_SPAI(PC pc)
468: {
469:   PC_SPAI *ispai;

471:   PetscNew(&ispai);
472:   pc->data = ispai;

474:   pc->ops->destroy         = PCDestroy_SPAI;
475:   pc->ops->apply           = PCApply_SPAI;
476:   pc->ops->matapply        = PCMatApply_SPAI;
477:   pc->ops->applyrichardson = 0;
478:   pc->ops->setup           = PCSetUp_SPAI;
479:   pc->ops->view            = PCView_SPAI;
480:   pc->ops->setfromoptions  = PCSetFromOptions_SPAI;

482:   ispai->epsilon    = .4;
483:   ispai->nbsteps    = 5;
484:   ispai->max        = 5000;
485:   ispai->maxnew     = 5;
486:   ispai->block_size = 1;
487:   ispai->cache_size = 5;
488:   ispai->verbose    = 0;

490:   ispai->sp = 1;
491:   MPI_Comm_dup(PetscObjectComm((PetscObject)pc), &(ispai->comm_spai));

493:   PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetEpsilon_C", PCSPAISetEpsilon_SPAI);
494:   PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetNBSteps_C", PCSPAISetNBSteps_SPAI);
495:   PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMax_C", PCSPAISetMax_SPAI);
496:   PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMaxNew_C", PCSPAISetMaxNew_SPAI);
497:   PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetBlockSize_C", PCSPAISetBlockSize_SPAI);
498:   PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetCacheSize_C", PCSPAISetCacheSize_SPAI);
499:   PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetVerbose_C", PCSPAISetVerbose_SPAI);
500:   PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetSp_C", PCSPAISetSp_SPAI);
501:   return 0;
502: }

504: /*
505:    Converts from a PETSc matrix to an SPAI matrix
506: */
507: PetscErrorCode ConvertMatToMatrix(MPI_Comm comm, Mat A, Mat AT, matrix **B)
508: {
509:   matrix                  *M;
510:   int                      i, j, col;
511:   int                      row_indx;
512:   int                      len, pe, local_indx, start_indx;
513:   int                     *mapping;
514:   const int               *cols;
515:   const double            *vals;
516:   int                      n, mnl, nnl, nz, rstart, rend;
517:   PetscMPIInt              size, rank;
518:   struct compressed_lines *rows;

520:   MPI_Comm_size(comm, &size);
521:   MPI_Comm_rank(comm, &rank);
522:   MatGetSize(A, &n, &n);
523:   MatGetLocalSize(A, &mnl, &nnl);

525:   /*
526:     not sure why a barrier is required. commenting out
527:   MPI_Barrier(comm);
528:   */

530:   M = new_matrix((SPAI_Comm)comm);

532:   M->n              = n;
533:   M->bs             = 1;
534:   M->max_block_size = 1;

536:   M->mnls          = (int *)malloc(sizeof(int) * size);
537:   M->start_indices = (int *)malloc(sizeof(int) * size);
538:   M->pe            = (int *)malloc(sizeof(int) * n);
539:   M->block_sizes   = (int *)malloc(sizeof(int) * n);
540:   for (i = 0; i < n; i++) M->block_sizes[i] = 1;

542:   MPI_Allgather(&mnl, 1, MPI_INT, M->mnls, 1, MPI_INT, comm);

544:   M->start_indices[0] = 0;
545:   for (i = 1; i < size; i++) M->start_indices[i] = M->start_indices[i - 1] + M->mnls[i - 1];

547:   M->mnl            = M->mnls[M->myid];
548:   M->my_start_index = M->start_indices[M->myid];

550:   for (i = 0; i < size; i++) {
551:     start_indx = M->start_indices[i];
552:     for (j = 0; j < M->mnls[i]; j++) M->pe[start_indx + j] = i;
553:   }

555:   if (AT) {
556:     M->lines = new_compressed_lines(M->mnls[rank], 1);
557:   } else {
558:     M->lines = new_compressed_lines(M->mnls[rank], 0);
559:   }

561:   rows = M->lines;

563:   /* Determine the mapping from global indices to pointers */
564:   PetscMalloc1(M->n, &mapping);
565:   pe         = 0;
566:   local_indx = 0;
567:   for (i = 0; i < M->n; i++) {
568:     if (local_indx >= M->mnls[pe]) {
569:       pe++;
570:       local_indx = 0;
571:     }
572:     mapping[i] = local_indx + M->start_indices[pe];
573:     local_indx++;
574:   }

576:   /************** Set up the row structure *****************/

578:   MatGetOwnershipRange(A, &rstart, &rend);
579:   for (i = rstart; i < rend; i++) {
580:     row_indx = i - rstart;
581:     MatGetRow(A, i, &nz, &cols, &vals);
582:     /* allocate buffers */
583:     rows->ptrs[row_indx] = (int *)malloc(nz * sizeof(int));
584:     rows->A[row_indx]    = (double *)malloc(nz * sizeof(double));
585:     /* copy the matrix */
586:     for (j = 0; j < nz; j++) {
587:       col = cols[j];
588:       len = rows->len[row_indx]++;

590:       rows->ptrs[row_indx][len] = mapping[col];
591:       rows->A[row_indx][len]    = vals[j];
592:     }
593:     rows->slen[row_indx] = rows->len[row_indx];

595:     MatRestoreRow(A, i, &nz, &cols, &vals);
596:   }

598:   /************** Set up the column structure *****************/

600:   if (AT) {
601:     for (i = rstart; i < rend; i++) {
602:       row_indx = i - rstart;
603:       MatGetRow(AT, i, &nz, &cols, &vals);
604:       /* allocate buffers */
605:       rows->rptrs[row_indx] = (int *)malloc(nz * sizeof(int));
606:       /* copy the matrix (i.e., the structure) */
607:       for (j = 0; j < nz; j++) {
608:         col = cols[j];
609:         len = rows->rlen[row_indx]++;

611:         rows->rptrs[row_indx][len] = mapping[col];
612:       }
613:       MatRestoreRow(AT, i, &nz, &cols, &vals);
614:     }
615:   }

617:   PetscFree(mapping);

619:   order_pointers(M);
620:   M->maxnz = calc_maxnz(M);
621:   *B       = M;
622:   return 0;
623: }

625: /*
626:    Converts from an SPAI matrix B  to a PETSc matrix PB.
627:    This assumes that the SPAI matrix B is stored in
628:    COMPRESSED-ROW format.
629: */
630: PetscErrorCode ConvertMatrixToMat(MPI_Comm comm, matrix *B, Mat *PB)
631: {
632:   PetscMPIInt size, rank;
633:   int         m, n, M, N;
634:   int         d_nz, o_nz;
635:   int        *d_nnz, *o_nnz;
636:   int         i, k, global_row, global_col, first_diag_col, last_diag_col;
637:   PetscScalar val;

639:   MPI_Comm_size(comm, &size);
640:   MPI_Comm_rank(comm, &rank);

642:   m = n = B->mnls[rank];
643:   d_nz = o_nz = 0;

645:   /* Determine preallocation for MatCreateAIJ */
646:   PetscMalloc1(m, &d_nnz);
647:   PetscMalloc1(m, &o_nnz);
648:   for (i = 0; i < m; i++) d_nnz[i] = o_nnz[i] = 0;
649:   first_diag_col = B->start_indices[rank];
650:   last_diag_col  = first_diag_col + B->mnls[rank];
651:   for (i = 0; i < B->mnls[rank]; i++) {
652:     for (k = 0; k < B->lines->len[i]; k++) {
653:       global_col = B->lines->ptrs[i][k];
654:       if ((global_col >= first_diag_col) && (global_col < last_diag_col)) d_nnz[i]++;
655:       else o_nnz[i]++;
656:     }
657:   }

659:   M = N = B->n;
660:   /* Here we only know how to create AIJ format */
661:   MatCreate(comm, PB);
662:   MatSetSizes(*PB, m, n, M, N);
663:   MatSetType(*PB, MATAIJ);
664:   MatSeqAIJSetPreallocation(*PB, d_nz, d_nnz);
665:   MatMPIAIJSetPreallocation(*PB, d_nz, d_nnz, o_nz, o_nnz);

667:   for (i = 0; i < B->mnls[rank]; i++) {
668:     global_row = B->start_indices[rank] + i;
669:     for (k = 0; k < B->lines->len[i]; k++) {
670:       global_col = B->lines->ptrs[i][k];

672:       val = B->lines->A[i][k];
673:       MatSetValues(*PB, 1, &global_row, 1, &global_col, &val, ADD_VALUES);
674:     }
675:   }

677:   PetscFree(d_nnz);
678:   PetscFree(o_nnz);

680:   MatAssemblyBegin(*PB, MAT_FINAL_ASSEMBLY);
681:   MatAssemblyEnd(*PB, MAT_FINAL_ASSEMBLY);
682:   return 0;
683: }

685: /*
686:    Converts from an SPAI vector v  to a PETSc vec Pv.
687: */
688: PetscErrorCode ConvertVectorToVec(MPI_Comm comm, vector *v, Vec *Pv)
689: {
690:   PetscMPIInt size, rank;
691:   int         m, M, i, *mnls, *start_indices, *global_indices;

693:   MPI_Comm_size(comm, &size);
694:   MPI_Comm_rank(comm, &rank);

696:   m = v->mnl;
697:   M = v->n;

699:   VecCreateMPI(comm, m, M, Pv);

701:   PetscMalloc1(size, &mnls);
702:   MPI_Allgather(&v->mnl, 1, MPI_INT, mnls, 1, MPI_INT, comm);

704:   PetscMalloc1(size, &start_indices);

706:   start_indices[0] = 0;
707:   for (i = 1; i < size; i++) start_indices[i] = start_indices[i - 1] + mnls[i - 1];

709:   PetscMalloc1(v->mnl, &global_indices);
710:   for (i = 0; i < v->mnl; i++) global_indices[i] = start_indices[rank] + i;

712:   PetscFree(mnls);
713:   PetscFree(start_indices);

715:   VecSetValues(*Pv, v->mnl, global_indices, v->v, INSERT_VALUES);
716:   VecAssemblyBegin(*Pv);
717:   VecAssemblyEnd(*Pv);

719:   PetscFree(global_indices);
720:   return 0;
721: }