Actual source code: ispai.c
2: /*
3: 3/99 Modified by Stephen Barnard to support SPAI version 3.0
4: */
6: /*
7: Provides an interface to the SPAI Sparse Approximate Inverse Preconditioner
8: Code written by Stephen Barnard.
10: Note: there is some BAD memory bleeding below!
12: This code needs work
14: 1) get rid of all memory bleeding
15: 2) fix PETSc/interface so that it gets if the matrix is symmetric from the matrix
16: rather than having the sp flag for PC_SPAI
17: 3) fix to set the block size based on the matrix block size
19: */
20: #define PETSC_SKIP_COMPLEX /* since spai uses I which conflicts with some complex implementations */
22: #include <petsc/private/pcimpl.h>
23: #include <../src/ksp/pc/impls/spai/petscspai.h>
25: /*
26: These are the SPAI include files
27: */
28: EXTERN_C_BEGIN
29: #define SPAI_USE_MPI /* required for setting SPAI_Comm correctly in basics.h */
30: #include <spai.h>
31: #include <matrix.h>
32: EXTERN_C_END
34: extern PetscErrorCode ConvertMatToMatrix(MPI_Comm, Mat, Mat, matrix **);
35: extern PetscErrorCode ConvertMatrixToMat(MPI_Comm, matrix *, Mat *);
36: extern PetscErrorCode ConvertVectorToVec(MPI_Comm, vector *, Vec *);
37: extern PetscErrorCode MM_to_PETSC(char *, char *, char *);
39: typedef struct {
40: matrix *B; /* matrix in SPAI format */
41: matrix *BT; /* transpose of matrix in SPAI format */
42: matrix *M; /* the approximate inverse in SPAI format */
44: Mat PM; /* the approximate inverse PETSc format */
46: double epsilon; /* tolerance */
47: int nbsteps; /* max number of "improvement" steps per line */
48: int max; /* max dimensions of is_I, q, etc. */
49: int maxnew; /* max number of new entries per step */
50: int block_size; /* constant block size */
51: int cache_size; /* one of (1,2,3,4,5,6) indicting size of cache */
52: int verbose; /* SPAI prints timing and statistics */
54: int sp; /* symmetric nonzero pattern */
55: MPI_Comm comm_spai; /* communicator to be used with spai */
56: } PC_SPAI;
58: static PetscErrorCode PCSetUp_SPAI(PC pc)
59: {
60: PC_SPAI *ispai = (PC_SPAI *)pc->data;
61: Mat AT;
63: init_SPAI();
65: if (ispai->sp) {
66: ConvertMatToMatrix(ispai->comm_spai, pc->pmat, pc->pmat, &ispai->B);
67: } else {
68: /* Use the transpose to get the column nonzero structure. */
69: MatTranspose(pc->pmat, MAT_INITIAL_MATRIX, &AT);
70: ConvertMatToMatrix(ispai->comm_spai, pc->pmat, AT, &ispai->B);
71: MatDestroy(&AT);
72: }
74: /* Destroy the transpose */
75: /* Don't know how to do it. PETSc developers? */
77: /* construct SPAI preconditioner */
78: /* FILE *messages */ /* file for warning messages */
79: /* double epsilon */ /* tolerance */
80: /* int nbsteps */ /* max number of "improvement" steps per line */
81: /* int max */ /* max dimensions of is_I, q, etc. */
82: /* int maxnew */ /* max number of new entries per step */
83: /* int block_size */ /* block_size == 1 specifies scalar elements
84: block_size == n specifies nxn constant-block elements
85: block_size == 0 specifies variable-block elements */
86: /* int cache_size */ /* one of (1,2,3,4,5,6) indicting size of cache. cache_size == 0 indicates no caching */
87: /* int verbose */ /* verbose == 0 specifies that SPAI is silent
88: verbose == 1 prints timing and matrix statistics */
90: bspai(ispai->B, &ispai->M, stdout, ispai->epsilon, ispai->nbsteps, ispai->max, ispai->maxnew, ispai->block_size, ispai->cache_size, ispai->verbose);
92: ConvertMatrixToMat(PetscObjectComm((PetscObject)pc), ispai->M, &ispai->PM);
94: /* free the SPAI matrices */
95: sp_free_matrix(ispai->B);
96: sp_free_matrix(ispai->M);
97: return 0;
98: }
100: static PetscErrorCode PCApply_SPAI(PC pc, Vec xx, Vec y)
101: {
102: PC_SPAI *ispai = (PC_SPAI *)pc->data;
104: /* Now using PETSc's multiply */
105: MatMult(ispai->PM, xx, y);
106: return 0;
107: }
109: static PetscErrorCode PCMatApply_SPAI(PC pc, Mat X, Mat Y)
110: {
111: PC_SPAI *ispai = (PC_SPAI *)pc->data;
113: /* Now using PETSc's multiply */
114: MatMatMult(ispai->PM, X, MAT_REUSE_MATRIX, PETSC_DEFAULT, &Y);
115: return 0;
116: }
118: static PetscErrorCode PCDestroy_SPAI(PC pc)
119: {
120: PC_SPAI *ispai = (PC_SPAI *)pc->data;
122: MatDestroy(&ispai->PM);
123: MPI_Comm_free(&(ispai->comm_spai));
124: PetscFree(pc->data);
125: PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetEpsilon_C", NULL);
126: PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetNBSteps_C", NULL);
127: PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMax_C", NULL);
128: PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMaxNew_C", NULL);
129: PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetBlockSize_C", NULL);
130: PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetCacheSize_C", NULL);
131: PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetVerbose_C", NULL);
132: PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetSp_C", NULL);
133: return 0;
134: }
136: static PetscErrorCode PCView_SPAI(PC pc, PetscViewer viewer)
137: {
138: PC_SPAI *ispai = (PC_SPAI *)pc->data;
139: PetscBool iascii;
141: PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii);
142: if (iascii) {
143: PetscViewerASCIIPrintf(viewer, " epsilon %g\n", ispai->epsilon);
144: PetscViewerASCIIPrintf(viewer, " nbsteps %d\n", ispai->nbsteps);
145: PetscViewerASCIIPrintf(viewer, " max %d\n", ispai->max);
146: PetscViewerASCIIPrintf(viewer, " maxnew %d\n", ispai->maxnew);
147: PetscViewerASCIIPrintf(viewer, " block_size %d\n", ispai->block_size);
148: PetscViewerASCIIPrintf(viewer, " cache_size %d\n", ispai->cache_size);
149: PetscViewerASCIIPrintf(viewer, " verbose %d\n", ispai->verbose);
150: PetscViewerASCIIPrintf(viewer, " sp %d\n", ispai->sp);
151: }
152: return 0;
153: }
155: static PetscErrorCode PCSPAISetEpsilon_SPAI(PC pc, PetscReal epsilon1)
156: {
157: PC_SPAI *ispai = (PC_SPAI *)pc->data;
159: ispai->epsilon = (double)epsilon1;
160: return 0;
161: }
163: static PetscErrorCode PCSPAISetNBSteps_SPAI(PC pc, PetscInt nbsteps1)
164: {
165: PC_SPAI *ispai = (PC_SPAI *)pc->data;
167: ispai->nbsteps = (int)nbsteps1;
168: return 0;
169: }
171: /* added 1/7/99 g.h. */
172: static PetscErrorCode PCSPAISetMax_SPAI(PC pc, PetscInt max1)
173: {
174: PC_SPAI *ispai = (PC_SPAI *)pc->data;
176: ispai->max = (int)max1;
177: return 0;
178: }
180: static PetscErrorCode PCSPAISetMaxNew_SPAI(PC pc, PetscInt maxnew1)
181: {
182: PC_SPAI *ispai = (PC_SPAI *)pc->data;
184: ispai->maxnew = (int)maxnew1;
185: return 0;
186: }
188: static PetscErrorCode PCSPAISetBlockSize_SPAI(PC pc, PetscInt block_size1)
189: {
190: PC_SPAI *ispai = (PC_SPAI *)pc->data;
192: ispai->block_size = (int)block_size1;
193: return 0;
194: }
196: static PetscErrorCode PCSPAISetCacheSize_SPAI(PC pc, PetscInt cache_size)
197: {
198: PC_SPAI *ispai = (PC_SPAI *)pc->data;
200: ispai->cache_size = (int)cache_size;
201: return 0;
202: }
204: static PetscErrorCode PCSPAISetVerbose_SPAI(PC pc, PetscInt verbose)
205: {
206: PC_SPAI *ispai = (PC_SPAI *)pc->data;
208: ispai->verbose = (int)verbose;
209: return 0;
210: }
212: static PetscErrorCode PCSPAISetSp_SPAI(PC pc, PetscInt sp)
213: {
214: PC_SPAI *ispai = (PC_SPAI *)pc->data;
216: ispai->sp = (int)sp;
217: return 0;
218: }
220: /*@
221: PCSPAISetEpsilon -- Set the tolerance for the `PCSPAI` preconditioner
223: Input Parameters:
224: + pc - the preconditioner
225: - eps - epsilon (default .4)
227: Note:
228: Espilon must be between 0 and 1. It controls the
229: quality of the approximation of M to the inverse of
230: A. Higher values of epsilon lead to more work, more
231: fill, and usually better preconditioners. In many
232: cases the best choice of epsilon is the one that
233: divides the total solution time equally between the
234: preconditioner and the solver.
236: Level: intermediate
238: .seealso: `PCSPAI`, `PCSetType()`
239: @*/
240: PetscErrorCode PCSPAISetEpsilon(PC pc, PetscReal epsilon1)
241: {
242: PetscTryMethod(pc, "PCSPAISetEpsilon_C", (PC, PetscReal), (pc, epsilon1));
243: return 0;
244: }
246: /*@
247: PCSPAISetNBSteps - set maximum number of improvement steps per row in
248: the `PCSPAI` preconditioner
250: Input Parameters:
251: + pc - the preconditioner
252: - n - number of steps (default 5)
254: Note:
255: `PCSPAI` constructs to approximation to every column of
256: the exact inverse of A in a series of improvement
257: steps. The quality of the approximation is determined
258: by epsilon. If an approximation achieving an accuracy
259: of epsilon is not obtained after ns steps, SPAI simply
260: uses the best approximation constructed so far.
262: Level: intermediate
264: .seealso: `PCSPAI`, `PCSetType()`, `PCSPAISetMaxNew()`
265: @*/
266: PetscErrorCode PCSPAISetNBSteps(PC pc, PetscInt nbsteps1)
267: {
268: PetscTryMethod(pc, "PCSPAISetNBSteps_C", (PC, PetscInt), (pc, nbsteps1));
269: return 0;
270: }
272: /* added 1/7/99 g.h. */
273: /*@
274: PCSPAISetMax - set the size of various working buffers in
275: the `PCSPAI` preconditioner
277: Input Parameters:
278: + pc - the preconditioner
279: - n - size (default is 5000)
281: Level: intermediate
283: .seealso: `PCSPAI`, `PCSetType()`
284: @*/
285: PetscErrorCode PCSPAISetMax(PC pc, PetscInt max1)
286: {
287: PetscTryMethod(pc, "PCSPAISetMax_C", (PC, PetscInt), (pc, max1));
288: return 0;
289: }
291: /*@
292: PCSPAISetMaxNew - set maximum number of new nonzero candidates per step
293: in `PCSPAI` preconditioner
295: Input Parameters:
296: + pc - the preconditioner
297: - n - maximum number (default 5)
299: Level: intermediate
301: .seealso: `PCSPAI`, `PCSetType()`, `PCSPAISetNBSteps()`
302: @*/
303: PetscErrorCode PCSPAISetMaxNew(PC pc, PetscInt maxnew1)
304: {
305: PetscTryMethod(pc, "PCSPAISetMaxNew_C", (PC, PetscInt), (pc, maxnew1));
306: return 0;
307: }
309: /*@
310: PCSPAISetBlockSize - set the block size for the `PCSPAI` preconditioner
312: Input Parameters:
313: + pc - the preconditioner
314: - n - block size (default 1)
316: Notes:
317: A block
318: size of 1 treats A as a matrix of scalar elements. A
319: block size of s > 1 treats A as a matrix of sxs
320: blocks. A block size of 0 treats A as a matrix with
321: variable sized blocks, which are determined by
322: searching for dense square diagonal blocks in A.
323: This can be very effective for finite-element
324: matrices.
326: SPAI will convert A to block form, use a block
327: version of the preconditioner algorithm, and then
328: convert the result back to scalar form.
330: In many cases the a block-size parameter other than 1
331: can lead to very significant improvement in
332: performance.
334: Level: intermediate
336: .seealso: `PCSPAI`, `PCSetType()`
337: @*/
338: PetscErrorCode PCSPAISetBlockSize(PC pc, PetscInt block_size1)
339: {
340: PetscTryMethod(pc, "PCSPAISetBlockSize_C", (PC, PetscInt), (pc, block_size1));
341: return 0;
342: }
344: /*@
345: PCSPAISetCacheSize - specify cache size in the `PCSPAI` preconditioner
347: Input Parameters:
348: + pc - the preconditioner
349: - n - cache size {0,1,2,3,4,5} (default 5)
351: Note:
352: `PCSPAI` uses a hash table to cache messages and avoid
353: redundant communication. If suggest always using
354: 5. This parameter is irrelevant in the serial
355: version.
357: Level: intermediate
359: .seealso: `PCSPAI`, `PCSetType()`
360: @*/
361: PetscErrorCode PCSPAISetCacheSize(PC pc, PetscInt cache_size)
362: {
363: PetscTryMethod(pc, "PCSPAISetCacheSize_C", (PC, PetscInt), (pc, cache_size));
364: return 0;
365: }
367: /*@
368: PCSPAISetVerbose - verbosity level for the `PCSPAI` preconditioner
370: Input Parameters:
371: + pc - the preconditioner
372: - n - level (default 1)
374: Note:
375: print parameters, timings and matrix statistics
377: Level: intermediate
379: .seealso: `PCSPAI`, `PCSetType()`
380: @*/
381: PetscErrorCode PCSPAISetVerbose(PC pc, PetscInt verbose)
382: {
383: PetscTryMethod(pc, "PCSPAISetVerbose_C", (PC, PetscInt), (pc, verbose));
384: return 0;
385: }
387: /*@
388: PCSPAISetSp - specify a symmetric matrix sparsity pattern in the `PCSPAI` preconditioner
390: Input Parameters:
391: + pc - the preconditioner
392: - n - 0 or 1
394: Note:
395: If A has a symmetric nonzero pattern use -sp 1 to
396: improve performance by eliminating some communication
397: in the parallel version. Even if A does not have a
398: symmetric nonzero pattern -sp 1 may well lead to good
399: results, but the code will not follow the published
400: SPAI algorithm exactly.
402: Level: intermediate
404: .seealso: `PCSPAI`, `PCSetType()`
405: @*/
406: PetscErrorCode PCSPAISetSp(PC pc, PetscInt sp)
407: {
408: PetscTryMethod(pc, "PCSPAISetSp_C", (PC, PetscInt), (pc, sp));
409: return 0;
410: }
412: static PetscErrorCode PCSetFromOptions_SPAI(PC pc, PetscOptionItems *PetscOptionsObject)
413: {
414: PC_SPAI *ispai = (PC_SPAI *)pc->data;
415: int nbsteps1, max1, maxnew1, block_size1, cache_size, verbose, sp;
416: double epsilon1;
417: PetscBool flg;
419: PetscOptionsHeadBegin(PetscOptionsObject, "SPAI options");
420: PetscOptionsReal("-pc_spai_epsilon", "", "PCSPAISetEpsilon", ispai->epsilon, &epsilon1, &flg);
421: if (flg) PCSPAISetEpsilon(pc, epsilon1);
422: PetscOptionsInt("-pc_spai_nbsteps", "", "PCSPAISetNBSteps", ispai->nbsteps, &nbsteps1, &flg);
423: if (flg) PCSPAISetNBSteps(pc, nbsteps1);
424: /* added 1/7/99 g.h. */
425: PetscOptionsInt("-pc_spai_max", "", "PCSPAISetMax", ispai->max, &max1, &flg);
426: if (flg) PCSPAISetMax(pc, max1);
427: PetscOptionsInt("-pc_spai_maxnew", "", "PCSPAISetMaxNew", ispai->maxnew, &maxnew1, &flg);
428: if (flg) PCSPAISetMaxNew(pc, maxnew1);
429: PetscOptionsInt("-pc_spai_block_size", "", "PCSPAISetBlockSize", ispai->block_size, &block_size1, &flg);
430: if (flg) PCSPAISetBlockSize(pc, block_size1);
431: PetscOptionsInt("-pc_spai_cache_size", "", "PCSPAISetCacheSize", ispai->cache_size, &cache_size, &flg);
432: if (flg) PCSPAISetCacheSize(pc, cache_size);
433: PetscOptionsInt("-pc_spai_verbose", "", "PCSPAISetVerbose", ispai->verbose, &verbose, &flg);
434: if (flg) PCSPAISetVerbose(pc, verbose);
435: PetscOptionsInt("-pc_spai_sp", "", "PCSPAISetSp", ispai->sp, &sp, &flg);
436: if (flg) PCSPAISetSp(pc, sp);
437: PetscOptionsHeadEnd();
438: return 0;
439: }
441: /*MC
442: PCSPAI - Use the Sparse Approximate Inverse method
444: Options Database Keys:
445: + -pc_spai_epsilon <eps> - set tolerance
446: . -pc_spai_nbstep <n> - set nbsteps
447: . -pc_spai_max <m> - set max
448: . -pc_spai_max_new <m> - set maxnew
449: . -pc_spai_block_size <n> - set block size
450: . -pc_spai_cache_size <n> - set cache size
451: . -pc_spai_sp <m> - set sp
452: - -pc_spai_set_verbose <true,false> - verbose output
454: Level: beginner
456: Note:
457: This only works with `MATAIJ` matrices.
459: References:
460: . * - Grote and Barnard (SIAM J. Sci. Comput.; vol 18, nr 3)
462: .seealso: `PCCreate()`, `PCSetType()`, `PCType`, `PC`,
463: `PCSPAISetEpsilon()`, `PCSPAISetMax()`, `PCSPAISetMaxNew()`, `PCSPAISetBlockSize()`,
464: `PCSPAISetVerbose()`, `PCSPAISetSp()`, `PCSPAISetNBSteps()`, `PCSPAISetCacheSize()`
465: M*/
467: PETSC_EXTERN PetscErrorCode PCCreate_SPAI(PC pc)
468: {
469: PC_SPAI *ispai;
471: PetscNew(&ispai);
472: pc->data = ispai;
474: pc->ops->destroy = PCDestroy_SPAI;
475: pc->ops->apply = PCApply_SPAI;
476: pc->ops->matapply = PCMatApply_SPAI;
477: pc->ops->applyrichardson = 0;
478: pc->ops->setup = PCSetUp_SPAI;
479: pc->ops->view = PCView_SPAI;
480: pc->ops->setfromoptions = PCSetFromOptions_SPAI;
482: ispai->epsilon = .4;
483: ispai->nbsteps = 5;
484: ispai->max = 5000;
485: ispai->maxnew = 5;
486: ispai->block_size = 1;
487: ispai->cache_size = 5;
488: ispai->verbose = 0;
490: ispai->sp = 1;
491: MPI_Comm_dup(PetscObjectComm((PetscObject)pc), &(ispai->comm_spai));
493: PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetEpsilon_C", PCSPAISetEpsilon_SPAI);
494: PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetNBSteps_C", PCSPAISetNBSteps_SPAI);
495: PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMax_C", PCSPAISetMax_SPAI);
496: PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMaxNew_C", PCSPAISetMaxNew_SPAI);
497: PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetBlockSize_C", PCSPAISetBlockSize_SPAI);
498: PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetCacheSize_C", PCSPAISetCacheSize_SPAI);
499: PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetVerbose_C", PCSPAISetVerbose_SPAI);
500: PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetSp_C", PCSPAISetSp_SPAI);
501: return 0;
502: }
504: /*
505: Converts from a PETSc matrix to an SPAI matrix
506: */
507: PetscErrorCode ConvertMatToMatrix(MPI_Comm comm, Mat A, Mat AT, matrix **B)
508: {
509: matrix *M;
510: int i, j, col;
511: int row_indx;
512: int len, pe, local_indx, start_indx;
513: int *mapping;
514: const int *cols;
515: const double *vals;
516: int n, mnl, nnl, nz, rstart, rend;
517: PetscMPIInt size, rank;
518: struct compressed_lines *rows;
520: MPI_Comm_size(comm, &size);
521: MPI_Comm_rank(comm, &rank);
522: MatGetSize(A, &n, &n);
523: MatGetLocalSize(A, &mnl, &nnl);
525: /*
526: not sure why a barrier is required. commenting out
527: MPI_Barrier(comm);
528: */
530: M = new_matrix((SPAI_Comm)comm);
532: M->n = n;
533: M->bs = 1;
534: M->max_block_size = 1;
536: M->mnls = (int *)malloc(sizeof(int) * size);
537: M->start_indices = (int *)malloc(sizeof(int) * size);
538: M->pe = (int *)malloc(sizeof(int) * n);
539: M->block_sizes = (int *)malloc(sizeof(int) * n);
540: for (i = 0; i < n; i++) M->block_sizes[i] = 1;
542: MPI_Allgather(&mnl, 1, MPI_INT, M->mnls, 1, MPI_INT, comm);
544: M->start_indices[0] = 0;
545: for (i = 1; i < size; i++) M->start_indices[i] = M->start_indices[i - 1] + M->mnls[i - 1];
547: M->mnl = M->mnls[M->myid];
548: M->my_start_index = M->start_indices[M->myid];
550: for (i = 0; i < size; i++) {
551: start_indx = M->start_indices[i];
552: for (j = 0; j < M->mnls[i]; j++) M->pe[start_indx + j] = i;
553: }
555: if (AT) {
556: M->lines = new_compressed_lines(M->mnls[rank], 1);
557: } else {
558: M->lines = new_compressed_lines(M->mnls[rank], 0);
559: }
561: rows = M->lines;
563: /* Determine the mapping from global indices to pointers */
564: PetscMalloc1(M->n, &mapping);
565: pe = 0;
566: local_indx = 0;
567: for (i = 0; i < M->n; i++) {
568: if (local_indx >= M->mnls[pe]) {
569: pe++;
570: local_indx = 0;
571: }
572: mapping[i] = local_indx + M->start_indices[pe];
573: local_indx++;
574: }
576: /************** Set up the row structure *****************/
578: MatGetOwnershipRange(A, &rstart, &rend);
579: for (i = rstart; i < rend; i++) {
580: row_indx = i - rstart;
581: MatGetRow(A, i, &nz, &cols, &vals);
582: /* allocate buffers */
583: rows->ptrs[row_indx] = (int *)malloc(nz * sizeof(int));
584: rows->A[row_indx] = (double *)malloc(nz * sizeof(double));
585: /* copy the matrix */
586: for (j = 0; j < nz; j++) {
587: col = cols[j];
588: len = rows->len[row_indx]++;
590: rows->ptrs[row_indx][len] = mapping[col];
591: rows->A[row_indx][len] = vals[j];
592: }
593: rows->slen[row_indx] = rows->len[row_indx];
595: MatRestoreRow(A, i, &nz, &cols, &vals);
596: }
598: /************** Set up the column structure *****************/
600: if (AT) {
601: for (i = rstart; i < rend; i++) {
602: row_indx = i - rstart;
603: MatGetRow(AT, i, &nz, &cols, &vals);
604: /* allocate buffers */
605: rows->rptrs[row_indx] = (int *)malloc(nz * sizeof(int));
606: /* copy the matrix (i.e., the structure) */
607: for (j = 0; j < nz; j++) {
608: col = cols[j];
609: len = rows->rlen[row_indx]++;
611: rows->rptrs[row_indx][len] = mapping[col];
612: }
613: MatRestoreRow(AT, i, &nz, &cols, &vals);
614: }
615: }
617: PetscFree(mapping);
619: order_pointers(M);
620: M->maxnz = calc_maxnz(M);
621: *B = M;
622: return 0;
623: }
625: /*
626: Converts from an SPAI matrix B to a PETSc matrix PB.
627: This assumes that the SPAI matrix B is stored in
628: COMPRESSED-ROW format.
629: */
630: PetscErrorCode ConvertMatrixToMat(MPI_Comm comm, matrix *B, Mat *PB)
631: {
632: PetscMPIInt size, rank;
633: int m, n, M, N;
634: int d_nz, o_nz;
635: int *d_nnz, *o_nnz;
636: int i, k, global_row, global_col, first_diag_col, last_diag_col;
637: PetscScalar val;
639: MPI_Comm_size(comm, &size);
640: MPI_Comm_rank(comm, &rank);
642: m = n = B->mnls[rank];
643: d_nz = o_nz = 0;
645: /* Determine preallocation for MatCreateAIJ */
646: PetscMalloc1(m, &d_nnz);
647: PetscMalloc1(m, &o_nnz);
648: for (i = 0; i < m; i++) d_nnz[i] = o_nnz[i] = 0;
649: first_diag_col = B->start_indices[rank];
650: last_diag_col = first_diag_col + B->mnls[rank];
651: for (i = 0; i < B->mnls[rank]; i++) {
652: for (k = 0; k < B->lines->len[i]; k++) {
653: global_col = B->lines->ptrs[i][k];
654: if ((global_col >= first_diag_col) && (global_col < last_diag_col)) d_nnz[i]++;
655: else o_nnz[i]++;
656: }
657: }
659: M = N = B->n;
660: /* Here we only know how to create AIJ format */
661: MatCreate(comm, PB);
662: MatSetSizes(*PB, m, n, M, N);
663: MatSetType(*PB, MATAIJ);
664: MatSeqAIJSetPreallocation(*PB, d_nz, d_nnz);
665: MatMPIAIJSetPreallocation(*PB, d_nz, d_nnz, o_nz, o_nnz);
667: for (i = 0; i < B->mnls[rank]; i++) {
668: global_row = B->start_indices[rank] + i;
669: for (k = 0; k < B->lines->len[i]; k++) {
670: global_col = B->lines->ptrs[i][k];
672: val = B->lines->A[i][k];
673: MatSetValues(*PB, 1, &global_row, 1, &global_col, &val, ADD_VALUES);
674: }
675: }
677: PetscFree(d_nnz);
678: PetscFree(o_nnz);
680: MatAssemblyBegin(*PB, MAT_FINAL_ASSEMBLY);
681: MatAssemblyEnd(*PB, MAT_FINAL_ASSEMBLY);
682: return 0;
683: }
685: /*
686: Converts from an SPAI vector v to a PETSc vec Pv.
687: */
688: PetscErrorCode ConvertVectorToVec(MPI_Comm comm, vector *v, Vec *Pv)
689: {
690: PetscMPIInt size, rank;
691: int m, M, i, *mnls, *start_indices, *global_indices;
693: MPI_Comm_size(comm, &size);
694: MPI_Comm_rank(comm, &rank);
696: m = v->mnl;
697: M = v->n;
699: VecCreateMPI(comm, m, M, Pv);
701: PetscMalloc1(size, &mnls);
702: MPI_Allgather(&v->mnl, 1, MPI_INT, mnls, 1, MPI_INT, comm);
704: PetscMalloc1(size, &start_indices);
706: start_indices[0] = 0;
707: for (i = 1; i < size; i++) start_indices[i] = start_indices[i - 1] + mnls[i - 1];
709: PetscMalloc1(v->mnl, &global_indices);
710: for (i = 0; i < v->mnl; i++) global_indices[i] = start_indices[rank] + i;
712: PetscFree(mnls);
713: PetscFree(start_indices);
715: VecSetValues(*Pv, v->mnl, global_indices, v->v, INSERT_VALUES);
716: VecAssemblyBegin(*Pv);
717: VecAssemblyEnd(*Pv);
719: PetscFree(global_indices);
720: return 0;
721: }