Actual source code: matimpl.h
2: #ifndef __MATIMPL_H
5: #include <petscmat.h>
6: #include <petscmatcoarsen.h>
7: #include <petsc/private/petscimpl.h>
9: PETSC_EXTERN PetscBool MatRegisterAllCalled;
10: PETSC_EXTERN PetscBool MatSeqAIJRegisterAllCalled;
11: PETSC_EXTERN PetscBool MatOrderingRegisterAllCalled;
12: PETSC_EXTERN PetscBool MatColoringRegisterAllCalled;
13: PETSC_EXTERN PetscBool MatPartitioningRegisterAllCalled;
14: PETSC_EXTERN PetscBool MatCoarsenRegisterAllCalled;
15: PETSC_EXTERN PetscErrorCode MatRegisterAll(void);
16: PETSC_EXTERN PetscErrorCode MatOrderingRegisterAll(void);
17: PETSC_EXTERN PetscErrorCode MatColoringRegisterAll(void);
18: PETSC_EXTERN PetscErrorCode MatPartitioningRegisterAll(void);
19: PETSC_EXTERN PetscErrorCode MatCoarsenRegisterAll(void);
20: PETSC_EXTERN PetscErrorCode MatSeqAIJRegisterAll(void);
22: /* Gets the root type of the input matrix's type (e.g., MATAIJ for MATSEQAIJ) */
23: PETSC_EXTERN PetscErrorCode MatGetRootType_Private(Mat, MatType *);
25: /* Gets the MPI type corresponding to the input matrix's type (e.g., MATMPIAIJ for MATSEQAIJ) */
26: PETSC_EXTERN PetscErrorCode MatGetMPIMatType_Private(Mat, MatType *);
28: /*
29: This file defines the parts of the matrix data structure that are
30: shared by all matrix types.
31: */
33: /*
34: If you add entries here also add them to the MATOP enum
35: in include/petscmat.h and src/mat/f90-mod/petscmat.h
36: */
37: typedef struct _MatOps *MatOps;
38: struct _MatOps {
39: /* 0*/
40: PetscErrorCode (*setvalues)(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode);
41: PetscErrorCode (*getrow)(Mat, PetscInt, PetscInt *, PetscInt *[], PetscScalar *[]);
42: PetscErrorCode (*restorerow)(Mat, PetscInt, PetscInt *, PetscInt *[], PetscScalar *[]);
43: PetscErrorCode (*mult)(Mat, Vec, Vec);
44: PetscErrorCode (*multadd)(Mat, Vec, Vec, Vec);
45: /* 5*/
46: PetscErrorCode (*multtranspose)(Mat, Vec, Vec);
47: PetscErrorCode (*multtransposeadd)(Mat, Vec, Vec, Vec);
48: PetscErrorCode (*solve)(Mat, Vec, Vec);
49: PetscErrorCode (*solveadd)(Mat, Vec, Vec, Vec);
50: PetscErrorCode (*solvetranspose)(Mat, Vec, Vec);
51: /*10*/
52: PetscErrorCode (*solvetransposeadd)(Mat, Vec, Vec, Vec);
53: PetscErrorCode (*lufactor)(Mat, IS, IS, const MatFactorInfo *);
54: PetscErrorCode (*choleskyfactor)(Mat, IS, const MatFactorInfo *);
55: PetscErrorCode (*sor)(Mat, Vec, PetscReal, MatSORType, PetscReal, PetscInt, PetscInt, Vec);
56: PetscErrorCode (*transpose)(Mat, MatReuse, Mat *);
57: /*15*/
58: PetscErrorCode (*getinfo)(Mat, MatInfoType, MatInfo *);
59: PetscErrorCode (*equal)(Mat, Mat, PetscBool *);
60: PetscErrorCode (*getdiagonal)(Mat, Vec);
61: PetscErrorCode (*diagonalscale)(Mat, Vec, Vec);
62: PetscErrorCode (*norm)(Mat, NormType, PetscReal *);
63: /*20*/
64: PetscErrorCode (*assemblybegin)(Mat, MatAssemblyType);
65: PetscErrorCode (*assemblyend)(Mat, MatAssemblyType);
66: PetscErrorCode (*setoption)(Mat, MatOption, PetscBool);
67: PetscErrorCode (*zeroentries)(Mat);
68: /*24*/
69: PetscErrorCode (*zerorows)(Mat, PetscInt, const PetscInt[], PetscScalar, Vec, Vec);
70: PetscErrorCode (*lufactorsymbolic)(Mat, Mat, IS, IS, const MatFactorInfo *);
71: PetscErrorCode (*lufactornumeric)(Mat, Mat, const MatFactorInfo *);
72: PetscErrorCode (*choleskyfactorsymbolic)(Mat, Mat, IS, const MatFactorInfo *);
73: PetscErrorCode (*choleskyfactornumeric)(Mat, Mat, const MatFactorInfo *);
74: /*29*/
75: PetscErrorCode (*setup)(Mat);
76: PetscErrorCode (*ilufactorsymbolic)(Mat, Mat, IS, IS, const MatFactorInfo *);
77: PetscErrorCode (*iccfactorsymbolic)(Mat, Mat, IS, const MatFactorInfo *);
78: PetscErrorCode (*getdiagonalblock)(Mat, Mat *);
79: PetscErrorCode (*setinf)(Mat);
80: /*34*/
81: PetscErrorCode (*duplicate)(Mat, MatDuplicateOption, Mat *);
82: PetscErrorCode (*forwardsolve)(Mat, Vec, Vec);
83: PetscErrorCode (*backwardsolve)(Mat, Vec, Vec);
84: PetscErrorCode (*ilufactor)(Mat, IS, IS, const MatFactorInfo *);
85: PetscErrorCode (*iccfactor)(Mat, IS, const MatFactorInfo *);
86: /*39*/
87: PetscErrorCode (*axpy)(Mat, PetscScalar, Mat, MatStructure);
88: PetscErrorCode (*createsubmatrices)(Mat, PetscInt, const IS[], const IS[], MatReuse, Mat *[]);
89: PetscErrorCode (*increaseoverlap)(Mat, PetscInt, IS[], PetscInt);
90: PetscErrorCode (*getvalues)(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], PetscScalar[]);
91: PetscErrorCode (*copy)(Mat, Mat, MatStructure);
92: /*44*/
93: PetscErrorCode (*getrowmax)(Mat, Vec, PetscInt[]);
94: PetscErrorCode (*scale)(Mat, PetscScalar);
95: PetscErrorCode (*shift)(Mat, PetscScalar);
96: PetscErrorCode (*diagonalset)(Mat, Vec, InsertMode);
97: PetscErrorCode (*zerorowscolumns)(Mat, PetscInt, const PetscInt[], PetscScalar, Vec, Vec);
98: /*49*/
99: PetscErrorCode (*setrandom)(Mat, PetscRandom);
100: PetscErrorCode (*getrowij)(Mat, PetscInt, PetscBool, PetscBool, PetscInt *, const PetscInt *[], const PetscInt *[], PetscBool *);
101: PetscErrorCode (*restorerowij)(Mat, PetscInt, PetscBool, PetscBool, PetscInt *, const PetscInt *[], const PetscInt *[], PetscBool *);
102: PetscErrorCode (*getcolumnij)(Mat, PetscInt, PetscBool, PetscBool, PetscInt *, const PetscInt *[], const PetscInt *[], PetscBool *);
103: PetscErrorCode (*restorecolumnij)(Mat, PetscInt, PetscBool, PetscBool, PetscInt *, const PetscInt *[], const PetscInt *[], PetscBool *);
104: /*54*/
105: PetscErrorCode (*fdcoloringcreate)(Mat, ISColoring, MatFDColoring);
106: PetscErrorCode (*coloringpatch)(Mat, PetscInt, PetscInt, ISColoringValue[], ISColoring *);
107: PetscErrorCode (*setunfactored)(Mat);
108: PetscErrorCode (*permute)(Mat, IS, IS, Mat *);
109: PetscErrorCode (*setvaluesblocked)(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode);
110: /*59*/
111: PetscErrorCode (*createsubmatrix)(Mat, IS, IS, MatReuse, Mat *);
112: PetscErrorCode (*destroy)(Mat);
113: PetscErrorCode (*view)(Mat, PetscViewer);
114: PetscErrorCode (*convertfrom)(Mat, MatType, MatReuse, Mat *);
115: PetscErrorCode (*placeholder_63)(void);
116: /*64*/
117: PetscErrorCode (*matmatmultsymbolic)(Mat, Mat, Mat, PetscReal, Mat);
118: PetscErrorCode (*matmatmultnumeric)(Mat, Mat, Mat, Mat);
119: PetscErrorCode (*setlocaltoglobalmapping)(Mat, ISLocalToGlobalMapping, ISLocalToGlobalMapping);
120: PetscErrorCode (*setvalueslocal)(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode);
121: PetscErrorCode (*zerorowslocal)(Mat, PetscInt, const PetscInt[], PetscScalar, Vec, Vec);
122: /*69*/
123: PetscErrorCode (*getrowmaxabs)(Mat, Vec, PetscInt[]);
124: PetscErrorCode (*getrowminabs)(Mat, Vec, PetscInt[]);
125: PetscErrorCode (*convert)(Mat, MatType, MatReuse, Mat *);
126: PetscErrorCode (*hasoperation)(Mat, MatOperation, PetscBool *);
127: PetscErrorCode (*placeholder_73)(void);
128: /*74*/
129: PetscErrorCode (*setvaluesadifor)(Mat, PetscInt, void *);
130: PetscErrorCode (*fdcoloringapply)(Mat, MatFDColoring, Vec, void *);
131: PetscErrorCode (*setfromoptions)(Mat, PetscOptionItems *);
132: PetscErrorCode (*placeholder_77)(void);
133: PetscErrorCode (*placeholder_78)(void);
134: /*79*/
135: PetscErrorCode (*findzerodiagonals)(Mat, IS *);
136: PetscErrorCode (*mults)(Mat, Vecs, Vecs);
137: PetscErrorCode (*solves)(Mat, Vecs, Vecs);
138: PetscErrorCode (*getinertia)(Mat, PetscInt *, PetscInt *, PetscInt *);
139: PetscErrorCode (*load)(Mat, PetscViewer);
140: /*84*/
141: PetscErrorCode (*issymmetric)(Mat, PetscReal, PetscBool *);
142: PetscErrorCode (*ishermitian)(Mat, PetscReal, PetscBool *);
143: PetscErrorCode (*isstructurallysymmetric)(Mat, PetscBool *);
144: PetscErrorCode (*setvaluesblockedlocal)(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode);
145: PetscErrorCode (*getvecs)(Mat, Vec *, Vec *);
146: /*89*/
147: PetscErrorCode (*placeholder_89)(void);
148: PetscErrorCode (*matmultsymbolic)(Mat, Mat, PetscReal, Mat);
149: PetscErrorCode (*matmultnumeric)(Mat, Mat, Mat);
150: PetscErrorCode (*placeholder_92)(void);
151: PetscErrorCode (*ptapsymbolic)(Mat, Mat, PetscReal, Mat); /* double dispatch wrapper routine */
152: /*94*/
153: PetscErrorCode (*ptapnumeric)(Mat, Mat, Mat); /* double dispatch wrapper routine */
154: PetscErrorCode (*placeholder_95)(void);
155: PetscErrorCode (*mattransposemultsymbolic)(Mat, Mat, PetscReal, Mat);
156: PetscErrorCode (*mattransposemultnumeric)(Mat, Mat, Mat);
157: PetscErrorCode (*bindtocpu)(Mat, PetscBool);
158: /*99*/
159: PetscErrorCode (*productsetfromoptions)(Mat);
160: PetscErrorCode (*productsymbolic)(Mat);
161: PetscErrorCode (*productnumeric)(Mat);
162: PetscErrorCode (*conjugate)(Mat); /* complex conjugate */
163: PetscErrorCode (*viewnative)(Mat, PetscViewer);
164: /*104*/
165: PetscErrorCode (*setvaluesrow)(Mat, PetscInt, const PetscScalar[]);
166: PetscErrorCode (*realpart)(Mat);
167: PetscErrorCode (*imaginarypart)(Mat);
168: PetscErrorCode (*getrowuppertriangular)(Mat);
169: PetscErrorCode (*restorerowuppertriangular)(Mat);
170: /*109*/
171: PetscErrorCode (*matsolve)(Mat, Mat, Mat);
172: PetscErrorCode (*matsolvetranspose)(Mat, Mat, Mat);
173: PetscErrorCode (*getrowmin)(Mat, Vec, PetscInt[]);
174: PetscErrorCode (*getcolumnvector)(Mat, Vec, PetscInt);
175: PetscErrorCode (*missingdiagonal)(Mat, PetscBool *, PetscInt *);
176: /*114*/
177: PetscErrorCode (*getseqnonzerostructure)(Mat, Mat *);
178: PetscErrorCode (*create)(Mat);
179: PetscErrorCode (*getghosts)(Mat, PetscInt *, const PetscInt *[]);
180: PetscErrorCode (*getlocalsubmatrix)(Mat, IS, IS, Mat *);
181: PetscErrorCode (*restorelocalsubmatrix)(Mat, IS, IS, Mat *);
182: /*119*/
183: PetscErrorCode (*multdiagonalblock)(Mat, Vec, Vec);
184: PetscErrorCode (*hermitiantranspose)(Mat, MatReuse, Mat *);
185: PetscErrorCode (*multhermitiantranspose)(Mat, Vec, Vec);
186: PetscErrorCode (*multhermitiantransposeadd)(Mat, Vec, Vec, Vec);
187: PetscErrorCode (*getmultiprocblock)(Mat, MPI_Comm, MatReuse, Mat *);
188: /*124*/
189: PetscErrorCode (*findnonzerorows)(Mat, IS *);
190: PetscErrorCode (*getcolumnreductions)(Mat, PetscInt, PetscReal *);
191: PetscErrorCode (*invertblockdiagonal)(Mat, const PetscScalar **);
192: PetscErrorCode (*invertvariableblockdiagonal)(Mat, PetscInt, const PetscInt *, PetscScalar *);
193: PetscErrorCode (*createsubmatricesmpi)(Mat, PetscInt, const IS[], const IS[], MatReuse, Mat **);
194: /*129*/
195: PetscErrorCode (*setvaluesbatch)(Mat, PetscInt, PetscInt, PetscInt *, const PetscScalar *);
196: PetscErrorCode (*placeholder_130)(void);
197: PetscErrorCode (*transposematmultsymbolic)(Mat, Mat, PetscReal, Mat);
198: PetscErrorCode (*transposematmultnumeric)(Mat, Mat, Mat);
199: PetscErrorCode (*transposecoloringcreate)(Mat, ISColoring, MatTransposeColoring);
200: /*134*/
201: PetscErrorCode (*transcoloringapplysptoden)(MatTransposeColoring, Mat, Mat);
202: PetscErrorCode (*transcoloringapplydentosp)(MatTransposeColoring, Mat, Mat);
203: PetscErrorCode (*placeholder_136)(void);
204: PetscErrorCode (*rartsymbolic)(Mat, Mat, PetscReal, Mat); /* double dispatch wrapper routine */
205: PetscErrorCode (*rartnumeric)(Mat, Mat, Mat); /* double dispatch wrapper routine */
206: /*139*/
207: PetscErrorCode (*setblocksizes)(Mat, PetscInt, PetscInt);
208: PetscErrorCode (*aypx)(Mat, PetscScalar, Mat, MatStructure);
209: PetscErrorCode (*residual)(Mat, Vec, Vec, Vec);
210: PetscErrorCode (*fdcoloringsetup)(Mat, ISColoring, MatFDColoring);
211: PetscErrorCode (*findoffblockdiagonalentries)(Mat, IS *);
212: PetscErrorCode (*creatempimatconcatenateseqmat)(MPI_Comm, Mat, PetscInt, MatReuse, Mat *);
213: /*145*/
214: PetscErrorCode (*destroysubmatrices)(PetscInt, Mat *[]);
215: PetscErrorCode (*mattransposesolve)(Mat, Mat, Mat);
216: PetscErrorCode (*getvalueslocal)(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], PetscScalar[]);
217: PetscErrorCode (*creategraph)(Mat, PetscBool, PetscBool, PetscReal, Mat *);
218: PetscErrorCode (*dummy)(Mat);
219: /*150*/
220: PetscErrorCode (*transposesymbolic)(Mat, Mat *);
221: };
222: /*
223: If you add MatOps entries above also add them to the MATOP enum
224: in include/petscmat.h and src/mat/f90-mod/petscmat.h
225: */
227: #include <petscsys.h>
228: PETSC_EXTERN PetscErrorCode MatRegisterOp(MPI_Comm, const char[], PetscVoidFunction, const char[], PetscInt, ...);
229: PETSC_EXTERN PetscErrorCode MatQueryOp(MPI_Comm, PetscVoidFunction *, const char[], PetscInt, ...);
231: typedef struct _p_MatRootName *MatRootName;
232: struct _p_MatRootName {
233: char *rname, *sname, *mname;
234: MatRootName next;
235: };
237: PETSC_EXTERN MatRootName MatRootNameList;
239: /*
240: Utility private matrix routines
241: */
242: PETSC_INTERN PetscErrorCode MatFindNonzeroRowsOrCols_Basic(Mat, PetscBool, PetscReal, IS *);
243: PETSC_INTERN PetscErrorCode MatConvert_Basic(Mat, MatType, MatReuse, Mat *);
244: PETSC_INTERN PetscErrorCode MatConvert_Shell(Mat, MatType, MatReuse, Mat *);
245: PETSC_INTERN PetscErrorCode MatConvertFrom_Shell(Mat, MatType, MatReuse, Mat *);
246: PETSC_INTERN PetscErrorCode MatCopy_Basic(Mat, Mat, MatStructure);
247: PETSC_INTERN PetscErrorCode MatDiagonalSet_Default(Mat, Vec, InsertMode);
248: #if defined(PETSC_HAVE_SCALAPACK)
249: PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
250: #endif
251: PETSC_INTERN PetscErrorCode MatSetPreallocationCOO_Basic(Mat, PetscCount, const PetscInt[], const PetscInt[]);
252: PETSC_INTERN PetscErrorCode MatSetValuesCOO_Basic(Mat, const PetscScalar[], InsertMode);
254: /* these callbacks rely on the old matrix function pointers for
255: matmat operations. They are unsafe, and should be removed.
256: However, the amount of work needed to clean up all the
257: implementations is not negligible */
258: PETSC_INTERN PetscErrorCode MatProductSymbolic_AB(Mat);
259: PETSC_INTERN PetscErrorCode MatProductNumeric_AB(Mat);
260: PETSC_INTERN PetscErrorCode MatProductSymbolic_AtB(Mat);
261: PETSC_INTERN PetscErrorCode MatProductNumeric_AtB(Mat);
262: PETSC_INTERN PetscErrorCode MatProductSymbolic_ABt(Mat);
263: PETSC_INTERN PetscErrorCode MatProductNumeric_ABt(Mat);
264: PETSC_INTERN PetscErrorCode MatProductNumeric_PtAP(Mat);
265: PETSC_INTERN PetscErrorCode MatProductNumeric_RARt(Mat);
266: PETSC_INTERN PetscErrorCode MatProductSymbolic_ABC(Mat);
267: PETSC_INTERN PetscErrorCode MatProductNumeric_ABC(Mat);
269: PETSC_INTERN PetscErrorCode MatProductCreate_Private(Mat, Mat, Mat, Mat);
270: /* this callback handles all the different triple products and
271: does not rely on the function pointers; used by cuSPARSE/hipSPARSE and KOKKOS-KERNELS */
272: PETSC_INTERN PetscErrorCode MatProductSymbolic_ABC_Basic(Mat);
274: /* CreateGraph is common to AIJ seq and mpi */
275: PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat, PetscBool, PetscBool, PetscReal, Mat *);
277: #if defined(PETSC_CLANG_STATIC_ANALYZER)
278: template <typename Tm>
279: void MatCheckPreallocated(Tm, int);
280: template <typename Tm>
281: void MatCheckProduct(Tm, int);
282: #else /* PETSC_CLANG_STATIC_ANALYZER */
283: #if defined(PETSC_USE_DEBUG)
284: #define MatCheckPreallocated(A, arg) \
285: do { \
287: } while (0)
288: #else
289: #define MatCheckPreallocated(A, arg) \
290: do { \
291: } while (0)
292: #endif
294: #if defined(PETSC_USE_DEBUG)
295: #define MatCheckProduct(A, arg) \
296: do { \
298: } while (0)
299: #else
300: #define MatCheckProduct(A, arg) \
301: do { \
302: } while (0)
303: #endif
304: #endif /* PETSC_CLANG_STATIC_ANALYZER */
306: /*
307: The stash is used to temporarily store inserted matrix values that
308: belong to another processor. During the assembly phase the stashed
309: values are moved to the correct processor and
310: */
312: typedef struct _MatStashSpace *PetscMatStashSpace;
314: struct _MatStashSpace {
315: PetscMatStashSpace next;
316: PetscScalar *space_head, *val;
317: PetscInt *idx, *idy;
318: PetscInt total_space_size;
319: PetscInt local_used;
320: PetscInt local_remaining;
321: };
323: PETSC_EXTERN PetscErrorCode PetscMatStashSpaceGet(PetscInt, PetscInt, PetscMatStashSpace *);
324: PETSC_EXTERN PetscErrorCode PetscMatStashSpaceContiguous(PetscInt, PetscMatStashSpace *, PetscScalar *, PetscInt *, PetscInt *);
325: PETSC_EXTERN PetscErrorCode PetscMatStashSpaceDestroy(PetscMatStashSpace *);
327: typedef struct {
328: PetscInt count;
329: } MatStashHeader;
331: typedef struct {
332: void *buffer; /* Of type blocktype, dynamically constructed */
333: PetscInt count;
334: char pending;
335: } MatStashFrame;
337: typedef struct _MatStash MatStash;
338: struct _MatStash {
339: PetscInt nmax; /* maximum stash size */
340: PetscInt umax; /* user specified max-size */
341: PetscInt oldnmax; /* the nmax value used previously */
342: PetscInt n; /* stash size */
343: PetscInt bs; /* block size of the stash */
344: PetscInt reallocs; /* preserve the no of mallocs invoked */
345: PetscMatStashSpace space_head, space; /* linked list to hold stashed global row/column numbers and matrix values */
347: PetscErrorCode (*ScatterBegin)(Mat, MatStash *, PetscInt *);
348: PetscErrorCode (*ScatterGetMesg)(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *);
349: PetscErrorCode (*ScatterEnd)(MatStash *);
350: PetscErrorCode (*ScatterDestroy)(MatStash *);
352: /* The following variables are used for communication */
353: MPI_Comm comm;
354: PetscMPIInt size, rank;
355: PetscMPIInt tag1, tag2;
356: MPI_Request *send_waits; /* array of send requests */
357: MPI_Request *recv_waits; /* array of receive requests */
358: MPI_Status *send_status; /* array of send status */
359: PetscInt nsends, nrecvs; /* numbers of sends and receives */
360: PetscScalar *svalues; /* sending data */
361: PetscInt *sindices;
362: PetscScalar **rvalues; /* receiving data (values) */
363: PetscInt **rindices; /* receiving data (indices) */
364: PetscInt nprocessed; /* number of messages already processed */
365: PetscMPIInt *flg_v; /* indicates what messages have arrived so far and from whom */
366: PetscBool reproduce;
367: PetscInt reproduce_count;
369: /* The following variables are used for BTS communication */
370: PetscBool first_assembly_done; /* Is the first time matrix assembly done? */
371: PetscBool use_status; /* Use MPI_Status to determine number of items in each message */
372: PetscMPIInt nsendranks;
373: PetscMPIInt nrecvranks;
374: PetscMPIInt *sendranks;
375: PetscMPIInt *recvranks;
376: MatStashHeader *sendhdr, *recvhdr;
377: MatStashFrame *sendframes; /* pointers to the main messages */
378: MatStashFrame *recvframes;
379: MatStashFrame *recvframe_active;
380: PetscInt recvframe_i; /* index of block within active frame */
381: PetscMPIInt recvframe_count; /* Count actually sent for current frame */
382: PetscInt recvcount; /* Number of receives processed so far */
383: PetscMPIInt *some_indices; /* From last call to MPI_Waitsome */
384: MPI_Status *some_statuses; /* Statuses from last call to MPI_Waitsome */
385: PetscMPIInt some_count; /* Number of requests completed in last call to MPI_Waitsome */
386: PetscMPIInt some_i; /* Index of request currently being processed */
387: MPI_Request *sendreqs;
388: MPI_Request *recvreqs;
389: PetscSegBuffer segsendblocks;
390: PetscSegBuffer segrecvframe;
391: PetscSegBuffer segrecvblocks;
392: MPI_Datatype blocktype;
393: size_t blocktype_size;
394: InsertMode *insertmode; /* Pointer to check mat->insertmode and set upon message arrival in case no local values have been set. */
395: };
397: #if !defined(PETSC_HAVE_MPIUNI)
398: PETSC_INTERN PetscErrorCode MatStashScatterDestroy_BTS(MatStash *);
399: #endif
400: PETSC_INTERN PetscErrorCode MatStashCreate_Private(MPI_Comm, PetscInt, MatStash *);
401: PETSC_INTERN PetscErrorCode MatStashDestroy_Private(MatStash *);
402: PETSC_INTERN PetscErrorCode MatStashScatterEnd_Private(MatStash *);
403: PETSC_INTERN PetscErrorCode MatStashSetInitialSize_Private(MatStash *, PetscInt);
404: PETSC_INTERN PetscErrorCode MatStashGetInfo_Private(MatStash *, PetscInt *, PetscInt *);
405: PETSC_INTERN PetscErrorCode MatStashValuesRow_Private(MatStash *, PetscInt, PetscInt, const PetscInt[], const PetscScalar[], PetscBool);
406: PETSC_INTERN PetscErrorCode MatStashValuesCol_Private(MatStash *, PetscInt, PetscInt, const PetscInt[], const PetscScalar[], PetscInt, PetscBool);
407: PETSC_INTERN PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *, PetscInt, PetscInt, const PetscInt[], const PetscScalar[], PetscInt, PetscInt, PetscInt);
408: PETSC_INTERN PetscErrorCode MatStashValuesColBlocked_Private(MatStash *, PetscInt, PetscInt, const PetscInt[], const PetscScalar[], PetscInt, PetscInt, PetscInt);
409: PETSC_INTERN PetscErrorCode MatStashScatterBegin_Private(Mat, MatStash *, PetscInt *);
410: PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Private(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *);
411: PETSC_INTERN PetscErrorCode MatGetInfo_External(Mat, MatInfoType, MatInfo *);
413: typedef struct {
414: PetscInt dim;
415: PetscInt dims[4];
416: PetscInt starts[4];
417: PetscBool noc; /* this is a single component problem, hence user will not set MatStencil.c */
418: } MatStencilInfo;
420: /* Info about using compressed row format */
421: typedef struct {
422: PetscBool use; /* indicates compressed rows have been checked and will be used */
423: PetscInt nrows; /* number of non-zero rows */
424: PetscInt *i; /* compressed row pointer */
425: PetscInt *rindex; /* compressed row index */
426: } Mat_CompressedRow;
427: PETSC_EXTERN PetscErrorCode MatCheckCompressedRow(Mat, PetscInt, Mat_CompressedRow *, PetscInt *, PetscInt, PetscReal);
429: typedef struct { /* used by MatCreateRedundantMatrix() for reusing matredundant */
430: PetscInt nzlocal, nsends, nrecvs;
431: PetscMPIInt *send_rank, *recv_rank;
432: PetscInt *sbuf_nz, *rbuf_nz, *sbuf_j, **rbuf_j;
433: PetscScalar *sbuf_a, **rbuf_a;
434: MPI_Comm subcomm; /* when user does not provide a subcomm */
435: IS isrow, iscol;
436: Mat *matseq;
437: } Mat_Redundant;
439: typedef struct { /* used by MatProduct() */
440: MatProductType type;
441: char *alg;
442: Mat A, B, C, Dwork;
443: PetscBool symbolic_used_the_fact_A_is_symmetric; /* Symbolic phase took advantage of the fact that A is symmetric, and optimized e.g. AtB as AB. Then, .. */
444: PetscBool symbolic_used_the_fact_B_is_symmetric; /* .. in the numeric phase, if a new A is not symmetric (but has the same sparsity as the old A therefore .. */
445: PetscBool symbolic_used_the_fact_C_is_symmetric; /* MatMatMult(A,B,MAT_REUSE_MATRIX,..&C) is still legitimate), we need to redo symbolic! */
446: PetscReal fill;
447: PetscBool api_user; /* used to distinguish command line options and to indicate the matrix values are ready to be consumed at symbolic phase if needed */
449: /* Some products may display the information on the algorithm used */
450: PetscErrorCode (*view)(Mat, PetscViewer);
452: /* many products have intermediate data structures, each specific to Mat types and product type */
453: PetscBool clear; /* whether or not to clear the data structures after MatProductNumeric has been called */
454: void *data; /* where to stash those structures */
455: PetscErrorCode (*destroy)(void *); /* destroy routine */
456: } Mat_Product;
458: struct _p_Mat {
459: PETSCHEADER(struct _MatOps);
460: PetscLayout rmap, cmap;
461: void *data; /* implementation-specific data */
462: MatFactorType factortype; /* MAT_FACTOR_LU, ILU, CHOLESKY or ICC */
463: PetscBool trivialsymbolic; /* indicates the symbolic factorization doesn't actually do a symbolic factorization, it is delayed to the numeric factorization */
464: PetscBool canuseordering; /* factorization can use ordering provide to routine (most PETSc implementations) */
465: MatOrderingType preferredordering[MAT_FACTOR_NUM_TYPES]; /* what is the preferred (or default) ordering for the matrix solver type */
466: PetscBool assembled; /* is the matrix assembled? */
467: PetscBool was_assembled; /* new values inserted into assembled mat */
468: PetscInt num_ass; /* number of times matrix has been assembled */
469: PetscObjectState nonzerostate; /* each time new nonzeros locations are introduced into the matrix this is updated */
470: PetscObjectState ass_nonzerostate; /* nonzero state at last assembly */
471: MatInfo info; /* matrix information */
472: InsertMode insertmode; /* have values been inserted in matrix or added? */
473: MatStash stash, bstash; /* used for assembling off-proc mat emements */
474: MatNullSpace nullsp; /* null space (operator is singular) */
475: MatNullSpace transnullsp; /* null space of transpose of operator */
476: MatNullSpace nearnullsp; /* near null space to be used by multigrid methods */
477: PetscInt congruentlayouts; /* are the rows and columns layouts congruent? */
478: PetscBool preallocated;
479: MatStencilInfo stencil; /* information for structured grid */
480: PetscBool3 symmetric, hermitian, structurally_symmetric, spd;
481: PetscBool symmetry_eternal, structural_symmetry_eternal, spd_eternal;
482: PetscBool nooffprocentries, nooffproczerorows;
483: PetscBool assembly_subset; /* set by MAT_SUBSET_OFF_PROC_ENTRIES */
484: PetscBool submat_singleis; /* for efficient PCSetUp_ASM() */
485: PetscBool structure_only;
486: PetscBool sortedfull; /* full, sorted rows are inserted */
487: PetscBool force_diagonals; /* set by MAT_FORCE_DIAGONAL_ENTRIES */
488: #if defined(PETSC_HAVE_DEVICE)
489: PetscOffloadMask offloadmask; /* a mask which indicates where the valid matrix data is (GPU, CPU or both) */
490: PetscBool boundtocpu;
491: PetscBool bindingpropagates;
492: #endif
493: char *defaultrandtype;
494: void *spptr; /* pointer for special library like SuperLU */
495: char *solvertype;
496: PetscBool checksymmetryonassembly, checknullspaceonassembly;
497: PetscReal checksymmetrytol;
498: Mat schur; /* Schur complement matrix */
499: MatFactorSchurStatus schur_status; /* status of the Schur complement matrix */
500: Mat_Redundant *redundant; /* used by MatCreateRedundantMatrix() */
501: PetscBool erroriffailure; /* Generate an error if detected (for example a zero pivot) instead of returning */
502: MatFactorError factorerrortype; /* type of error in factorization */
503: PetscReal factorerror_zeropivot_value; /* If numerical zero pivot was detected this is the computed value */
504: PetscInt factorerror_zeropivot_row; /* Row where zero pivot was detected */
505: PetscInt nblocks, *bsizes; /* support for MatSetVariableBlockSizes() */
506: PetscInt p_cstart, p_rank, p_cend, n_rank; /* Information from parallel MatComputeVariableBlockEnvelope() */
507: PetscBool p_parallel;
508: char *defaultvectype;
509: Mat_Product *product;
510: PetscBool form_explicit_transpose; /* hint to generate an explicit mat tranpsose for operations like MatMultTranspose() */
511: PetscBool transupdated; /* whether or not the explicitly generated transpose is up-to-date */
512: char *factorprefix; /* the prefix to use with factored matrix that is created */
513: };
515: PETSC_INTERN PetscErrorCode MatAXPY_Basic(Mat, PetscScalar, Mat, MatStructure);
516: PETSC_INTERN PetscErrorCode MatAXPY_BasicWithPreallocation(Mat, Mat, PetscScalar, Mat, MatStructure);
517: PETSC_INTERN PetscErrorCode MatAXPY_Basic_Preallocate(Mat, Mat, Mat *);
518: PETSC_INTERN PetscErrorCode MatAXPY_Dense_Nest(Mat, PetscScalar, Mat);
520: /*
521: Utility for MatFactor (Schur complement)
522: */
523: PETSC_INTERN PetscErrorCode MatFactorFactorizeSchurComplement_Private(Mat);
524: PETSC_INTERN PetscErrorCode MatFactorInvertSchurComplement_Private(Mat);
525: PETSC_INTERN PetscErrorCode MatFactorUpdateSchurStatus_Private(Mat);
526: PETSC_INTERN PetscErrorCode MatFactorSetUpInPlaceSchur_Private(Mat);
528: /*
529: Utility for MatZeroRows
530: */
531: PETSC_INTERN PetscErrorCode MatZeroRowsMapLocal_Private(Mat, PetscInt, const PetscInt *, PetscInt *, PetscInt **);
533: /*
534: Utility for MatView/MatLoad
535: */
536: PETSC_INTERN PetscErrorCode MatView_Binary_BlockSizes(Mat, PetscViewer);
537: PETSC_INTERN PetscErrorCode MatLoad_Binary_BlockSizes(Mat, PetscViewer);
539: /*
540: Object for partitioning graphs
541: */
543: typedef struct _MatPartitioningOps *MatPartitioningOps;
544: struct _MatPartitioningOps {
545: PetscErrorCode (*apply)(MatPartitioning, IS *);
546: PetscErrorCode (*applynd)(MatPartitioning, IS *);
547: PetscErrorCode (*setfromoptions)(MatPartitioning, PetscOptionItems *);
548: PetscErrorCode (*destroy)(MatPartitioning);
549: PetscErrorCode (*view)(MatPartitioning, PetscViewer);
550: PetscErrorCode (*improve)(MatPartitioning, IS *);
551: };
553: struct _p_MatPartitioning {
554: PETSCHEADER(struct _MatPartitioningOps);
555: Mat adj;
556: PetscInt *vertex_weights;
557: PetscReal *part_weights;
558: PetscInt n; /* number of partitions */
559: PetscInt ncon; /* number of vertex weights per vertex */
560: void *data;
561: PetscInt setupcalled;
562: PetscBool use_edge_weights; /* A flag indicates whether or not to use edge weights */
563: };
565: /* needed for parallel nested dissection by ParMetis and PTSCOTCH */
566: PETSC_INTERN PetscErrorCode MatPartitioningSizesToSep_Private(PetscInt, PetscInt[], PetscInt[], PetscInt[]);
568: /*
569: Object for coarsen graphs
570: */
571: typedef struct _MatCoarsenOps *MatCoarsenOps;
572: struct _MatCoarsenOps {
573: PetscErrorCode (*apply)(MatCoarsen);
574: PetscErrorCode (*setfromoptions)(MatCoarsen, PetscOptionItems *);
575: PetscErrorCode (*destroy)(MatCoarsen);
576: PetscErrorCode (*view)(MatCoarsen, PetscViewer);
577: };
579: struct _p_MatCoarsen {
580: PETSCHEADER(struct _MatCoarsenOps);
581: Mat graph;
582: void *subctx;
583: /* */
584: PetscBool strict_aggs;
585: IS perm;
586: PetscCoarsenData *agg_lists;
587: };
589: PETSC_EXTERN PetscErrorCode MatCoarsenMISKSetDistance(MatCoarsen, PetscInt);
590: PETSC_EXTERN PetscErrorCode MatCoarsenMISKGetDistance(MatCoarsen, PetscInt *);
592: /*
593: MatFDColoring is used to compute Jacobian matrices efficiently
594: via coloring. The data structure is explained below in an example.
596: Color = 0 1 0 2 | 2 3 0
597: ---------------------------------------------------
598: 00 01 | 05
599: 10 11 | 14 15 Processor 0
600: 22 23 | 25
601: 32 33 |
602: ===================================================
603: | 44 45 46
604: 50 | 55 Processor 1
605: | 64 66
606: ---------------------------------------------------
608: ncolors = 4;
610: ncolumns = {2,1,1,0}
611: columns = {{0,2},{1},{3},{}}
612: nrows = {4,2,3,3}
613: rows = {{0,1,2,3},{0,1},{1,2,3},{0,1,2}}
614: vwscale = {dx(0),dx(1),dx(2),dx(3)} MPI Vec
615: vscale = {dx(0),dx(1),dx(2),dx(3),dx(4),dx(5)} Seq Vec
617: ncolumns = {1,0,1,1}
618: columns = {{6},{},{4},{5}}
619: nrows = {3,0,2,2}
620: rows = {{0,1,2},{},{1,2},{1,2}}
621: vwscale = {dx(4),dx(5),dx(6)} MPI Vec
622: vscale = {dx(0),dx(4),dx(5),dx(6)} Seq Vec
624: See the routine MatFDColoringApply() for how this data is used
625: to compute the Jacobian.
627: */
628: typedef struct {
629: PetscInt row;
630: PetscInt col;
631: PetscScalar *valaddr; /* address of value */
632: } MatEntry;
634: typedef struct {
635: PetscInt row;
636: PetscScalar *valaddr; /* address of value */
637: } MatEntry2;
639: struct _p_MatFDColoring {
640: PETSCHEADER(int);
641: PetscInt M, N, m; /* total rows, columns; local rows */
642: PetscInt rstart; /* first row owned by local processor */
643: PetscInt ncolors; /* number of colors */
644: PetscInt *ncolumns; /* number of local columns for a color */
645: PetscInt **columns; /* lists the local columns of each color (using global column numbering) */
646: IS *isa; /* these are the IS that contain the column values given in columns */
647: PetscInt *nrows; /* number of local rows for each color */
648: MatEntry *matentry; /* holds (row, column, address of value) for Jacobian matrix entry */
649: MatEntry2 *matentry2; /* holds (row, address of value) for Jacobian matrix entry */
650: PetscScalar *dy; /* store a block of F(x+dx)-F(x) when J is in BAIJ format */
651: PetscReal error_rel; /* square root of relative error in computing function */
652: PetscReal umin; /* minimum allowable u'dx value */
653: Vec w1, w2, w3; /* work vectors used in computing Jacobian */
654: PetscBool fset; /* indicates that the initial function value F(X) is set */
655: PetscErrorCode (*f)(void); /* function that defines Jacobian */
656: void *fctx; /* optional user-defined context for use by the function f */
657: Vec vscale; /* holds FD scaling, i.e. 1/dx for each perturbed column */
658: PetscInt currentcolor; /* color for which function evaluation is being done now */
659: const char *htype; /* "wp" or "ds" */
660: ISColoringType ctype; /* IS_COLORING_GLOBAL or IS_COLORING_LOCAL */
661: PetscInt brows, bcols; /* number of block rows or columns for speedup inserting the dense matrix into sparse Jacobian */
662: PetscBool setupcalled; /* true if setup has been called */
663: PetscBool viewed; /* true if the -mat_fd_coloring_view has been triggered already */
664: void (*ftn_func_pointer)(void), *ftn_func_cntx; /* serve the same purpose as *fortran_func_pointers in PETSc objects */
665: PetscObjectId matid; /* matrix this object was created with, must always be the same */
666: };
668: typedef struct _MatColoringOps *MatColoringOps;
669: struct _MatColoringOps {
670: PetscErrorCode (*destroy)(MatColoring);
671: PetscErrorCode (*setfromoptions)(MatColoring, PetscOptionItems *);
672: PetscErrorCode (*view)(MatColoring, PetscViewer);
673: PetscErrorCode (*apply)(MatColoring, ISColoring *);
674: PetscErrorCode (*weights)(MatColoring, PetscReal **, PetscInt **);
675: };
677: struct _p_MatColoring {
678: PETSCHEADER(struct _MatColoringOps);
679: Mat mat;
680: PetscInt dist; /* distance of the coloring */
681: PetscInt maxcolors; /* the maximum number of colors returned, maxcolors=1 for MIS */
682: void *data; /* inner context */
683: PetscBool valid; /* check to see if what is produced is a valid coloring */
684: MatColoringWeightType weight_type; /* type of weight computation to be performed */
685: PetscReal *user_weights; /* custom weights and permutation */
686: PetscInt *user_lperm;
687: PetscBool valid_iscoloring; /* check to see if matcoloring is produced a valid iscoloring */
688: };
690: struct _p_MatTransposeColoring {
691: PETSCHEADER(int);
692: PetscInt M, N, m; /* total rows, columns; local rows */
693: PetscInt rstart; /* first row owned by local processor */
694: PetscInt ncolors; /* number of colors */
695: PetscInt *ncolumns; /* number of local columns for a color */
696: PetscInt *nrows; /* number of local rows for each color */
697: PetscInt currentcolor; /* color for which function evaluation is being done now */
698: ISColoringType ctype; /* IS_COLORING_GLOBAL or IS_COLORING_LOCAL */
700: PetscInt *colorforrow, *colorforcol; /* pointer to rows and columns */
701: PetscInt *rows; /* lists the local rows for each color (using the local row numbering) */
702: PetscInt *den2sp; /* maps (row,color) in the dense matrix to index of sparse matrix array a->a */
703: PetscInt *columns; /* lists the local columns of each color (using global column numbering) */
704: PetscInt brows; /* number of rows for efficient implementation of MatTransColoringApplyDenToSp() */
705: PetscInt *lstart; /* array used for loop over row blocks of Csparse */
706: };
708: /*
709: Null space context for preconditioner/operators
710: */
711: struct _p_MatNullSpace {
712: PETSCHEADER(int);
713: PetscBool has_cnst;
714: PetscInt n;
715: Vec *vecs;
716: PetscScalar *alpha; /* for projections */
717: PetscErrorCode (*remove)(MatNullSpace, Vec, void *); /* for user provided removal function */
718: void *rmctx; /* context for remove() function */
719: };
721: /*
722: Checking zero pivot for LU, ILU preconditioners.
723: */
724: typedef struct {
725: PetscInt nshift, nshift_max;
726: PetscReal shift_amount, shift_lo, shift_hi, shift_top, shift_fraction;
727: PetscBool newshift;
728: PetscReal rs; /* active row sum of abs(offdiagonals) */
729: PetscScalar pv; /* pivot of the active row */
730: } FactorShiftCtx;
732: /*
733: Used by MatCreateSubMatrices_MPIXAIJ_Local()
734: */
735: #include <petscctable.h>
736: typedef struct { /* used by MatCreateSubMatrices_MPIAIJ_SingleIS_Local() and MatCreateSubMatrices_MPIAIJ_Local */
737: PetscInt id; /* index of submats, only submats[0] is responsible for deleting some arrays below */
738: PetscInt nrqs, nrqr;
739: PetscInt **rbuf1, **rbuf2, **rbuf3, **sbuf1, **sbuf2;
740: PetscInt **ptr;
741: PetscInt *tmp;
742: PetscInt *ctr;
743: PetscInt *pa; /* proc array */
744: PetscInt *req_size, *req_source1, *req_source2;
745: PetscBool allcolumns, allrows;
746: PetscBool singleis;
747: PetscInt *row2proc; /* row to proc map */
748: PetscInt nstages;
749: #if defined(PETSC_USE_CTABLE)
750: PetscTable cmap, rmap;
751: PetscInt *cmap_loc, *rmap_loc;
752: #else
753: PetscInt *cmap, *rmap;
754: #endif
756: PetscErrorCode (*destroy)(Mat);
757: } Mat_SubSppt;
759: PETSC_EXTERN PetscErrorCode MatTransposeCheckNonzeroState_Private(Mat, Mat);
761: /*
762: Used by MatTranspose() and potentially other functions to track the matrix used in the generation of another matrix
763: */
764: typedef struct {
765: PetscObjectId id;
766: PetscObjectState state;
767: PetscObjectState nonzerostate;
768: } MatParentState;
770: PETSC_EXTERN PetscErrorCode MatFactorDumpMatrix(Mat);
771: PETSC_INTERN PetscErrorCode MatShift_Basic(Mat, PetscScalar);
772: PETSC_INTERN PetscErrorCode MatSetBlockSizes_Default(Mat, PetscInt, PetscInt);
774: static inline PetscErrorCode MatPivotCheck_nz(Mat mat, const MatFactorInfo *info, FactorShiftCtx *sctx, PetscInt row)
775: {
776: PetscReal _rs = sctx->rs;
777: PetscReal _zero = info->zeropivot * _rs;
779: if (PetscAbsScalar(sctx->pv) <= _zero && !PetscIsNanScalar(sctx->pv)) {
780: /* force |diag| > zeropivot*rs */
781: if (!sctx->nshift) sctx->shift_amount = info->shiftamount;
782: else sctx->shift_amount *= 2.0;
783: sctx->newshift = PETSC_TRUE;
784: (sctx->nshift)++;
785: } else {
786: sctx->newshift = PETSC_FALSE;
787: }
788: return 0;
789: }
791: static inline PetscErrorCode MatPivotCheck_pd(Mat mat, const MatFactorInfo *info, FactorShiftCtx *sctx, PetscInt row)
792: {
793: PetscReal _rs = sctx->rs;
794: PetscReal _zero = info->zeropivot * _rs;
796: if (PetscRealPart(sctx->pv) <= _zero && !PetscIsNanScalar(sctx->pv)) {
797: /* force matfactor to be diagonally dominant */
798: if (sctx->nshift == sctx->nshift_max) {
799: sctx->shift_fraction = sctx->shift_hi;
800: } else {
801: sctx->shift_lo = sctx->shift_fraction;
802: sctx->shift_fraction = (sctx->shift_hi + sctx->shift_lo) / 2.;
803: }
804: sctx->shift_amount = sctx->shift_fraction * sctx->shift_top;
805: sctx->nshift++;
806: sctx->newshift = PETSC_TRUE;
807: } else {
808: sctx->newshift = PETSC_FALSE;
809: }
810: return 0;
811: }
813: static inline PetscErrorCode MatPivotCheck_inblocks(Mat mat, const MatFactorInfo *info, FactorShiftCtx *sctx, PetscInt row)
814: {
815: PetscReal _zero = info->zeropivot;
817: if (PetscAbsScalar(sctx->pv) <= _zero && !PetscIsNanScalar(sctx->pv)) {
818: sctx->pv += info->shiftamount;
819: sctx->shift_amount = 0.0;
820: sctx->nshift++;
821: }
822: sctx->newshift = PETSC_FALSE;
823: return 0;
824: }
826: static inline PetscErrorCode MatPivotCheck_none(Mat fact, Mat mat, const MatFactorInfo *info, FactorShiftCtx *sctx, PetscInt row)
827: {
828: PetscReal _zero = info->zeropivot;
830: sctx->newshift = PETSC_FALSE;
831: if (PetscAbsScalar(sctx->pv) <= _zero && !PetscIsNanScalar(sctx->pv)) {
833: PetscInfo(mat, "Detected zero pivot in factorization in row %" PetscInt_FMT " value %g tolerance %g\n", row, (double)PetscAbsScalar(sctx->pv), (double)_zero);
834: fact->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
835: fact->factorerror_zeropivot_value = PetscAbsScalar(sctx->pv);
836: fact->factorerror_zeropivot_row = row;
837: }
838: return 0;
839: }
841: static inline PetscErrorCode MatPivotCheck(Mat fact, Mat mat, const MatFactorInfo *info, FactorShiftCtx *sctx, PetscInt row)
842: {
843: if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) MatPivotCheck_nz(mat, info, sctx, row);
844: else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) MatPivotCheck_pd(mat, info, sctx, row);
845: else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) MatPivotCheck_inblocks(mat, info, sctx, row);
846: else MatPivotCheck_none(fact, mat, info, sctx, row);
847: return 0;
848: }
850: #include <petscbt.h>
851: /*
852: Create and initialize a linked list
853: Input Parameters:
854: idx_start - starting index of the list
855: lnk_max - max value of lnk indicating the end of the list
856: nlnk - max length of the list
857: Output Parameters:
858: lnk - list initialized
859: bt - PetscBT (bitarray) with all bits set to false
860: lnk_empty - flg indicating the list is empty
861: */
862: #define PetscLLCreate(idx_start, lnk_max, nlnk, lnk, bt) (PetscMalloc1(nlnk, &lnk) || PetscBTCreate(nlnk, &(bt)) || (lnk[idx_start] = lnk_max, 0))
864: #define PetscLLCreate_new(idx_start, lnk_max, nlnk, lnk, bt, lnk_empty) (PetscMalloc1(nlnk, &lnk) || PetscBTCreate(nlnk, &(bt)) || (lnk_empty = PETSC_TRUE, 0) || (lnk[idx_start] = lnk_max, 0))
866: static inline PetscErrorCode PetscLLInsertLocation_Private(PetscBool assume_sorted, PetscInt k, PetscInt idx_start, PetscInt entry, PetscInt *PETSC_RESTRICT nlnk, PetscInt *PETSC_RESTRICT lnkdata, PetscInt *PETSC_RESTRICT lnk)
867: {
868: PetscInt location;
870: /* start from the beginning if entry < previous entry */
871: if (!assume_sorted && k && entry < *lnkdata) *lnkdata = idx_start;
872: /* search for insertion location */
873: do {
874: location = *lnkdata;
875: *lnkdata = lnk[location];
876: } while (entry > *lnkdata);
877: /* insertion location is found, add entry into lnk */
878: lnk[location] = entry;
879: lnk[entry] = *lnkdata;
880: ++(*nlnk);
881: *lnkdata = entry; /* next search starts from here if next_entry > entry */
882: return 0;
883: }
885: static inline PetscErrorCode PetscLLAdd_Private(PetscInt nidx, const PetscInt *PETSC_RESTRICT indices, PetscInt idx_start, PetscInt *PETSC_RESTRICT nlnk, PetscInt *PETSC_RESTRICT lnk, PetscBT bt, PetscBool assume_sorted)
886: {
887: *nlnk = 0;
888: for (PetscInt k = 0, lnkdata = idx_start; k < nidx; ++k) {
889: const PetscInt entry = indices[k];
891: if (!PetscBTLookupSet(bt, entry)) PetscLLInsertLocation_Private(assume_sorted, k, idx_start, entry, nlnk, &lnkdata, lnk);
892: }
893: return 0;
894: }
896: /*
897: Add an index set into a sorted linked list
898: Input Parameters:
899: nidx - number of input indices
900: indices - integer array
901: idx_start - starting index of the list
902: lnk - linked list(an integer array) that is created
903: bt - PetscBT (bitarray), bt[idx]=true marks idx is in lnk
904: output Parameters:
905: nlnk - number of newly added indices
906: lnk - the sorted(increasing order) linked list containing new and non-redundate entries from indices
907: bt - updated PetscBT (bitarray)
908: */
909: static inline PetscErrorCode PetscLLAdd(PetscInt nidx, const PetscInt *PETSC_RESTRICT indices, PetscInt idx_start, PetscInt *PETSC_RESTRICT nlnk, PetscInt *PETSC_RESTRICT lnk, PetscBT bt)
910: {
911: PetscLLAdd_Private(nidx, indices, idx_start, nlnk, lnk, bt, PETSC_FALSE);
912: return 0;
913: }
915: /*
916: Add a SORTED ascending index set into a sorted linked list - same as PetscLLAdd() bus skip 'if (_k && _entry < _lnkdata) _lnkdata = idx_start;'
917: Input Parameters:
918: nidx - number of input indices
919: indices - sorted integer array
920: idx_start - starting index of the list
921: lnk - linked list(an integer array) that is created
922: bt - PetscBT (bitarray), bt[idx]=true marks idx is in lnk
923: output Parameters:
924: nlnk - number of newly added indices
925: lnk - the sorted(increasing order) linked list containing new and non-redundate entries from indices
926: bt - updated PetscBT (bitarray)
927: */
928: static inline PetscErrorCode PetscLLAddSorted(PetscInt nidx, const PetscInt *PETSC_RESTRICT indices, PetscInt idx_start, PetscInt *PETSC_RESTRICT nlnk, PetscInt *PETSC_RESTRICT lnk, PetscBT bt)
929: {
930: PetscLLAdd_Private(nidx, indices, idx_start, nlnk, lnk, bt, PETSC_TRUE);
931: return 0;
932: }
934: /*
935: Add a permuted index set into a sorted linked list
936: Input Parameters:
937: nidx - number of input indices
938: indices - integer array
939: perm - permutation of indices
940: idx_start - starting index of the list
941: lnk - linked list(an integer array) that is created
942: bt - PetscBT (bitarray), bt[idx]=true marks idx is in lnk
943: output Parameters:
944: nlnk - number of newly added indices
945: lnk - the sorted(increasing order) linked list containing new and non-redundate entries from indices
946: bt - updated PetscBT (bitarray)
947: */
948: static inline PetscErrorCode PetscLLAddPerm(PetscInt nidx, const PetscInt *PETSC_RESTRICT indices, const PetscInt *PETSC_RESTRICT perm, PetscInt idx_start, PetscInt *PETSC_RESTRICT nlnk, PetscInt *PETSC_RESTRICT lnk, PetscBT bt)
949: {
950: *nlnk = 0;
951: for (PetscInt k = 0, lnkdata = idx_start; k < nidx; ++k) {
952: const PetscInt entry = perm[indices[k]];
954: if (!PetscBTLookupSet(bt, entry)) PetscLLInsertLocation_Private(PETSC_FALSE, k, idx_start, entry, nlnk, &lnkdata, lnk);
955: }
956: return 0;
957: }
959: #if 0
960: /* this appears to be unused? */
961: static inline PetscErrorCode PetscLLAddSorted_new(PetscInt nidx, PetscInt *indices, PetscInt idx_start, PetscBool *lnk_empty, PetscInt *nlnk, PetscInt *lnk, PetscBT bt)
962: {
963: PetscInt lnkdata = idx_start;
965: if (*lnk_empty) {
966: for (PetscInt k = 0; k < nidx; ++k) {
967: const PetscInt entry = indices[k], location = lnkdata;
969: PetscBTSet(bt,entry); /* mark the new entry */
970: lnkdata = lnk[location];
971: /* insertion location is found, add entry into lnk */
972: lnk[location] = entry;
973: lnk[entry] = lnkdata;
974: lnkdata = entry; /* next search starts from here */
975: }
976: /* lnk[indices[nidx-1]] = lnk[idx_start];
977: lnk[idx_start] = indices[0];
978: PetscBTSet(bt,indices[0]);
979: for (_k=1; _k<nidx; _k++) {
980: PetscBTSet(bt,indices[_k]);
981: lnk[indices[_k-1]] = indices[_k];
982: }
983: */
984: *nlnk = nidx;
985: *lnk_empty = PETSC_FALSE;
986: } else {
987: *nlnk = 0;
988: for (PetscInt k = 0; k < nidx; ++k) {
989: const PetscInt entry = indices[k];
991: if (!PetscBTLookupSet(bt,entry)) PetscLLInsertLocation_Private(PETSC_TRUE,k,idx_start,entry,nlnk,&lnkdata,lnk);
992: }
993: }
994: return 0;
995: }
996: #endif
998: /*
999: Add a SORTED index set into a sorted linked list used for LUFactorSymbolic()
1000: Same as PetscLLAddSorted() with an additional operation:
1001: count the number of input indices that are no larger than 'diag'
1002: Input Parameters:
1003: indices - sorted integer array
1004: idx_start - starting index of the list, index of pivot row
1005: lnk - linked list(an integer array) that is created
1006: bt - PetscBT (bitarray), bt[idx]=true marks idx is in lnk
1007: diag - index of the active row in LUFactorSymbolic
1008: nzbd - number of input indices with indices <= idx_start
1009: im - im[idx_start] is initialized as num of nonzero entries in row=idx_start
1010: output Parameters:
1011: nlnk - number of newly added indices
1012: lnk - the sorted(increasing order) linked list containing new and non-redundate entries from indices
1013: bt - updated PetscBT (bitarray)
1014: im - im[idx_start]: unchanged if diag is not an entry
1015: : num of entries with indices <= diag if diag is an entry
1016: */
1017: static inline PetscErrorCode PetscLLAddSortedLU(const PetscInt *PETSC_RESTRICT indices, PetscInt idx_start, PetscInt *PETSC_RESTRICT nlnk, PetscInt *PETSC_RESTRICT lnk, PetscBT bt, PetscInt diag, PetscInt nzbd, PetscInt *PETSC_RESTRICT im)
1018: {
1019: const PetscInt nidx = im[idx_start] - nzbd; /* num of entries with idx_start < index <= diag */
1021: *nlnk = 0;
1022: for (PetscInt k = 0, lnkdata = idx_start; k < nidx; ++k) {
1023: const PetscInt entry = indices[k];
1025: ++nzbd;
1026: if (entry == diag) im[idx_start] = nzbd;
1027: if (!PetscBTLookupSet(bt, entry)) PetscLLInsertLocation_Private(PETSC_TRUE, k, idx_start, entry, nlnk, &lnkdata, lnk);
1028: }
1029: return 0;
1030: }
1032: /*
1033: Copy data on the list into an array, then initialize the list
1034: Input Parameters:
1035: idx_start - starting index of the list
1036: lnk_max - max value of lnk indicating the end of the list
1037: nlnk - number of data on the list to be copied
1038: lnk - linked list
1039: bt - PetscBT (bitarray), bt[idx]=true marks idx is in lnk
1040: output Parameters:
1041: indices - array that contains the copied data
1042: lnk - linked list that is cleaned and initialize
1043: bt - PetscBT (bitarray) with all bits set to false
1044: */
1045: static inline PetscErrorCode PetscLLClean(PetscInt idx_start, PetscInt lnk_max, PetscInt nlnk, PetscInt *PETSC_RESTRICT lnk, PetscInt *PETSC_RESTRICT indices, PetscBT bt)
1046: {
1047: for (PetscInt j = 0, idx = idx_start; j < nlnk; ++j) {
1048: idx = lnk[idx];
1049: indices[j] = idx;
1050: PetscBTClear(bt, idx);
1051: }
1052: lnk[idx_start] = lnk_max;
1053: return 0;
1054: }
1056: /*
1057: Free memories used by the list
1058: */
1059: #define PetscLLDestroy(lnk, bt) (PetscFree(lnk) || PetscBTDestroy(&(bt)))
1061: /* Routines below are used for incomplete matrix factorization */
1062: /*
1063: Create and initialize a linked list and its levels
1064: Input Parameters:
1065: idx_start - starting index of the list
1066: lnk_max - max value of lnk indicating the end of the list
1067: nlnk - max length of the list
1068: Output Parameters:
1069: lnk - list initialized
1070: lnk_lvl - array of size nlnk for storing levels of lnk
1071: bt - PetscBT (bitarray) with all bits set to false
1072: */
1073: #define PetscIncompleteLLCreate(idx_start, lnk_max, nlnk, lnk, lnk_lvl, bt) (PetscIntMultError(2, nlnk, NULL) || PetscMalloc1(2 * nlnk, &lnk) || PetscBTCreate(nlnk, &(bt)) || (lnk[idx_start] = lnk_max, lnk_lvl = lnk + nlnk, 0))
1075: static inline PetscErrorCode PetscIncompleteLLInsertLocation_Private(PetscBool assume_sorted, PetscInt k, PetscInt idx_start, PetscInt entry, PetscInt *PETSC_RESTRICT nlnk, PetscInt *PETSC_RESTRICT lnkdata, PetscInt *PETSC_RESTRICT lnk, PetscInt *PETSC_RESTRICT lnklvl, PetscInt newval)
1076: {
1077: PetscLLInsertLocation_Private(assume_sorted, k, idx_start, entry, nlnk, lnkdata, lnk);
1078: lnklvl[entry] = newval;
1079: return 0;
1080: }
1082: /*
1083: Initialize a sorted linked list used for ILU and ICC
1084: Input Parameters:
1085: nidx - number of input idx
1086: idx - integer array used for storing column indices
1087: idx_start - starting index of the list
1088: perm - indices of an IS
1089: lnk - linked list(an integer array) that is created
1090: lnklvl - levels of lnk
1091: bt - PetscBT (bitarray), bt[idx]=true marks idx is in lnk
1092: output Parameters:
1093: nlnk - number of newly added idx
1094: lnk - the sorted(increasing order) linked list containing new and non-redundate entries from idx
1095: lnklvl - levels of lnk
1096: bt - updated PetscBT (bitarray)
1097: */
1098: static inline PetscErrorCode PetscIncompleteLLInit(PetscInt nidx, const PetscInt *PETSC_RESTRICT idx, PetscInt idx_start, const PetscInt *PETSC_RESTRICT perm, PetscInt *PETSC_RESTRICT nlnk, PetscInt *PETSC_RESTRICT lnk, PetscInt *PETSC_RESTRICT lnklvl, PetscBT bt)
1099: {
1100: *nlnk = 0;
1101: for (PetscInt k = 0, lnkdata = idx_start; k < nidx; ++k) {
1102: const PetscInt entry = perm[idx[k]];
1104: if (!PetscBTLookupSet(bt, entry)) PetscIncompleteLLInsertLocation_Private(PETSC_FALSE, k, idx_start, entry, nlnk, &lnkdata, lnk, lnklvl, 0);
1105: }
1106: return 0;
1107: }
1109: static inline PetscErrorCode PetscIncompleteLLAdd_Private(PetscInt nidx, const PetscInt *PETSC_RESTRICT idx, PetscReal level, const PetscInt *PETSC_RESTRICT idxlvl, PetscInt idx_start, PetscInt *PETSC_RESTRICT nlnk, PetscInt *PETSC_RESTRICT lnk, PetscInt *PETSC_RESTRICT lnklvl, PetscBT bt, PetscInt prow_offset, PetscBool assume_sorted)
1110: {
1111: *nlnk = 0;
1112: for (PetscInt k = 0, lnkdata = idx_start; k < nidx; ++k) {
1113: const PetscInt incrlev = idxlvl[k] + prow_offset + 1;
1115: if (incrlev <= level) {
1116: const PetscInt entry = idx[k];
1118: if (!PetscBTLookupSet(bt, entry)) PetscIncompleteLLInsertLocation_Private(assume_sorted, k, idx_start, entry, nlnk, &lnkdata, lnk, lnklvl, incrlev);
1119: else if (lnklvl[entry] > incrlev) lnklvl[entry] = incrlev; /* existing entry */
1120: }
1121: }
1122: return 0;
1123: }
1125: /*
1126: Add a SORTED index set into a sorted linked list for ICC
1127: Input Parameters:
1128: nidx - number of input indices
1129: idx - sorted integer array used for storing column indices
1130: level - level of fill, e.g., ICC(level)
1131: idxlvl - level of idx
1132: idx_start - starting index of the list
1133: lnk - linked list(an integer array) that is created
1134: lnklvl - levels of lnk
1135: bt - PetscBT (bitarray), bt[idx]=true marks idx is in lnk
1136: idxlvl_prow - idxlvl[prow], where prow is the row number of the idx
1137: output Parameters:
1138: nlnk - number of newly added indices
1139: lnk - the sorted(increasing order) linked list containing new and non-redundate entries from idx
1140: lnklvl - levels of lnk
1141: bt - updated PetscBT (bitarray)
1142: Note: the level of U(i,j) is set as lvl(i,j) = min{ lvl(i,j), lvl(prow,i)+lvl(prow,j)+1)
1143: where idx = non-zero columns of U(prow,prow+1:n-1), prow<i
1144: */
1145: static inline PetscErrorCode PetscICCLLAddSorted(PetscInt nidx, const PetscInt *PETSC_RESTRICT idx, PetscReal level, const PetscInt *PETSC_RESTRICT idxlvl, PetscInt idx_start, PetscInt *PETSC_RESTRICT nlnk, PetscInt *PETSC_RESTRICT lnk, PetscInt *PETSC_RESTRICT lnklvl, PetscBT bt, PetscInt idxlvl_prow)
1146: {
1147: PetscIncompleteLLAdd_Private(nidx, idx, level, idxlvl, idx_start, nlnk, lnk, lnklvl, bt, idxlvl_prow, PETSC_TRUE);
1148: return 0;
1149: }
1151: /*
1152: Add a SORTED index set into a sorted linked list for ILU
1153: Input Parameters:
1154: nidx - number of input indices
1155: idx - sorted integer array used for storing column indices
1156: level - level of fill, e.g., ICC(level)
1157: idxlvl - level of idx
1158: idx_start - starting index of the list
1159: lnk - linked list(an integer array) that is created
1160: lnklvl - levels of lnk
1161: bt - PetscBT (bitarray), bt[idx]=true marks idx is in lnk
1162: prow - the row number of idx
1163: output Parameters:
1164: nlnk - number of newly added idx
1165: lnk - the sorted(increasing order) linked list containing new and non-redundate entries from idx
1166: lnklvl - levels of lnk
1167: bt - updated PetscBT (bitarray)
1169: Note: the level of factor(i,j) is set as lvl(i,j) = min{ lvl(i,j), lvl(i,prow)+lvl(prow,j)+1)
1170: where idx = non-zero columns of U(prow,prow+1:n-1), prow<i
1171: */
1172: static inline PetscErrorCode PetscILULLAddSorted(PetscInt nidx, const PetscInt *PETSC_RESTRICT idx, PetscInt level, const PetscInt *PETSC_RESTRICT idxlvl, PetscInt idx_start, PetscInt *PETSC_RESTRICT nlnk, PetscInt *PETSC_RESTRICT lnk, PetscInt *PETSC_RESTRICT lnklvl, PetscBT bt, PetscInt prow)
1173: {
1174: PetscIncompleteLLAdd_Private(nidx, idx, level, idxlvl, idx_start, nlnk, lnk, lnklvl, bt, lnklvl[prow], PETSC_TRUE);
1175: return 0;
1176: }
1178: /*
1179: Add a index set into a sorted linked list
1180: Input Parameters:
1181: nidx - number of input idx
1182: idx - integer array used for storing column indices
1183: level - level of fill, e.g., ICC(level)
1184: idxlvl - level of idx
1185: idx_start - starting index of the list
1186: lnk - linked list(an integer array) that is created
1187: lnklvl - levels of lnk
1188: bt - PetscBT (bitarray), bt[idx]=true marks idx is in lnk
1189: output Parameters:
1190: nlnk - number of newly added idx
1191: lnk - the sorted(increasing order) linked list containing new and non-redundate entries from idx
1192: lnklvl - levels of lnk
1193: bt - updated PetscBT (bitarray)
1194: */
1195: static inline PetscErrorCode PetscIncompleteLLAdd(PetscInt nidx, const PetscInt *PETSC_RESTRICT idx, PetscReal level, const PetscInt *PETSC_RESTRICT idxlvl, PetscInt idx_start, PetscInt *PETSC_RESTRICT nlnk, PetscInt *PETSC_RESTRICT lnk, PetscInt *PETSC_RESTRICT lnklvl, PetscBT bt)
1196: {
1197: PetscIncompleteLLAdd_Private(nidx, idx, level, idxlvl, idx_start, nlnk, lnk, lnklvl, bt, 0, PETSC_FALSE);
1198: return 0;
1199: }
1201: /*
1202: Add a SORTED index set into a sorted linked list
1203: Input Parameters:
1204: nidx - number of input indices
1205: idx - sorted integer array used for storing column indices
1206: level - level of fill, e.g., ICC(level)
1207: idxlvl - level of idx
1208: idx_start - starting index of the list
1209: lnk - linked list(an integer array) that is created
1210: lnklvl - levels of lnk
1211: bt - PetscBT (bitarray), bt[idx]=true marks idx is in lnk
1212: output Parameters:
1213: nlnk - number of newly added idx
1214: lnk - the sorted(increasing order) linked list containing new and non-redundate entries from idx
1215: lnklvl - levels of lnk
1216: bt - updated PetscBT (bitarray)
1217: */
1218: static inline PetscErrorCode PetscIncompleteLLAddSorted(PetscInt nidx, const PetscInt *PETSC_RESTRICT idx, PetscReal level, const PetscInt *PETSC_RESTRICT idxlvl, PetscInt idx_start, PetscInt *PETSC_RESTRICT nlnk, PetscInt *PETSC_RESTRICT lnk, PetscInt *PETSC_RESTRICT lnklvl, PetscBT bt)
1219: {
1220: PetscIncompleteLLAdd_Private(nidx, idx, level, idxlvl, idx_start, nlnk, lnk, lnklvl, bt, 0, PETSC_TRUE);
1221: return 0;
1222: }
1224: /*
1225: Copy data on the list into an array, then initialize the list
1226: Input Parameters:
1227: idx_start - starting index of the list
1228: lnk_max - max value of lnk indicating the end of the list
1229: nlnk - number of data on the list to be copied
1230: lnk - linked list
1231: lnklvl - level of lnk
1232: bt - PetscBT (bitarray), bt[idx]=true marks idx is in lnk
1233: output Parameters:
1234: indices - array that contains the copied data
1235: lnk - linked list that is cleaned and initialize
1236: lnklvl - level of lnk that is reinitialized
1237: bt - PetscBT (bitarray) with all bits set to false
1238: */
1239: static inline PetscErrorCode PetscIncompleteLLClean(PetscInt idx_start, PetscInt lnk_max, PetscInt nlnk, PetscInt *PETSC_RESTRICT lnk, PetscInt *PETSC_RESTRICT lnklvl, PetscInt *PETSC_RESTRICT indices, PetscInt *PETSC_RESTRICT indiceslvl, PetscBT bt)
1240: {
1241: for (PetscInt j = 0, idx = idx_start; j < nlnk; ++j) {
1242: idx = lnk[idx];
1243: indices[j] = idx;
1244: indiceslvl[j] = lnklvl[idx];
1245: lnklvl[idx] = -1;
1246: PetscBTClear(bt, idx);
1247: }
1248: lnk[idx_start] = lnk_max;
1249: return 0;
1250: }
1252: /*
1253: Free memories used by the list
1254: */
1255: #define PetscIncompleteLLDestroy(lnk, bt) (PetscFree(lnk) || PetscBTDestroy(&(bt)))
1257: #if !defined(PETSC_CLANG_STATIC_ANALYZER)
1258: #define MatCheckSameLocalSize(A, ar1, B, ar2) \
1259: do { \
1262: (A)->rmap->n, (A)->cmap->n, ar2, (B)->rmap->n, (B)->cmap->n); \
1263: } while (0)
1264: #define MatCheckSameSize(A, ar1, B, ar2) \
1265: do { \
1267: (A)->rmap->N, (A)->cmap->N, ar2, (B)->rmap->N, (B)->cmap->N); \
1268: MatCheckSameLocalSize(A, ar1, B, ar2); \
1269: } while (0)
1270: #else
1271: template <typename Tm>
1272: void MatCheckSameLocalSize(Tm, int, Tm, int);
1273: template <typename Tm>
1274: void MatCheckSameSize(Tm, int, Tm, int);
1275: #endif
1277: #define VecCheckMatCompatible(M, x, ar1, b, ar2) \
1278: do { \
1280: (M)->cmap->N); \
1282: (M)->rmap->N); \
1283: } while (0)
1285: /* -------------------------------------------------------------------------------------------------------*/
1286: /*
1287: Create and initialize a condensed linked list -
1288: same as PetscLLCreate(), but uses a scalable array 'lnk' with size of max number of entries, not O(N).
1289: Barry suggested this approach (Dec. 6, 2011):
1290: I've thought of an alternative way of representing a linked list that is efficient but doesn't have the O(N) scaling issue
1291: (it may be faster than the O(N) even sequentially due to less crazy memory access).
1293: Instead of having some like a 2 -> 4 -> 11 -> 22 list that uses slot 2 4 11 and 22 in a big array use a small array with two slots
1294: for each entry for example [ 2 1 | 4 3 | 22 -1 | 11 2] so the first number (of the pair) is the value while the second tells you where
1295: in the list the next entry is. Inserting a new link means just append another pair at the end. For example say we want to insert 13 into the
1296: list it would then become [2 1 | 4 3 | 22 -1 | 11 4 | 13 2 ] you just add a pair at the end and fix the point for the one that points to it.
1297: That is 11 use to point to the 2 slot, after the change 11 points to the 4th slot which has the value 13. Note that values are always next
1298: to each other so memory access is much better than using the big array.
1300: Example:
1301: nlnk_max=5, lnk_max=36:
1302: Initial list: [0, 0 | 36, 2 | 0, 0 | 0, 0 | 0, 0 | 0, 0 | 0, 0]
1303: here, head_node has index 2 with value lnk[2]=lnk_max=36,
1304: 0-th entry is used to store the number of entries in the list,
1305: The initial lnk represents head -> tail(marked by 36) with number of entries = lnk[0]=0.
1307: Now adding a sorted set {2,4}, the list becomes
1308: [2, 0 | 36, 4 |2, 6 | 4, 2 | 0, 0 | 0, 0 | 0, 0 ]
1309: represents head -> 2 -> 4 -> tail with number of entries = lnk[0]=2.
1311: Then adding a sorted set {0,3,35}, the list
1312: [5, 0 | 36, 8 | 2, 10 | 4, 12 | 0, 4 | 3, 6 | 35, 2 ]
1313: represents head -> 0 -> 2 -> 3 -> 4 -> 35 -> tail with number of entries = lnk[0]=5.
1315: Input Parameters:
1316: nlnk_max - max length of the list
1317: lnk_max - max value of the entries
1318: Output Parameters:
1319: lnk - list created and initialized
1320: bt - PetscBT (bitarray) with all bits set to false. Note: bt has size lnk_max, not nln_max!
1321: */
1322: static inline PetscErrorCode PetscLLCondensedCreate(PetscInt nlnk_max, PetscInt lnk_max, PetscInt **lnk, PetscBT *bt)
1323: {
1324: PetscInt *llnk, lsize = 0;
1326: PetscIntMultError(2, nlnk_max + 2, &lsize);
1327: PetscMalloc1(lsize, lnk);
1328: PetscBTCreate(lnk_max, bt);
1329: llnk = *lnk;
1330: llnk[0] = 0; /* number of entries on the list */
1331: llnk[2] = lnk_max; /* value in the head node */
1332: llnk[3] = 2; /* next for the head node */
1333: return 0;
1334: }
1336: /*
1337: Add a SORTED ascending index set into a sorted linked list. See PetscLLCondensedCreate() for detailed description.
1338: Input Parameters:
1339: nidx - number of input indices
1340: indices - sorted integer array
1341: lnk - condensed linked list(an integer array) that is created
1342: bt - PetscBT (bitarray), bt[idx]=true marks idx is in lnk
1343: output Parameters:
1344: lnk - the sorted(increasing order) linked list containing previous and newly added non-redundate indices
1345: bt - updated PetscBT (bitarray)
1346: */
1347: static inline PetscErrorCode PetscLLCondensedAddSorted(PetscInt nidx, const PetscInt indices[], PetscInt lnk[], PetscBT bt)
1348: {
1349: PetscInt _k, _entry, _location, _next, _lnkdata, _nlnk, _newnode;
1351: _nlnk = lnk[0]; /* num of entries on the input lnk */
1352: _location = 2; /* head */
1353: for (_k = 0; _k < nidx; _k++) {
1354: _entry = indices[_k];
1355: if (!PetscBTLookupSet(bt, _entry)) { /* new entry */
1356: /* search for insertion location */
1357: do {
1358: _next = _location + 1; /* link from previous node to next node */
1359: _location = lnk[_next]; /* idx of next node */
1360: _lnkdata = lnk[_location]; /* value of next node */
1361: } while (_entry > _lnkdata);
1362: /* insertion location is found, add entry into lnk */
1363: _newnode = 2 * (_nlnk + 2); /* index for this new node */
1364: lnk[_next] = _newnode; /* connect previous node to the new node */
1365: lnk[_newnode] = _entry; /* set value of the new node */
1366: lnk[_newnode + 1] = _location; /* connect new node to next node */
1367: _location = _newnode; /* next search starts from the new node */
1368: _nlnk++;
1369: }
1370: }
1371: lnk[0] = _nlnk; /* number of entries in the list */
1372: return 0;
1373: }
1375: static inline PetscErrorCode PetscLLCondensedClean(PetscInt lnk_max, PetscInt nidx, PetscInt *indices, PetscInt lnk[], PetscBT bt)
1376: {
1377: PetscInt _next = lnk[3]; /* head node */
1378: PetscInt _nlnk = lnk[0]; /* num of entries on the list */
1380: for (PetscInt _k = 0; _k < _nlnk; _k++) {
1381: indices[_k] = lnk[_next];
1382: _next = lnk[_next + 1];
1383: PetscBTClear(bt, indices[_k]);
1384: }
1385: lnk[0] = 0; /* num of entries on the list */
1386: lnk[2] = lnk_max; /* initialize head node */
1387: lnk[3] = 2; /* head node */
1388: return 0;
1389: }
1391: static inline PetscErrorCode PetscLLCondensedView(PetscInt *lnk)
1392: {
1393: PetscPrintf(PETSC_COMM_SELF, "LLCondensed of size %" PetscInt_FMT ", (val, next)\n", lnk[0]);
1394: for (PetscInt k = 2; k < lnk[0] + 2; ++k) PetscPrintf(PETSC_COMM_SELF, " %" PetscInt_FMT ": (%" PetscInt_FMT ", %" PetscInt_FMT ")\n", 2 * k, lnk[2 * k], lnk[2 * k + 1]);
1395: return 0;
1396: }
1398: /*
1399: Free memories used by the list
1400: */
1401: static inline PetscErrorCode PetscLLCondensedDestroy(PetscInt *lnk, PetscBT bt)
1402: {
1403: PetscFree(lnk);
1404: PetscBTDestroy(&bt);
1405: return 0;
1406: }
1408: /* -------------------------------------------------------------------------------------------------------*/
1409: /*
1410: Same as PetscLLCondensedCreate(), but does not use non-scalable O(lnk_max) bitarray
1411: Input Parameters:
1412: nlnk_max - max length of the list
1413: Output Parameters:
1414: lnk - list created and initialized
1415: */
1416: static inline PetscErrorCode PetscLLCondensedCreate_Scalable(PetscInt nlnk_max, PetscInt **lnk)
1417: {
1418: PetscInt *llnk, lsize = 0;
1420: PetscIntMultError(2, nlnk_max + 2, &lsize);
1421: PetscMalloc1(lsize, lnk);
1422: llnk = *lnk;
1423: llnk[0] = 0; /* number of entries on the list */
1424: llnk[2] = PETSC_MAX_INT; /* value in the head node */
1425: llnk[3] = 2; /* next for the head node */
1426: return 0;
1427: }
1429: static inline PetscErrorCode PetscLLCondensedExpand_Scalable(PetscInt nlnk_max, PetscInt **lnk)
1430: {
1431: PetscInt lsize = 0;
1433: PetscIntMultError(2, nlnk_max + 2, &lsize);
1434: PetscRealloc(lsize * sizeof(PetscInt), lnk);
1435: return 0;
1436: }
1438: static inline PetscErrorCode PetscLLCondensedAddSorted_Scalable(PetscInt nidx, const PetscInt indices[], PetscInt lnk[])
1439: {
1440: PetscInt _k, _entry, _location, _next, _lnkdata, _nlnk, _newnode;
1441: _nlnk = lnk[0]; /* num of entries on the input lnk */
1442: _location = 2; /* head */
1443: for (_k = 0; _k < nidx; _k++) {
1444: _entry = indices[_k];
1445: /* search for insertion location */
1446: do {
1447: _next = _location + 1; /* link from previous node to next node */
1448: _location = lnk[_next]; /* idx of next node */
1449: _lnkdata = lnk[_location]; /* value of next node */
1450: } while (_entry > _lnkdata);
1451: if (_entry < _lnkdata) {
1452: /* insertion location is found, add entry into lnk */
1453: _newnode = 2 * (_nlnk + 2); /* index for this new node */
1454: lnk[_next] = _newnode; /* connect previous node to the new node */
1455: lnk[_newnode] = _entry; /* set value of the new node */
1456: lnk[_newnode + 1] = _location; /* connect new node to next node */
1457: _location = _newnode; /* next search starts from the new node */
1458: _nlnk++;
1459: }
1460: }
1461: lnk[0] = _nlnk; /* number of entries in the list */
1462: return 0;
1463: }
1465: static inline PetscErrorCode PetscLLCondensedClean_Scalable(PetscInt nidx, PetscInt *indices, PetscInt *lnk)
1466: {
1467: PetscInt _k, _next, _nlnk;
1468: _next = lnk[3]; /* head node */
1469: _nlnk = lnk[0];
1470: for (_k = 0; _k < _nlnk; _k++) {
1471: indices[_k] = lnk[_next];
1472: _next = lnk[_next + 1];
1473: }
1474: lnk[0] = 0; /* num of entries on the list */
1475: lnk[3] = 2; /* head node */
1476: return 0;
1477: }
1479: static inline PetscErrorCode PetscLLCondensedDestroy_Scalable(PetscInt *lnk)
1480: {
1481: return PetscFree(lnk);
1482: }
1484: /* -------------------------------------------------------------------------------------------------------*/
1485: /*
1486: lnk[0] number of links
1487: lnk[1] number of entries
1488: lnk[3n] value
1489: lnk[3n+1] len
1490: lnk[3n+2] link to next value
1492: The next three are always the first link
1494: lnk[3] PETSC_MIN_INT+1
1495: lnk[4] 1
1496: lnk[5] link to first real entry
1498: The next three are always the last link
1500: lnk[6] PETSC_MAX_INT - 1
1501: lnk[7] 1
1502: lnk[8] next valid link (this is the same as lnk[0] but without the decreases)
1503: */
1505: static inline PetscErrorCode PetscLLCondensedCreate_fast(PetscInt nlnk_max, PetscInt **lnk)
1506: {
1507: PetscInt *llnk, lsize = 0;
1509: PetscIntMultError(3, nlnk_max + 3, &lsize);
1510: PetscMalloc1(lsize, lnk);
1511: llnk = *lnk;
1512: llnk[0] = 0; /* nlnk: number of entries on the list */
1513: llnk[1] = 0; /* number of integer entries represented in list */
1514: llnk[3] = PETSC_MIN_INT + 1; /* value in the first node */
1515: llnk[4] = 1; /* count for the first node */
1516: llnk[5] = 6; /* next for the first node */
1517: llnk[6] = PETSC_MAX_INT - 1; /* value in the last node */
1518: llnk[7] = 1; /* count for the last node */
1519: llnk[8] = 0; /* next valid node to be used */
1520: return 0;
1521: }
1523: static inline PetscErrorCode PetscLLCondensedAddSorted_fast(PetscInt nidx, const PetscInt indices[], PetscInt lnk[])
1524: {
1525: PetscInt k, entry, prev, next;
1526: prev = 3; /* first value */
1527: next = lnk[prev + 2];
1528: for (k = 0; k < nidx; k++) {
1529: entry = indices[k];
1530: /* search for insertion location */
1531: while (entry >= lnk[next]) {
1532: prev = next;
1533: next = lnk[next + 2];
1534: }
1535: /* entry is in range of previous list */
1536: if (entry < lnk[prev] + lnk[prev + 1]) continue;
1537: lnk[1]++;
1538: /* entry is right after previous list */
1539: if (entry == lnk[prev] + lnk[prev + 1]) {
1540: lnk[prev + 1]++;
1541: if (lnk[next] == entry + 1) { /* combine two contiguous strings */
1542: lnk[prev + 1] += lnk[next + 1];
1543: lnk[prev + 2] = lnk[next + 2];
1544: next = lnk[next + 2];
1545: lnk[0]--;
1546: }
1547: continue;
1548: }
1549: /* entry is right before next list */
1550: if (entry == lnk[next] - 1) {
1551: lnk[next]--;
1552: lnk[next + 1]++;
1553: prev = next;
1554: next = lnk[prev + 2];
1555: continue;
1556: }
1557: /* add entry into lnk */
1558: lnk[prev + 2] = 3 * ((lnk[8]++) + 3); /* connect previous node to the new node */
1559: prev = lnk[prev + 2];
1560: lnk[prev] = entry; /* set value of the new node */
1561: lnk[prev + 1] = 1; /* number of values in contiguous string is one to start */
1562: lnk[prev + 2] = next; /* connect new node to next node */
1563: lnk[0]++;
1564: }
1565: return 0;
1566: }
1568: static inline PetscErrorCode PetscLLCondensedClean_fast(PetscInt nidx, PetscInt *indices, PetscInt *lnk)
1569: {
1570: PetscInt _k, _next, _nlnk, cnt, j;
1571: _next = lnk[5]; /* first node */
1572: _nlnk = lnk[0];
1573: cnt = 0;
1574: for (_k = 0; _k < _nlnk; _k++) {
1575: for (j = 0; j < lnk[_next + 1]; j++) indices[cnt++] = lnk[_next] + j;
1576: _next = lnk[_next + 2];
1577: }
1578: lnk[0] = 0; /* nlnk: number of links */
1579: lnk[1] = 0; /* number of integer entries represented in list */
1580: lnk[3] = PETSC_MIN_INT + 1; /* value in the first node */
1581: lnk[4] = 1; /* count for the first node */
1582: lnk[5] = 6; /* next for the first node */
1583: lnk[6] = PETSC_MAX_INT - 1; /* value in the last node */
1584: lnk[7] = 1; /* count for the last node */
1585: lnk[8] = 0; /* next valid location to make link */
1586: return 0;
1587: }
1589: static inline PetscErrorCode PetscLLCondensedView_fast(PetscInt *lnk)
1590: {
1591: PetscInt k, next, nlnk;
1592: next = lnk[5]; /* first node */
1593: nlnk = lnk[0];
1594: for (k = 0; k < nlnk; k++) {
1595: #if 0 /* Debugging code */
1596: printf("%d value %d len %d next %d\n",next,lnk[next],lnk[next+1],lnk[next+2]);
1597: #endif
1598: next = lnk[next + 2];
1599: }
1600: return 0;
1601: }
1603: static inline PetscErrorCode PetscLLCondensedDestroy_fast(PetscInt *lnk)
1604: {
1605: return PetscFree(lnk);
1606: }
1608: /* this is extern because it is used in MatFDColoringUseDM() which is in the DM library */
1609: PETSC_EXTERN PetscErrorCode MatFDColoringApply_AIJ(Mat, MatFDColoring, Vec, void *);
1611: #if defined(PETSC_HAVE_KOKKOS_KERNELS)
1612: PETSC_INTERN PetscErrorCode MatSeqAIJMoveDiagonalValuesFront_SeqAIJKokkos(Mat, const PetscInt *);
1613: #endif
1615: PETSC_EXTERN PetscLogEvent MAT_Mult;
1616: PETSC_EXTERN PetscLogEvent MAT_MultMatrixFree;
1617: PETSC_EXTERN PetscLogEvent MAT_Mults;
1618: PETSC_EXTERN PetscLogEvent MAT_MultAdd;
1619: PETSC_EXTERN PetscLogEvent MAT_MultTranspose;
1620: PETSC_EXTERN PetscLogEvent MAT_MultTransposeAdd;
1621: PETSC_EXTERN PetscLogEvent MAT_Solve;
1622: PETSC_EXTERN PetscLogEvent MAT_Solves;
1623: PETSC_EXTERN PetscLogEvent MAT_SolveAdd;
1624: PETSC_EXTERN PetscLogEvent MAT_SolveTranspose;
1625: PETSC_EXTERN PetscLogEvent MAT_SolveTransposeAdd;
1626: PETSC_EXTERN PetscLogEvent MAT_SOR;
1627: PETSC_EXTERN PetscLogEvent MAT_ForwardSolve;
1628: PETSC_EXTERN PetscLogEvent MAT_BackwardSolve;
1629: PETSC_EXTERN PetscLogEvent MAT_LUFactor;
1630: PETSC_EXTERN PetscLogEvent MAT_LUFactorSymbolic;
1631: PETSC_EXTERN PetscLogEvent MAT_LUFactorNumeric;
1632: PETSC_EXTERN PetscLogEvent MAT_QRFactor;
1633: PETSC_EXTERN PetscLogEvent MAT_QRFactorSymbolic;
1634: PETSC_EXTERN PetscLogEvent MAT_QRFactorNumeric;
1635: PETSC_EXTERN PetscLogEvent MAT_CholeskyFactor;
1636: PETSC_EXTERN PetscLogEvent MAT_CholeskyFactorSymbolic;
1637: PETSC_EXTERN PetscLogEvent MAT_CholeskyFactorNumeric;
1638: PETSC_EXTERN PetscLogEvent MAT_ILUFactor;
1639: PETSC_EXTERN PetscLogEvent MAT_ILUFactorSymbolic;
1640: PETSC_EXTERN PetscLogEvent MAT_ICCFactorSymbolic;
1641: PETSC_EXTERN PetscLogEvent MAT_Copy;
1642: PETSC_EXTERN PetscLogEvent MAT_Convert;
1643: PETSC_EXTERN PetscLogEvent MAT_Scale;
1644: PETSC_EXTERN PetscLogEvent MAT_AssemblyBegin;
1645: PETSC_EXTERN PetscLogEvent MAT_AssemblyEnd;
1646: PETSC_EXTERN PetscLogEvent MAT_SetValues;
1647: PETSC_EXTERN PetscLogEvent MAT_GetValues;
1648: PETSC_EXTERN PetscLogEvent MAT_GetRow;
1649: PETSC_EXTERN PetscLogEvent MAT_GetRowIJ;
1650: PETSC_EXTERN PetscLogEvent MAT_CreateSubMats;
1651: PETSC_EXTERN PetscLogEvent MAT_GetColoring;
1652: PETSC_EXTERN PetscLogEvent MAT_GetOrdering;
1653: PETSC_EXTERN PetscLogEvent MAT_RedundantMat;
1654: PETSC_EXTERN PetscLogEvent MAT_IncreaseOverlap;
1655: PETSC_EXTERN PetscLogEvent MAT_Partitioning;
1656: PETSC_EXTERN PetscLogEvent MAT_PartitioningND;
1657: PETSC_EXTERN PetscLogEvent MAT_Coarsen;
1658: PETSC_EXTERN PetscLogEvent MAT_ZeroEntries;
1659: PETSC_EXTERN PetscLogEvent MAT_Load;
1660: PETSC_EXTERN PetscLogEvent MAT_View;
1661: PETSC_EXTERN PetscLogEvent MAT_AXPY;
1662: PETSC_EXTERN PetscLogEvent MAT_FDColoringCreate;
1663: PETSC_EXTERN PetscLogEvent MAT_TransposeColoringCreate;
1664: PETSC_EXTERN PetscLogEvent MAT_FDColoringSetUp;
1665: PETSC_EXTERN PetscLogEvent MAT_FDColoringApply;
1666: PETSC_EXTERN PetscLogEvent MAT_Transpose;
1667: PETSC_EXTERN PetscLogEvent MAT_FDColoringFunction;
1668: PETSC_EXTERN PetscLogEvent MAT_CreateSubMat;
1669: PETSC_EXTERN PetscLogEvent MAT_MatSolve;
1670: PETSC_EXTERN PetscLogEvent MAT_MatTrSolve;
1671: PETSC_EXTERN PetscLogEvent MAT_MatMultSymbolic;
1672: PETSC_EXTERN PetscLogEvent MAT_MatMultNumeric;
1673: PETSC_EXTERN PetscLogEvent MAT_Getlocalmatcondensed;
1674: PETSC_EXTERN PetscLogEvent MAT_GetBrowsOfAcols;
1675: PETSC_EXTERN PetscLogEvent MAT_GetBrowsOfAocols;
1676: PETSC_EXTERN PetscLogEvent MAT_PtAPSymbolic;
1677: PETSC_EXTERN PetscLogEvent MAT_PtAPNumeric;
1678: PETSC_EXTERN PetscLogEvent MAT_Seqstompinum;
1679: PETSC_EXTERN PetscLogEvent MAT_Seqstompisym;
1680: PETSC_EXTERN PetscLogEvent MAT_Seqstompi;
1681: PETSC_EXTERN PetscLogEvent MAT_Getlocalmat;
1682: PETSC_EXTERN PetscLogEvent MAT_RARtSymbolic;
1683: PETSC_EXTERN PetscLogEvent MAT_RARtNumeric;
1684: PETSC_EXTERN PetscLogEvent MAT_MatTransposeMultSymbolic;
1685: PETSC_EXTERN PetscLogEvent MAT_MatTransposeMultNumeric;
1686: PETSC_EXTERN PetscLogEvent MAT_TransposeMatMultSymbolic;
1687: PETSC_EXTERN PetscLogEvent MAT_TransposeMatMultNumeric;
1688: PETSC_EXTERN PetscLogEvent MAT_MatMatMultSymbolic;
1689: PETSC_EXTERN PetscLogEvent MAT_MatMatMultNumeric;
1690: PETSC_EXTERN PetscLogEvent MAT_Applypapt;
1691: PETSC_EXTERN PetscLogEvent MAT_Applypapt_symbolic;
1692: PETSC_EXTERN PetscLogEvent MAT_Applypapt_numeric;
1693: PETSC_EXTERN PetscLogEvent MAT_Getsymtranspose;
1694: PETSC_EXTERN PetscLogEvent MAT_Getsymtransreduced;
1695: PETSC_EXTERN PetscLogEvent MAT_GetSequentialNonzeroStructure;
1696: PETSC_EXTERN PetscLogEvent MATMFFD_Mult;
1697: PETSC_EXTERN PetscLogEvent MAT_GetMultiProcBlock;
1698: PETSC_EXTERN PetscLogEvent MAT_CUSPARSECopyToGPU;
1699: PETSC_EXTERN PetscLogEvent MAT_CUSPARSECopyFromGPU;
1700: PETSC_EXTERN PetscLogEvent MAT_CUSPARSEGenerateTranspose;
1701: PETSC_EXTERN PetscLogEvent MAT_CUSPARSESolveAnalysis;
1702: PETSC_EXTERN PetscLogEvent MAT_HIPSPARSECopyToGPU;
1703: PETSC_EXTERN PetscLogEvent MAT_HIPSPARSECopyFromGPU;
1704: PETSC_EXTERN PetscLogEvent MAT_HIPSPARSEGenerateTranspose;
1705: PETSC_EXTERN PetscLogEvent MAT_HIPSPARSESolveAnalysis;
1706: PETSC_EXTERN PetscLogEvent MAT_SetValuesBatch;
1707: PETSC_EXTERN PetscLogEvent MAT_ViennaCLCopyToGPU;
1708: PETSC_EXTERN PetscLogEvent MAT_DenseCopyToGPU;
1709: PETSC_EXTERN PetscLogEvent MAT_DenseCopyFromGPU;
1710: PETSC_EXTERN PetscLogEvent MAT_Merge;
1711: PETSC_EXTERN PetscLogEvent MAT_Residual;
1712: PETSC_EXTERN PetscLogEvent MAT_SetRandom;
1713: PETSC_EXTERN PetscLogEvent MAT_FactorFactS;
1714: PETSC_EXTERN PetscLogEvent MAT_FactorInvS;
1715: PETSC_EXTERN PetscLogEvent MAT_PreallCOO;
1716: PETSC_EXTERN PetscLogEvent MAT_SetVCOO;
1717: PETSC_EXTERN PetscLogEvent MATCOLORING_Apply;
1718: PETSC_EXTERN PetscLogEvent MATCOLORING_Comm;
1719: PETSC_EXTERN PetscLogEvent MATCOLORING_Local;
1720: PETSC_EXTERN PetscLogEvent MATCOLORING_ISCreate;
1721: PETSC_EXTERN PetscLogEvent MATCOLORING_SetUp;
1722: PETSC_EXTERN PetscLogEvent MATCOLORING_Weights;
1723: PETSC_EXTERN PetscLogEvent MAT_H2Opus_Build;
1724: PETSC_EXTERN PetscLogEvent MAT_H2Opus_Compress;
1725: PETSC_EXTERN PetscLogEvent MAT_H2Opus_Orthog;
1726: PETSC_EXTERN PetscLogEvent MAT_H2Opus_LR;
1728: #endif