Actual source code: mmbaij.c
/*
   Support for the parallel BAIJ matrix vector multiply
*/
#include <../src/mat/impls/baij/mpi/mpibaij.h>
#include <petsc/private/isimpl.h>
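
/*
   Builds the data structures needed to apply the off-process ("off-diagonal") part of
   the matrix in MatMult(): garray lists the nonzero global block columns of B, lvec is
   a sequential work vector that receives the needed ghost values, and Mvctx is the
   scatter that gathers those values from the distributed input vector.
*/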
PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)(baij->B->data);
  PetscInt     i, j, *aj = B->j, ec = 0, *garray;
  PetscInt     bs = mat->rmap->bs, *stmp;
  IS           from, to;
  Vec          gvec;
#if defined(PETSC_USE_CTABLE)
  PetscTable         gid1_lid1;
  PetscTablePosition tpos;
  PetscInt           gid, lid;
#else
  PetscInt Nbs = baij->Nbs, *indices;
#endif

#if defined(PETSC_USE_CTABLE)
  /* use a table - Mark Adams */
  PetscTableCreate(B->mbs, baij->Nbs + 1, &gid1_lid1);
  for (i = 0; i < B->mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) {
      PetscInt data, gid1 = aj[B->i[i] + j] + 1;
      PetscTableFind(gid1_lid1, gid1, &data);
      if (!data) {
        /* one based table */
        PetscTableAdd(gid1_lid1, gid1, ++ec, INSERT_VALUES);
      }
    }
  }
  /* form array of columns we need */
  PetscMalloc1(ec, &garray);
  PetscTableGetHeadPosition(gid1_lid1, &tpos);
  while (tpos) {
    PetscTableGetNext(gid1_lid1, &tpos, &gid, &lid);
    gid--;
    lid--;
    garray[lid] = gid;
  }
  PetscSortInt(ec, garray);
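  /* rebuild the (one-based) table so each global block column now maps to its position in the sorted garray */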
  PetscTableRemoveAll(gid1_lid1);
  for (i = 0; i < ec; i++) PetscTableAdd(gid1_lid1, garray[i] + 1, i + 1, INSERT_VALUES);
  /* compact out the extra columns in B */
  for (i = 0; i < B->mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      PetscTableFind(gid1_lid1, gid1, &lid);
      lid--;
      aj[B->i[i] + j] = lid;
    }
  }
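  /* B now has a compact column space of ec block columns; shrink its column layout to match */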
  B->nbs = ec;
  PetscLayoutDestroy(&baij->B->cmap);
  PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)baij->B), ec * mat->rmap->bs, ec * mat->rmap->bs, mat->rmap->bs, &baij->B->cmap);
  PetscTableDestroy(&gid1_lid1);
#else
  /* Make an array as long as the number of columns */
  /* mark those columns that are in baij->B */
  PetscCalloc1(Nbs, &indices);
  for (i = 0; i < B->mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form array of columns we need */
  PetscMalloc1(ec, &garray);
  ec = 0;
  for (i = 0; i < Nbs; i++) {
    if (indices[i]) garray[ec++] = i;
  }

  /* make indices now point into garray */
  for (i = 0; i < ec; i++) indices[garray[i]] = i;

  /* compact out the extra columns in B */
  for (i = 0; i < B->mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
  }
  B->nbs = ec;
  PetscLayoutDestroy(&baij->B->cmap);
  PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)baij->B), ec * mat->rmap->bs, ec * mat->rmap->bs, mat->rmap->bs, &baij->B->cmap);
  PetscFree(indices);
#endif

  /* create local vector that is used to scatter into */
  VecCreateSeq(PETSC_COMM_SELF, ec * bs, &baij->lvec);

  /* create two temporary index sets for building scatter-gather */
  ISCreateBlock(PETSC_COMM_SELF, bs, ec, garray, PETSC_COPY_VALUES, &from);

  PetscMalloc1(ec, &stmp);
  for (i = 0; i < ec; i++) stmp[i] = i;
  ISCreateBlock(PETSC_COMM_SELF, bs, ec, stmp, PETSC_OWN_POINTER, &to);

  /* create temporary global vector to generate scatter context */
  VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec);

  VecScatterCreate(gvec, from, baij->lvec, to, &baij->Mvctx);
  VecScatterViewFromOptions(baij->Mvctx, (PetscObject)mat, "-matmult_vecscatter_view");
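
  /* record the global block columns of B; MatDisAssemble_MPIBAIJ() below relies on this when it maps compact indices back to global ones */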
  baij->garray = garray;

  ISDestroy(&from);
  ISDestroy(&to);
  VecDestroy(&gvec);
  return 0;
}

/*
   Takes the local part of an already assembled MPIBAIJ matrix
   and disassembles it. This is to allow new nonzeros into the matrix
   that require more communication in the matrix-vector multiply;
   thus certain data structures must be rebuilt.

   Kind of slow! But that's what application programmers get when
   they are sloppy.
*/
PetscErrorCode MatDisAssemble_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *baij  = (Mat_MPIBAIJ *)A->data;
  Mat          B     = baij->B, Bnew;
  Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ *)B->data;
  PetscInt     i, j, mbs = Bbaij->mbs, n = A->cmap->N, col, *garray = baij->garray;
  PetscInt     bs2 = baij->bs2, *nz, m = A->rmap->n;
  MatScalar   *a = Bbaij->a;
  MatScalar   *atmp;

  /* free stuff related to matrix-vec multiply */
  VecDestroy(&baij->lvec);
  baij->lvec = NULL;
  VecScatterDestroy(&baij->Mvctx);
  baij->Mvctx = NULL;
  if (baij->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscTableDestroy(&baij->colmap);
#else
    PetscFree(baij->colmap);
#endif
  }

  /* make sure that B is assembled so we can access its values */
  MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY);

  /* invent new B and copy stuff over */
  PetscMalloc1(mbs, &nz);
  for (i = 0; i < mbs; i++) nz[i] = Bbaij->i[i + 1] - Bbaij->i[i];
  MatCreate(PetscObjectComm((PetscObject)B), &Bnew);
  MatSetSizes(Bnew, m, n, m, n);
  MatSetType(Bnew, ((PetscObject)B)->type_name);
  MatSeqBAIJSetPreallocation(Bnew, B->rmap->bs, 0, nz);
  if (Bbaij->nonew >= 0) { /* Inherit insertion error options (if positive). */
    ((Mat_SeqBAIJ *)Bnew->data)->nonew = Bbaij->nonew;
  }

  MatSetOption(Bnew, MAT_ROW_ORIENTED, PETSC_FALSE);
  /*
     Ensure that B's nonzerostate is monotonically increasing.
     Or should this follow the MatSetValuesBlocked() loop to preserve B's nonzerostate across a MatDisAssemble() call?
  */
  Bnew->nonzerostate = B->nonzerostate;
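
  /* copy the values of B into Bnew, translating compacted column indices back to global block columns via garray */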
  for (i = 0; i < mbs; i++) {
    for (j = Bbaij->i[i]; j < Bbaij->i[i + 1]; j++) {
      col  = garray[Bbaij->j[j]];
      atmp = a + j * bs2;
      MatSetValuesBlocked_SeqBAIJ(Bnew, 1, &i, 1, &col, atmp, B->insertmode);
    }
  }
  MatSetOption(Bnew, MAT_ROW_ORIENTED, PETSC_TRUE);

  PetscFree(nz);
  PetscFree(baij->garray);
  MatDestroy(&B);

  baij->B          = Bnew;
  A->was_assembled = PETSC_FALSE;
  A->assembled     = PETSC_FALSE;
  return 0;
}

/* ugly stuff added for Glenn; someday we should fix this up */

static PetscInt *uglyrmapd = NULL, *uglyrmapo = NULL; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */
static Vec       uglydd = NULL, uglyoo = NULL;        /* work vectors used to scale the two parts of the local matrix */
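
/* Note: because these are file-scope statics, the setup below supports only one matrix at a time and is not thread safe */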

PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA, Vec scale)
{
  Mat_MPIBAIJ *ina = (Mat_MPIBAIJ *)inA->data; /* access private part of the matrix */
  Mat_SeqBAIJ *B   = (Mat_SeqBAIJ *)ina->B->data;
  PetscInt     bs = inA->rmap->bs, i, n, nt, j, cstart, cend, no, *garray = ina->garray, *lindices;
  PetscInt    *r_rmapd, *r_rmapo;
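
  /* split the blocks of the local-to-global mapping into those owned by this process (the "diagonal" part ina->A) and ghost blocks (the "off-diagonal" part ina->B) */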
  MatGetOwnershipRange(inA, &cstart, &cend);
  MatGetSize(ina->A, NULL, &n);
  PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapd);
  nt = 0;
  for (i = 0; i < inA->rmap->mapping->n; i++) {
    if (inA->rmap->mapping->indices[i] * bs >= cstart && inA->rmap->mapping->indices[i] * bs < cend) {
      nt++;
      r_rmapd[i] = inA->rmap->mapping->indices[i] + 1;
    }
  }

  PetscMalloc1(n + 1, &uglyrmapd);
  for (i = 0; i < inA->rmap->mapping->n; i++) {
    if (r_rmapd[i]) {
      for (j = 0; j < bs; j++) uglyrmapd[(r_rmapd[i] - 1) * bs + j - cstart] = i * bs + j;
    }
  }
  PetscFree(r_rmapd);
  VecCreateSeq(PETSC_COMM_SELF, n, &uglydd);
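
  /* invert garray: lindices[g] is 1 + the position of global block column g in B, or 0 if g does not appear */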
  PetscCalloc1(ina->Nbs + 1, &lindices);
  for (i = 0; i < B->nbs; i++) lindices[garray[i]] = i + 1;
  no = inA->rmap->mapping->n - nt; /* number of ghost blocks in the mapping; not otherwise used here */
  PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapo);
  nt = 0;
  for (i = 0; i < inA->rmap->mapping->n; i++) {
    if (lindices[inA->rmap->mapping->indices[i]]) {
      nt++;
      r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]];
    }
  }

  PetscFree(lindices);
  PetscMalloc1(nt * bs + 1, &uglyrmapo);
  for (i = 0; i < inA->rmap->mapping->n; i++) {
    if (r_rmapo[i]) {
      for (j = 0; j < bs; j++) uglyrmapo[(r_rmapo[i] - 1) * bs + j] = i * bs + j;
    }
  }
  PetscFree(r_rmapo);
  VecCreateSeq(PETSC_COMM_SELF, nt * bs, &uglyoo);
  return 0;
}

PetscErrorCode MatMPIBAIJDiagonalScaleLocal(Mat A, Vec scale)
{
  /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal() */

  PetscTryMethod(A, "MatDiagonalScaleLocal_C", (Mat, Vec), (A, scale));
  return 0;
}
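
/*
   Usage sketch (illustrative only, not part of this file; the names lscale, bs, and
   nlocal are assumptions): given an assembled MATMPIBAIJ matrix A with a local-to-global
   mapping set, and a sequential vector following that local ordering,

     VecCreateSeq(PETSC_COMM_SELF, bs * nlocal, &lscale);
     ...fill lscale with the per-row scale factors...
     MatMPIBAIJDiagonalScaleLocal(A, lscale);

   column-scales both the diagonal and off-diagonal parts of the local matrix.
*/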

PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat A, Vec scale)
{
  Mat_MPIBAIJ       *a = (Mat_MPIBAIJ *)A->data; /* access private part of the matrix */
  PetscInt           n, i;
  PetscScalar       *d, *o;
  const PetscScalar *s;

  if (!uglyrmapd) MatMPIBAIJDiagonalScaleLocalSetUp(A, scale);

  VecGetArrayRead(scale, &s);

  VecGetLocalSize(uglydd, &n);
  VecGetArray(uglydd, &d);
  for (i = 0; i < n; i++) d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */
  VecRestoreArray(uglydd, &d);
  /* column scale "diagonal" portion of local matrix */
  MatDiagonalScale(a->A, NULL, uglydd);

  VecGetLocalSize(uglyoo, &n);
  VecGetArray(uglyoo, &o);
  for (i = 0; i < n; i++) o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */
  VecRestoreArrayRead(scale, &s);
  VecRestoreArray(uglyoo, &o);
  /* column scale "off-diagonal" portion of local matrix */
  MatDiagonalScale(a->B, NULL, uglyoo);
  return 0;
}