Actual source code: mmbaij.c


  2: /*
  3:    Support for the parallel BAIJ matrix vector multiply
  4: */
  5: #include <../src/mat/impls/baij/mpi/mpibaij.h>
  6: #include <petsc/private/isimpl.h>

  8: PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat)
  9: {
 10:   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
 11:   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)(baij->B->data);
 12:   PetscInt     i, j, *aj = B->j, ec = 0, *garray;
 13:   PetscInt     bs = mat->rmap->bs, *stmp;
 14:   IS           from, to;
 15:   Vec          gvec;
 16: #if defined(PETSC_USE_CTABLE)
 17:   PetscTable         gid1_lid1;
 18:   PetscTablePosition tpos;
 19:   PetscInt           gid, lid;
 20: #else
 21:   PetscInt Nbs = baij->Nbs, *indices;
 22: #endif

 24: #if defined(PETSC_USE_CTABLE)
 25:   /* use a table - Mark Adams */
 26:   PetscTableCreate(B->mbs, baij->Nbs + 1, &gid1_lid1);
 27:   for (i = 0; i < B->mbs; i++) {
 28:     for (j = 0; j < B->ilen[i]; j++) {
 29:       PetscInt data, gid1 = aj[B->i[i] + j] + 1;
 30:       PetscTableFind(gid1_lid1, gid1, &data);
 31:       if (!data) {
 32:         /* one based table */
 33:         PetscTableAdd(gid1_lid1, gid1, ++ec, INSERT_VALUES);
 34:       }
 35:     }
 36:   }
 37:   /* form array of columns we need */
 38:   PetscMalloc1(ec, &garray);
 39:   PetscTableGetHeadPosition(gid1_lid1, &tpos);
 40:   while (tpos) {
 41:     PetscTableGetNext(gid1_lid1, &tpos, &gid, &lid);
 42:     gid--;
 43:     lid--;
 44:     garray[lid] = gid;
 45:   }
 46:   PetscSortInt(ec, garray);
 47:   PetscTableRemoveAll(gid1_lid1);
 48:   for (i = 0; i < ec; i++) PetscTableAdd(gid1_lid1, garray[i] + 1, i + 1, INSERT_VALUES);
 49:   /* compact out the extra columns in B */
 50:   for (i = 0; i < B->mbs; i++) {
 51:     for (j = 0; j < B->ilen[i]; j++) {
 52:       PetscInt gid1 = aj[B->i[i] + j] + 1;
 53:       PetscTableFind(gid1_lid1, gid1, &lid);
 54:       lid--;
 55:       aj[B->i[i] + j] = lid;
 56:     }
 57:   }
 58:   B->nbs = ec;
 59:   PetscLayoutDestroy(&baij->B->cmap);
 60:   PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)baij->B), ec * mat->rmap->bs, ec * mat->rmap->bs, mat->rmap->bs, &baij->B->cmap);
 61:   PetscTableDestroy(&gid1_lid1);
 62: #else
 63:   /* Make an array as long as the number of columns */
 64:   /* mark those columns that are in baij->B */
 65:   PetscCalloc1(Nbs, &indices);
 66:   for (i = 0; i < B->mbs; i++) {
 67:     for (j = 0; j < B->ilen[i]; j++) {
 68:       if (!indices[aj[B->i[i] + j]]) ec++;
 69:       indices[aj[B->i[i] + j]] = 1;
 70:     }
 71:   }

 73:   /* form array of columns we need */
 74:   PetscMalloc1(ec, &garray);
 75:   ec = 0;
 76:   for (i = 0; i < Nbs; i++) {
 77:     if (indices[i]) garray[ec++] = i;
 78:   }

 80:   /* make indices now point into garray */
 81:   for (i = 0; i < ec; i++) indices[garray[i]] = i;

 83:   /* compact out the extra columns in B */
 84:   for (i = 0; i < B->mbs; i++) {
 85:     for (j = 0; j < B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
 86:   }
 87:   B->nbs = ec;
 88:   PetscLayoutDestroy(&baij->B->cmap);
 89:   PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)baij->B), ec * mat->rmap->bs, ec * mat->rmap->bs, mat->rmap->bs, &baij->B->cmap);
 90:   PetscFree(indices);
 91: #endif

 93:   /* create local vector that is used to scatter into */
 94:   VecCreateSeq(PETSC_COMM_SELF, ec * bs, &baij->lvec);

 96:   /* create two temporary index sets for building scatter-gather */
 97:   ISCreateBlock(PETSC_COMM_SELF, bs, ec, garray, PETSC_COPY_VALUES, &from);

 99:   PetscMalloc1(ec, &stmp);
100:   for (i = 0; i < ec; i++) stmp[i] = i;
101:   ISCreateBlock(PETSC_COMM_SELF, bs, ec, stmp, PETSC_OWN_POINTER, &to);

103:   /* create temporary global vector to generate scatter context */
104:   VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec);

106:   VecScatterCreate(gvec, from, baij->lvec, to, &baij->Mvctx);
107:   VecScatterViewFromOptions(baij->Mvctx, (PetscObject)mat, "-matmult_vecscatter_view");

109:   baij->garray = garray;

111:   ISDestroy(&from);
112:   ISDestroy(&to);
113:   VecDestroy(&gvec);
114:   return 0;
115: }

117: /*
118:      Takes the local part of an already assembled MPIBAIJ matrix
119:    and disassembles it. This is to allow new nonzeros into the matrix
120:    that require more communication in the matrix vector multiply.
121:    Thus certain data-structures must be rebuilt.

123:    Kind of slow! But that's what application programmers get when
124:    they are sloppy.
125: */
126: PetscErrorCode MatDisAssemble_MPIBAIJ(Mat A)
127: {
128:   Mat_MPIBAIJ *baij  = (Mat_MPIBAIJ *)A->data;
129:   Mat          B     = baij->B, Bnew;
130:   Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ *)B->data;
131:   PetscInt     i, j, mbs = Bbaij->mbs, n = A->cmap->N, col, *garray = baij->garray;
132:   PetscInt     bs2 = baij->bs2, *nz, m = A->rmap->n;
133:   MatScalar   *a = Bbaij->a;
134:   MatScalar   *atmp;

136:   /* free stuff related to matrix-vec multiply */
137:   VecDestroy(&baij->lvec);
138:   baij->lvec = NULL;
139:   VecScatterDestroy(&baij->Mvctx);
140:   baij->Mvctx = NULL;
141:   if (baij->colmap) {
142: #if defined(PETSC_USE_CTABLE)
143:     PetscTableDestroy(&baij->colmap);
144: #else
145:     PetscFree(baij->colmap);
146: #endif
147:   }

149:   /* make sure that B is assembled so we can access its values */
150:   MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY);
151:   MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY);

153:   /* invent new B and copy stuff over */
154:   PetscMalloc1(mbs, &nz);
155:   for (i = 0; i < mbs; i++) nz[i] = Bbaij->i[i + 1] - Bbaij->i[i];
156:   MatCreate(PetscObjectComm((PetscObject)B), &Bnew);
157:   MatSetSizes(Bnew, m, n, m, n);
158:   MatSetType(Bnew, ((PetscObject)B)->type_name);
159:   MatSeqBAIJSetPreallocation(Bnew, B->rmap->bs, 0, nz);
160:   if (Bbaij->nonew >= 0) { /* Inherit insertion error options (if positive). */
161:     ((Mat_SeqBAIJ *)Bnew->data)->nonew = Bbaij->nonew;
162:   }

164:   MatSetOption(Bnew, MAT_ROW_ORIENTED, PETSC_FALSE);
165:   /*
166:    Ensure that B's nonzerostate is monotonically increasing.
167:    Or should this follow the MatSetValuesBlocked() loop to preserve B's nonzerstate across a MatDisAssemble() call?
168:    */
169:   Bnew->nonzerostate = B->nonzerostate;

171:   for (i = 0; i < mbs; i++) {
172:     for (j = Bbaij->i[i]; j < Bbaij->i[i + 1]; j++) {
173:       col  = garray[Bbaij->j[j]];
174:       atmp = a + j * bs2;
175:       MatSetValuesBlocked_SeqBAIJ(Bnew, 1, &i, 1, &col, atmp, B->insertmode);
176:     }
177:   }
178:   MatSetOption(Bnew, MAT_ROW_ORIENTED, PETSC_TRUE);

180:   PetscFree(nz);
181:   PetscFree(baij->garray);
182:   MatDestroy(&B);

184:   baij->B          = Bnew;
185:   A->was_assembled = PETSC_FALSE;
186:   A->assembled     = PETSC_FALSE;
187:   return 0;
188: }

190: /*      ugly stuff added for Glenn someday we should fix this up */

192: static PetscInt *uglyrmapd = NULL, *uglyrmapo = NULL; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */
193: static Vec       uglydd = NULL, uglyoo = NULL;        /* work vectors used to scale the two parts of the local matrix */

195: PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA, Vec scale)
196: {
197:   Mat_MPIBAIJ *ina = (Mat_MPIBAIJ *)inA->data; /*access private part of matrix */
198:   Mat_SeqBAIJ *B   = (Mat_SeqBAIJ *)ina->B->data;
199:   PetscInt     bs = inA->rmap->bs, i, n, nt, j, cstart, cend, no, *garray = ina->garray, *lindices;
200:   PetscInt    *r_rmapd, *r_rmapo;

202:   MatGetOwnershipRange(inA, &cstart, &cend);
203:   MatGetSize(ina->A, NULL, &n);
204:   PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapd);
205:   nt = 0;
206:   for (i = 0; i < inA->rmap->mapping->n; i++) {
207:     if (inA->rmap->mapping->indices[i] * bs >= cstart && inA->rmap->mapping->indices[i] * bs < cend) {
208:       nt++;
209:       r_rmapd[i] = inA->rmap->mapping->indices[i] + 1;
210:     }
211:   }
213:   PetscMalloc1(n + 1, &uglyrmapd);
214:   for (i = 0; i < inA->rmap->mapping->n; i++) {
215:     if (r_rmapd[i]) {
216:       for (j = 0; j < bs; j++) uglyrmapd[(r_rmapd[i] - 1) * bs + j - cstart] = i * bs + j;
217:     }
218:   }
219:   PetscFree(r_rmapd);
220:   VecCreateSeq(PETSC_COMM_SELF, n, &uglydd);

222:   PetscCalloc1(ina->Nbs + 1, &lindices);
223:   for (i = 0; i < B->nbs; i++) lindices[garray[i]] = i + 1;
224:   no = inA->rmap->mapping->n - nt;
225:   PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapo);
226:   nt = 0;
227:   for (i = 0; i < inA->rmap->mapping->n; i++) {
228:     if (lindices[inA->rmap->mapping->indices[i]]) {
229:       nt++;
230:       r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]];
231:     }
232:   }
234:   PetscFree(lindices);
235:   PetscMalloc1(nt * bs + 1, &uglyrmapo);
236:   for (i = 0; i < inA->rmap->mapping->n; i++) {
237:     if (r_rmapo[i]) {
238:       for (j = 0; j < bs; j++) uglyrmapo[(r_rmapo[i] - 1) * bs + j] = i * bs + j;
239:     }
240:   }
241:   PetscFree(r_rmapo);
242:   VecCreateSeq(PETSC_COMM_SELF, nt * bs, &uglyoo);
243:   return 0;
244: }

246: PetscErrorCode MatMPIBAIJDiagonalScaleLocal(Mat A, Vec scale)
247: {
248:   /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */

250:   PetscTryMethod(A, "MatDiagonalScaleLocal_C", (Mat, Vec), (A, scale));
251:   return 0;
252: }

254: PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat A, Vec scale)
255: {
256:   Mat_MPIBAIJ       *a = (Mat_MPIBAIJ *)A->data; /*access private part of matrix */
257:   PetscInt           n, i;
258:   PetscScalar       *d, *o;
259:   const PetscScalar *s;

261:   if (!uglyrmapd) MatMPIBAIJDiagonalScaleLocalSetUp(A, scale);

263:   VecGetArrayRead(scale, &s);

265:   VecGetLocalSize(uglydd, &n);
266:   VecGetArray(uglydd, &d);
267:   for (i = 0; i < n; i++) { d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ }
268:   VecRestoreArray(uglydd, &d);
269:   /* column scale "diagonal" portion of local matrix */
270:   MatDiagonalScale(a->A, NULL, uglydd);

272:   VecGetLocalSize(uglyoo, &n);
273:   VecGetArray(uglyoo, &o);
274:   for (i = 0; i < n; i++) { o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ }
275:   VecRestoreArrayRead(scale, &s);
276:   VecRestoreArray(uglyoo, &o);
277:   /* column scale "off-diagonal" portion of local matrix */
278:   MatDiagonalScale(a->B, NULL, uglyoo);
279:   return 0;
280: }