Actual source code: mmsbaij.c


  2: /*
  3:    Support for the parallel SBAIJ matrix vector multiply
  4: */
  5: #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>

  7: PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat)
  8: {
  9:   Mat_MPISBAIJ   *sbaij = (Mat_MPISBAIJ *)mat->data;
 10:   Mat_SeqBAIJ    *B     = (Mat_SeqBAIJ *)(sbaij->B->data);
 11:   PetscInt        Nbs = sbaij->Nbs, i, j, *aj = B->j, ec = 0, *garray, *sgarray;
 12:   PetscInt        bs = mat->rmap->bs, *stmp, mbs = sbaij->mbs, vec_size, nt;
 13:   IS              from, to;
 14:   Vec             gvec;
 15:   PetscMPIInt     rank   = sbaij->rank, lsize;
 16:   PetscInt       *owners = sbaij->rangebs, *ec_owner, k;
 17:   const PetscInt *sowners;
 18:   PetscScalar    *ptr;
 19: #if defined(PETSC_USE_CTABLE)
 20:   PetscTable         gid1_lid1; /* one-based gid to lid table */
 21:   PetscTablePosition tpos;
 22:   PetscInt           gid, lid;
 23: #else
 24:   PetscInt *indices;
 25: #endif

 27: #if defined(PETSC_USE_CTABLE)
 28:   PetscTableCreate(mbs, Nbs + 1, &gid1_lid1);
 29:   for (i = 0; i < B->mbs; i++) {
 30:     for (j = 0; j < B->ilen[i]; j++) {
 31:       PetscInt data, gid1 = aj[B->i[i] + j] + 1;
 32:       PetscTableFind(gid1_lid1, gid1, &data);
 33:       if (!data) PetscTableAdd(gid1_lid1, gid1, ++ec, INSERT_VALUES);
 34:     }
 35:   }
 36:   /* form array of columns we need */
 37:   PetscMalloc1(ec, &garray);
 38:   PetscTableGetHeadPosition(gid1_lid1, &tpos);
 39:   while (tpos) {
 40:     PetscTableGetNext(gid1_lid1, &tpos, &gid, &lid);
 41:     gid--;
 42:     lid--;
 43:     garray[lid] = gid;
 44:   }
 45:   PetscSortInt(ec, garray);
 46:   PetscTableRemoveAll(gid1_lid1);
 47:   for (i = 0; i < ec; i++) PetscTableAdd(gid1_lid1, garray[i] + 1, i + 1, INSERT_VALUES);
 48:   /* compact out the extra columns in B */
 49:   for (i = 0; i < B->mbs; i++) {
 50:     for (j = 0; j < B->ilen[i]; j++) {
 51:       PetscInt gid1 = aj[B->i[i] + j] + 1;
 52:       PetscTableFind(gid1_lid1, gid1, &lid);
 53:       lid--;
 54:       aj[B->i[i] + j] = lid;
 55:     }
 56:   }
 57:   PetscTableDestroy(&gid1_lid1);
 58:   PetscMalloc2(2 * ec, &sgarray, ec, &ec_owner);
 59:   for (i = j = 0; i < ec; i++) {
 60:     while (garray[i] >= owners[j + 1]) j++;
 61:     ec_owner[i] = j;
 62:   }
 63: #else
 64:   /* For the first stab we make an array as long as the number of columns */
 65:   /* mark those columns that are in sbaij->B */
 66:   PetscCalloc1(Nbs, &indices);
 67:   for (i = 0; i < mbs; i++) {
 68:     for (j = 0; j < B->ilen[i]; j++) {
 69:       if (!indices[aj[B->i[i] + j]]) ec++;
 70:       indices[aj[B->i[i] + j]] = 1;
 71:     }
 72:   }

 74:   /* form arrays of columns we need */
 75:   PetscMalloc1(ec, &garray);
 76:   PetscMalloc2(2 * ec, &sgarray, ec, &ec_owner);

 78:   ec = 0;
 79:   for (j = 0; j < sbaij->size; j++) {
 80:     for (i = owners[j]; i < owners[j + 1]; i++) {
 81:       if (indices[i]) {
 82:         garray[ec]   = i;
 83:         ec_owner[ec] = j;
 84:         ec++;
 85:       }
 86:     }
 87:   }

 89:   /* make indices now point into garray */
 90:   for (i = 0; i < ec; i++) indices[garray[i]] = i;

 92:   /* compact out the extra columns in B */
 93:   for (i = 0; i < mbs; i++) {
 94:     for (j = 0; j < B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
 95:   }
 96:   PetscFree(indices);
 97: #endif
 98:   B->nbs = ec;
 99:   PetscLayoutDestroy(&sbaij->B->cmap);
100:   PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sbaij->B), ec * mat->rmap->bs, ec * mat->rmap->bs, mat->rmap->bs, &sbaij->B->cmap);

102:   VecScatterDestroy(&sbaij->sMvctx);
103:   /* create local vector that is used to scatter into */
104:   VecCreateSeq(PETSC_COMM_SELF, ec * bs, &sbaij->lvec);

106:   /* create two temporary index sets for building scatter-gather */
107:   PetscMalloc1(2 * ec, &stmp);
108:   ISCreateBlock(PETSC_COMM_SELF, bs, ec, garray, PETSC_COPY_VALUES, &from);
109:   for (i = 0; i < ec; i++) stmp[i] = i;
110:   ISCreateBlock(PETSC_COMM_SELF, bs, ec, stmp, PETSC_COPY_VALUES, &to);

112:   /* generate the scatter context
113:      -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but have other uses, such as in MatDiagonalScale_MPISBAIJ */
114:   VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec);
115:   VecScatterCreate(gvec, from, sbaij->lvec, to, &sbaij->Mvctx);
116:   VecDestroy(&gvec);

118:   sbaij->garray = garray;

120:   ISDestroy(&from);
121:   ISDestroy(&to);

123:   /* create parallel vector that is used by SBAIJ matrix to scatter from/into */
124:   lsize = (mbs + ec) * bs;
125:   VecCreateMPI(PetscObjectComm((PetscObject)mat), lsize, PETSC_DETERMINE, &sbaij->slvec0);
126:   VecDuplicate(sbaij->slvec0, &sbaij->slvec1);
127:   VecGetSize(sbaij->slvec0, &vec_size);

129:   VecGetOwnershipRanges(sbaij->slvec0, &sowners);

131:   /* x index in the IS sfrom */
132:   for (i = 0; i < ec; i++) {
133:     j          = ec_owner[i];
134:     sgarray[i] = garray[i] + (sowners[j] / bs - owners[j]);
135:   }
136:   /* b index in the IS sfrom */
137:   k = sowners[rank] / bs + mbs;
138:   for (i = ec, j = 0; i < 2 * ec; i++, j++) sgarray[i] = k + j;
139:   ISCreateBlock(PETSC_COMM_SELF, bs, 2 * ec, sgarray, PETSC_COPY_VALUES, &from);

141:   /* x index in the IS sto */
142:   k = sowners[rank] / bs + mbs;
143:   for (i = 0; i < ec; i++) stmp[i] = (k + i);
144:   /* b index in the IS sto */
145:   for (i = ec; i < 2 * ec; i++) stmp[i] = sgarray[i - ec];

147:   ISCreateBlock(PETSC_COMM_SELF, bs, 2 * ec, stmp, PETSC_COPY_VALUES, &to);

149:   VecScatterCreate(sbaij->slvec0, from, sbaij->slvec1, to, &sbaij->sMvctx);
150:   VecScatterViewFromOptions(sbaij->sMvctx, (PetscObject)mat, "-matmult_vecscatter_view");

152:   VecGetLocalSize(sbaij->slvec1, &nt);
153:   VecGetArray(sbaij->slvec1, &ptr);
154:   VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, ptr, &sbaij->slvec1a);
155:   VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, ptr + bs * mbs, &sbaij->slvec1b);
156:   VecRestoreArray(sbaij->slvec1, &ptr);

158:   VecGetArray(sbaij->slvec0, &ptr);
159:   VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, ptr + bs * mbs, &sbaij->slvec0b);
160:   VecRestoreArray(sbaij->slvec0, &ptr);

162:   PetscFree(stmp);

164:   ISDestroy(&from);
165:   ISDestroy(&to);
166:   PetscFree2(sgarray, ec_owner);
167:   return 0;
168: }

170: /*
171:      Takes the local part of an already assembled MPISBAIJ matrix
172:    and disassembles it. This is to allow new nonzeros into the matrix
173:    that require more communication in the matrix vector multiply.
174:    Thus certain data-structures must be rebuilt.

176:    Kind of slow! But that's what application programmers get when
177:    they are sloppy.
178: */
179: PetscErrorCode MatDisAssemble_MPISBAIJ(Mat A)
180: {
181:   Mat_MPISBAIJ *baij  = (Mat_MPISBAIJ *)A->data;
182:   Mat           B     = baij->B, Bnew;
183:   Mat_SeqBAIJ  *Bbaij = (Mat_SeqBAIJ *)B->data;
184:   PetscInt      i, j, mbs = Bbaij->mbs, n = A->cmap->N, col, *garray = baij->garray;
185:   PetscInt      k, bs = A->rmap->bs, bs2 = baij->bs2, *rvals, *nz, m = A->rmap->n;
186:   MatScalar    *a = Bbaij->a;
187:   PetscScalar  *atmp;
188: #if defined(PETSC_USE_REAL_MAT_SINGLE)
189:   PetscInt l;
190: #endif

192: #if defined(PETSC_USE_REAL_MAT_SINGLE)
193:   PetscMalloc1(A->rmap->bs, &atmp);
194: #endif
195:   /* free stuff related to matrix-vec multiply */
196:   VecDestroy(&baij->lvec);
197:   VecScatterDestroy(&baij->Mvctx);

199:   VecDestroy(&baij->slvec0);
200:   VecDestroy(&baij->slvec0b);
201:   VecDestroy(&baij->slvec1);
202:   VecDestroy(&baij->slvec1a);
203:   VecDestroy(&baij->slvec1b);

205:   if (baij->colmap) {
206: #if defined(PETSC_USE_CTABLE)
207:     PetscTableDestroy(&baij->colmap);
208: #else
209:     PetscFree(baij->colmap);
210: #endif
211:   }

213:   /* make sure that B is assembled so we can access its values */
214:   MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY);
215:   MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY);

217:   /* invent new B and copy stuff over */
218:   PetscMalloc1(mbs, &nz);
219:   for (i = 0; i < mbs; i++) nz[i] = Bbaij->i[i + 1] - Bbaij->i[i];
220:   MatCreate(PETSC_COMM_SELF, &Bnew);
221:   MatSetSizes(Bnew, m, n, m, n);
222:   MatSetType(Bnew, ((PetscObject)B)->type_name);
223:   MatSeqBAIJSetPreallocation(Bnew, B->rmap->bs, 0, nz);
224:   PetscFree(nz);

226:   if (Bbaij->nonew >= 0) { /* Inherit insertion error options (if positive). */
227:     ((Mat_SeqSBAIJ *)Bnew->data)->nonew = Bbaij->nonew;
228:   }

230:   /*
231:    Ensure that B's nonzerostate is monotonically increasing.
232:    Or should this follow the MatSetValues() loop to preserve B's nonzerstate across a MatDisAssemble() call?
233:    */
234:   Bnew->nonzerostate = B->nonzerostate;
235:   PetscMalloc1(bs, &rvals);
236:   for (i = 0; i < mbs; i++) {
237:     rvals[0] = bs * i;
238:     for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
239:     for (j = Bbaij->i[i]; j < Bbaij->i[i + 1]; j++) {
240:       col = garray[Bbaij->j[j]] * bs;
241:       for (k = 0; k < bs; k++) {
242: #if defined(PETSC_USE_REAL_MAT_SINGLE)
243:         for (l = 0; l < bs; l++) atmp[l] = a[j * bs2 + l];
244: #else
245:         atmp = a + j * bs2 + k * bs;
246: #endif
247:         MatSetValues_SeqSBAIJ(Bnew, bs, rvals, 1, &col, atmp, B->insertmode);
248:         col++;
249:       }
250:     }
251:   }
252: #if defined(PETSC_USE_REAL_MAT_SINGLE)
253:   PetscFree(atmp);
254: #endif
255:   PetscFree(baij->garray);

257:   baij->garray = NULL;

259:   PetscFree(rvals);
260:   MatDestroy(&B);

262:   baij->B = Bnew;

264:   A->was_assembled = PETSC_FALSE;
265:   return 0;
266: }