Actual source code: mmsbaij.c
2: /*
3: Support for the parallel SBAIJ matrix vector multiply
4: */
5: #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
7: PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat)
8: {
9: Mat_MPISBAIJ *sbaij = (Mat_MPISBAIJ *)mat->data;
10: Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)(sbaij->B->data);
11: PetscInt Nbs = sbaij->Nbs, i, j, *aj = B->j, ec = 0, *garray, *sgarray;
12: PetscInt bs = mat->rmap->bs, *stmp, mbs = sbaij->mbs, vec_size, nt;
13: IS from, to;
14: Vec gvec;
15: PetscMPIInt rank = sbaij->rank, lsize;
16: PetscInt *owners = sbaij->rangebs, *ec_owner, k;
17: const PetscInt *sowners;
18: PetscScalar *ptr;
19: #if defined(PETSC_USE_CTABLE)
20: PetscTable gid1_lid1; /* one-based gid to lid table */
21: PetscTablePosition tpos;
22: PetscInt gid, lid;
23: #else
24: PetscInt *indices;
25: #endif
27: #if defined(PETSC_USE_CTABLE)
28: PetscTableCreate(mbs, Nbs + 1, &gid1_lid1);
29: for (i = 0; i < B->mbs; i++) {
30: for (j = 0; j < B->ilen[i]; j++) {
31: PetscInt data, gid1 = aj[B->i[i] + j] + 1;
32: PetscTableFind(gid1_lid1, gid1, &data);
33: if (!data) PetscTableAdd(gid1_lid1, gid1, ++ec, INSERT_VALUES);
34: }
35: }
36: /* form array of columns we need */
37: PetscMalloc1(ec, &garray);
38: PetscTableGetHeadPosition(gid1_lid1, &tpos);
39: while (tpos) {
40: PetscTableGetNext(gid1_lid1, &tpos, &gid, &lid);
41: gid--;
42: lid--;
43: garray[lid] = gid;
44: }
45: PetscSortInt(ec, garray);
46: PetscTableRemoveAll(gid1_lid1);
47: for (i = 0; i < ec; i++) PetscTableAdd(gid1_lid1, garray[i] + 1, i + 1, INSERT_VALUES);
48: /* compact out the extra columns in B */
49: for (i = 0; i < B->mbs; i++) {
50: for (j = 0; j < B->ilen[i]; j++) {
51: PetscInt gid1 = aj[B->i[i] + j] + 1;
52: PetscTableFind(gid1_lid1, gid1, &lid);
53: lid--;
54: aj[B->i[i] + j] = lid;
55: }
56: }
57: PetscTableDestroy(&gid1_lid1);
58: PetscMalloc2(2 * ec, &sgarray, ec, &ec_owner);
59: for (i = j = 0; i < ec; i++) {
60: while (garray[i] >= owners[j + 1]) j++;
61: ec_owner[i] = j;
62: }
63: #else
64: /* For the first stab we make an array as long as the number of columns */
65: /* mark those columns that are in sbaij->B */
66: PetscCalloc1(Nbs, &indices);
67: for (i = 0; i < mbs; i++) {
68: for (j = 0; j < B->ilen[i]; j++) {
69: if (!indices[aj[B->i[i] + j]]) ec++;
70: indices[aj[B->i[i] + j]] = 1;
71: }
72: }
74: /* form arrays of columns we need */
75: PetscMalloc1(ec, &garray);
76: PetscMalloc2(2 * ec, &sgarray, ec, &ec_owner);
78: ec = 0;
79: for (j = 0; j < sbaij->size; j++) {
80: for (i = owners[j]; i < owners[j + 1]; i++) {
81: if (indices[i]) {
82: garray[ec] = i;
83: ec_owner[ec] = j;
84: ec++;
85: }
86: }
87: }
89: /* make indices now point into garray */
90: for (i = 0; i < ec; i++) indices[garray[i]] = i;
92: /* compact out the extra columns in B */
93: for (i = 0; i < mbs; i++) {
94: for (j = 0; j < B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
95: }
96: PetscFree(indices);
97: #endif
98: B->nbs = ec;
99: PetscLayoutDestroy(&sbaij->B->cmap);
100: PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sbaij->B), ec * mat->rmap->bs, ec * mat->rmap->bs, mat->rmap->bs, &sbaij->B->cmap);
102: VecScatterDestroy(&sbaij->sMvctx);
103: /* create local vector that is used to scatter into */
104: VecCreateSeq(PETSC_COMM_SELF, ec * bs, &sbaij->lvec);
106: /* create two temporary index sets for building scatter-gather */
107: PetscMalloc1(2 * ec, &stmp);
108: ISCreateBlock(PETSC_COMM_SELF, bs, ec, garray, PETSC_COPY_VALUES, &from);
109: for (i = 0; i < ec; i++) stmp[i] = i;
110: ISCreateBlock(PETSC_COMM_SELF, bs, ec, stmp, PETSC_COPY_VALUES, &to);
112: /* generate the scatter context
113: -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but have other uses, such as in MatDiagonalScale_MPISBAIJ */
114: VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec);
115: VecScatterCreate(gvec, from, sbaij->lvec, to, &sbaij->Mvctx);
116: VecDestroy(&gvec);
118: sbaij->garray = garray;
120: ISDestroy(&from);
121: ISDestroy(&to);
123: /* create parallel vector that is used by SBAIJ matrix to scatter from/into */
124: lsize = (mbs + ec) * bs;
125: VecCreateMPI(PetscObjectComm((PetscObject)mat), lsize, PETSC_DETERMINE, &sbaij->slvec0);
126: VecDuplicate(sbaij->slvec0, &sbaij->slvec1);
127: VecGetSize(sbaij->slvec0, &vec_size);
129: VecGetOwnershipRanges(sbaij->slvec0, &sowners);
131: /* x index in the IS sfrom */
132: for (i = 0; i < ec; i++) {
133: j = ec_owner[i];
134: sgarray[i] = garray[i] + (sowners[j] / bs - owners[j]);
135: }
136: /* b index in the IS sfrom */
137: k = sowners[rank] / bs + mbs;
138: for (i = ec, j = 0; i < 2 * ec; i++, j++) sgarray[i] = k + j;
139: ISCreateBlock(PETSC_COMM_SELF, bs, 2 * ec, sgarray, PETSC_COPY_VALUES, &from);
141: /* x index in the IS sto */
142: k = sowners[rank] / bs + mbs;
143: for (i = 0; i < ec; i++) stmp[i] = (k + i);
144: /* b index in the IS sto */
145: for (i = ec; i < 2 * ec; i++) stmp[i] = sgarray[i - ec];
147: ISCreateBlock(PETSC_COMM_SELF, bs, 2 * ec, stmp, PETSC_COPY_VALUES, &to);
149: VecScatterCreate(sbaij->slvec0, from, sbaij->slvec1, to, &sbaij->sMvctx);
150: VecScatterViewFromOptions(sbaij->sMvctx, (PetscObject)mat, "-matmult_vecscatter_view");
152: VecGetLocalSize(sbaij->slvec1, &nt);
153: VecGetArray(sbaij->slvec1, &ptr);
154: VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, ptr, &sbaij->slvec1a);
155: VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, ptr + bs * mbs, &sbaij->slvec1b);
156: VecRestoreArray(sbaij->slvec1, &ptr);
158: VecGetArray(sbaij->slvec0, &ptr);
159: VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, ptr + bs * mbs, &sbaij->slvec0b);
160: VecRestoreArray(sbaij->slvec0, &ptr);
162: PetscFree(stmp);
164: ISDestroy(&from);
165: ISDestroy(&to);
166: PetscFree2(sgarray, ec_owner);
167: return 0;
168: }
170: /*
171: Takes the local part of an already assembled MPISBAIJ matrix
172: and disassembles it. This is to allow new nonzeros into the matrix
173: that require more communication in the matrix vector multiply.
174: Thus certain data-structures must be rebuilt.
176: Kind of slow! But that's what application programmers get when
177: they are sloppy.
178: */
179: PetscErrorCode MatDisAssemble_MPISBAIJ(Mat A)
180: {
181: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)A->data;
182: Mat B = baij->B, Bnew;
183: Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ *)B->data;
184: PetscInt i, j, mbs = Bbaij->mbs, n = A->cmap->N, col, *garray = baij->garray;
185: PetscInt k, bs = A->rmap->bs, bs2 = baij->bs2, *rvals, *nz, m = A->rmap->n;
186: MatScalar *a = Bbaij->a;
187: PetscScalar *atmp;
188: #if defined(PETSC_USE_REAL_MAT_SINGLE)
189: PetscInt l;
190: #endif
192: #if defined(PETSC_USE_REAL_MAT_SINGLE)
193: PetscMalloc1(A->rmap->bs, &atmp);
194: #endif
195: /* free stuff related to matrix-vec multiply */
196: VecDestroy(&baij->lvec);
197: VecScatterDestroy(&baij->Mvctx);
199: VecDestroy(&baij->slvec0);
200: VecDestroy(&baij->slvec0b);
201: VecDestroy(&baij->slvec1);
202: VecDestroy(&baij->slvec1a);
203: VecDestroy(&baij->slvec1b);
205: if (baij->colmap) {
206: #if defined(PETSC_USE_CTABLE)
207: PetscTableDestroy(&baij->colmap);
208: #else
209: PetscFree(baij->colmap);
210: #endif
211: }
213: /* make sure that B is assembled so we can access its values */
214: MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY);
215: MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY);
217: /* invent new B and copy stuff over */
218: PetscMalloc1(mbs, &nz);
219: for (i = 0; i < mbs; i++) nz[i] = Bbaij->i[i + 1] - Bbaij->i[i];
220: MatCreate(PETSC_COMM_SELF, &Bnew);
221: MatSetSizes(Bnew, m, n, m, n);
222: MatSetType(Bnew, ((PetscObject)B)->type_name);
223: MatSeqBAIJSetPreallocation(Bnew, B->rmap->bs, 0, nz);
224: PetscFree(nz);
226: if (Bbaij->nonew >= 0) { /* Inherit insertion error options (if positive). */
227: ((Mat_SeqSBAIJ *)Bnew->data)->nonew = Bbaij->nonew;
228: }
230: /*
231: Ensure that B's nonzerostate is monotonically increasing.
232: Or should this follow the MatSetValues() loop to preserve B's nonzerstate across a MatDisAssemble() call?
233: */
234: Bnew->nonzerostate = B->nonzerostate;
235: PetscMalloc1(bs, &rvals);
236: for (i = 0; i < mbs; i++) {
237: rvals[0] = bs * i;
238: for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
239: for (j = Bbaij->i[i]; j < Bbaij->i[i + 1]; j++) {
240: col = garray[Bbaij->j[j]] * bs;
241: for (k = 0; k < bs; k++) {
242: #if defined(PETSC_USE_REAL_MAT_SINGLE)
243: for (l = 0; l < bs; l++) atmp[l] = a[j * bs2 + l];
244: #else
245: atmp = a + j * bs2 + k * bs;
246: #endif
247: MatSetValues_SeqSBAIJ(Bnew, bs, rvals, 1, &col, atmp, B->insertmode);
248: col++;
249: }
250: }
251: }
252: #if defined(PETSC_USE_REAL_MAT_SINGLE)
253: PetscFree(atmp);
254: #endif
255: PetscFree(baij->garray);
257: baij->garray = NULL;
259: PetscFree(rvals);
260: MatDestroy(&B);
262: baij->B = Bnew;
264: A->was_assembled = PETSC_FALSE;
265: return 0;
266: }