Actual source code: mmsell.c
/*
   Support for the parallel SELL matrix-vector multiply
*/
#include <../src/mat/impls/sell/mpi/mpisell.h>
#include <petsc/private/isimpl.h>

/*
   Takes the local part of an already assembled MPISELL matrix
   and disassembles it. This is to allow new nonzeros into the matrix
   that require more communication in the matrix-vector multiply.
   Thus certain data structures must be rebuilt.

   Kind of slow! But that's what application programmers get when
   they are sloppy.
*/
PetscErrorCode MatDisAssemble_MPISELL(Mat A)
{
  Mat_MPISELL *sell  = (Mat_MPISELL *)A->data;
  Mat          B     = sell->B, Bnew;
  Mat_SeqSELL *Bsell = (Mat_SeqSELL *)B->data;
  PetscInt     i, j, totalslices, N = A->cmap->N, row;
  PetscBool    isnonzero;

  PetscFunctionBegin;
  /* free stuff related to the matrix-vector multiply */
  PetscCall(VecDestroy(&sell->lvec));
  PetscCall(VecScatterDestroy(&sell->Mvctx));
  if (sell->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscTableDestroy(&sell->colmap));
#else
    PetscCall(PetscFree(sell->colmap));
#endif
  }

  /* make sure that B is assembled so we can access its values */
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  /* invent new B and copy stuff over */
  PetscCall(MatCreate(PETSC_COMM_SELF, &Bnew));
  PetscCall(MatSetSizes(Bnew, B->rmap->n, N, B->rmap->n, N));
  PetscCall(MatSetBlockSizesFromMats(Bnew, A, A));
  PetscCall(MatSetType(Bnew, ((PetscObject)B)->type_name));
  PetscCall(MatSeqSELLSetPreallocation(Bnew, 0, Bsell->rlen));
  if (Bsell->nonew >= 0) { /* inherit insertion error options (if nonnegative) */
    ((Mat_SeqSELL *)Bnew->data)->nonew = Bsell->nonew;
  }

  /*
    Ensure that B's nonzerostate is monotonically increasing.
    Or should this follow the MatSetValues() loop to preserve B's nonzerostate across a MatDisAssemble() call?
  */
  Bnew->nonzerostate = B->nonzerostate;

  totalslices = B->rmap->n / 8 + ((B->rmap->n & 0x07) ? 1 : 0); /* ceil(n/8): a partially filled slice still counts */
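  /*
     Layout reminder (descriptive comment, derived from the loop below): SELL packs 8
     consecutive rows into one slice and stores the slice column-major, so entry j of
     slice i belongs to row 8*i + ((j - sliidx[i]) % 8) and sits at position
     (j - sliidx[i]) / 8 within that row. Rows are padded to the slice width, so an
     entry is a genuine nonzero only when its position is below rlen[row].
  */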
  for (i = 0; i < totalslices; i++) { /* loop over slices */
    for (j = Bsell->sliidx[i], row = 0; j < Bsell->sliidx[i + 1]; j++, row = ((row + 1) & 0x07)) {
      isnonzero = (PetscBool)((j - Bsell->sliidx[i]) / 8 < Bsell->rlen[8 * i + row]);
      if (isnonzero) PetscCall(MatSetValue(Bnew, 8 * i + row, sell->garray[Bsell->colidx[j]], Bsell->val[j], B->insertmode));
    }
  }

  PetscCall(PetscFree(sell->garray));
  PetscCall(MatDestroy(&B));

  sell->B          = Bnew;
  A->was_assembled = PETSC_FALSE;
  A->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}
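
/*
   Illustrative sketch (not part of the original file): disassembly is typically
   triggered when new nonzeros that need extra communication are inserted into an
   already assembled MATMPISELL matrix. All names below are placeholders.

     MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
     MatSetValue(A, myrow, newremotecol, 1.0, INSERT_VALUES); // new off-process nonzero
     MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);                 // reassembly disassembles B first
     MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);

   Whether this path is actually reached also depends on options such as
   MAT_NEW_NONZERO_LOCATIONS and on where the new entry falls.
*/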

PetscErrorCode MatSetUpMultiply_MPISELL(Mat mat)
{
  Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;
  Mat_SeqSELL *B    = (Mat_SeqSELL *)(sell->B->data);
  PetscInt     i, j, *bcolidx = B->colidx, ec = 0, *garray, totalslices;
  IS           from, to;
  Vec          gvec;
  PetscBool    isnonzero;
#if defined(PETSC_USE_CTABLE)
  PetscTable         gid1_lid1;
  PetscTablePosition tpos;
  PetscInt           gid, lid;
#else
  PetscInt N = mat->cmap->N, *indices;
#endif

  PetscFunctionBegin;
  totalslices = sell->B->rmap->n / 8 + ((sell->B->rmap->n & 0x07) ? 1 : 0); /* ceil(n/8) */

  /* ec counts the number of columns that contain nonzeros */
#if defined(PETSC_USE_CTABLE)
  /* use a table */
  PetscCall(PetscTableCreate(sell->B->rmap->n, mat->cmap->N + 1, &gid1_lid1));
  for (i = 0; i < totalslices; i++) { /* loop over slices */
    for (j = B->sliidx[i]; j < B->sliidx[i + 1]; j++) {
      isnonzero = (PetscBool)((j - B->sliidx[i]) / 8 < B->rlen[(i << 3) + (j & 0x07)]);
      if (isnonzero) { /* check the mask bit */
        PetscInt data, gid1 = bcolidx[j] + 1;
        PetscCall(PetscTableFind(gid1_lid1, gid1, &data));
        if (!data) {
          /* one-based table */
          PetscCall(PetscTableAdd(gid1_lid1, gid1, ++ec, INSERT_VALUES));
        }
      }
    }
  }

  /* form array of columns we need */
  PetscCall(PetscMalloc1(ec, &garray));
  PetscCall(PetscTableGetHeadPosition(gid1_lid1, &tpos));
  while (tpos) {
    PetscCall(PetscTableGetNext(gid1_lid1, &tpos, &gid, &lid));
    gid--;
    lid--;
    garray[lid] = gid;
  }
  PetscCall(PetscSortInt(ec, garray)); /* sort, then rebuild the table with the sorted local ids */
  PetscCall(PetscTableRemoveAll(gid1_lid1));
  for (i = 0; i < ec; i++) PetscCall(PetscTableAdd(gid1_lid1, garray[i] + 1, i + 1, INSERT_VALUES));

  /* compact out the extra columns in B */
  for (i = 0; i < totalslices; i++) { /* loop over slices */
    for (j = B->sliidx[i]; j < B->sliidx[i + 1]; j++) {
      isnonzero = (PetscBool)((j - B->sliidx[i]) / 8 < B->rlen[(i << 3) + (j & 0x07)]);
      if (isnonzero) {
        PetscInt gid1 = bcolidx[j] + 1;
        PetscCall(PetscTableFind(gid1_lid1, gid1, &lid));
        lid--;
        bcolidx[j] = lid;
      }
    }
  }
  PetscCall(PetscLayoutDestroy(&sell->B->cmap));
  PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sell->B), ec, ec, 1, &sell->B->cmap));
  PetscCall(PetscTableDestroy(&gid1_lid1));
#else
  /* Make an array as long as the number of columns */
  PetscCall(PetscCalloc1(N, &indices));
  /* mark those columns that are in sell->B */
  for (i = 0; i < totalslices; i++) { /* loop over slices */
    for (j = B->sliidx[i]; j < B->sliidx[i + 1]; j++) {
      isnonzero = (PetscBool)((j - B->sliidx[i]) / 8 < B->rlen[(i << 3) + (j & 0x07)]);
      if (isnonzero) {
        if (!indices[bcolidx[j]]) ec++;
        indices[bcolidx[j]] = 1;
      }
    }
  }

  /* form array of columns we need */
  PetscCall(PetscMalloc1(ec, &garray));
  ec = 0;
  for (i = 0; i < N; i++) {
    if (indices[i]) garray[ec++] = i;
  }

  /* make indices now point into garray */
  for (i = 0; i < ec; i++) indices[garray[i]] = i;

  /* compact out the extra columns in B */
  for (i = 0; i < totalslices; i++) { /* loop over slices */
    for (j = B->sliidx[i]; j < B->sliidx[i + 1]; j++) {
      isnonzero = (PetscBool)((j - B->sliidx[i]) / 8 < B->rlen[(i << 3) + (j & 0x07)]);
      if (isnonzero) bcolidx[j] = indices[bcolidx[j]];
    }
  }
  PetscCall(PetscLayoutDestroy(&sell->B->cmap));
  PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sell->B), ec, ec, 1, &sell->B->cmap));
  PetscCall(PetscFree(indices));
#endif
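  /*
     Worked example of the compaction above (illustrative values): if the off-diagonal
     block B references global columns {3, 7, 12}, then ec = 3, garray = {3, 7, 12},
     the stored column indices are rewritten 3 -> 0, 7 -> 1, 12 -> 2, and B's column
     layout is shrunk to ec columns so it can act directly on the gathered vector lvec.
  */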
  /* create the local vector that is scattered into */
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, ec, &sell->lvec));
  /* create two temporary index sets for building the scatter */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ec, garray, PETSC_COPY_VALUES, &from));
  PetscCall(ISCreateStride(PETSC_COMM_SELF, ec, 0, 1, &to));

  /* create a temporary global vector to generate the scatter context */
  /* this does not allocate the array's memory, so it is efficient */
  PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec));

  /* generate the scatter context */
  PetscCall(VecScatterCreate(gvec, from, sell->lvec, to, &sell->Mvctx));
  PetscCall(VecScatterViewFromOptions(sell->Mvctx, (PetscObject)mat, "-matmult_vecscatter_view"));

  sell->garray = garray;

  PetscCall(ISDestroy(&from));
  PetscCall(ISDestroy(&to));
  PetscCall(VecDestroy(&gvec));
  PetscFunctionReturn(0);
}
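
/*
   Sketch of how the objects built above are used during a matrix-vector product
   (a paraphrase of the usual PETSc pattern for distributed matrices, not code
   copied from this file):

     VecScatterBegin(sell->Mvctx, xx, sell->lvec, INSERT_VALUES, SCATTER_FORWARD);
     (*sell->A->ops->mult)(sell->A, xx, yy);                 // diagonal block, local part of xx
     VecScatterEnd(sell->Mvctx, xx, sell->lvec, INSERT_VALUES, SCATTER_FORWARD);
     (*sell->B->ops->multadd)(sell->B, sell->lvec, yy, yy);  // off-diagonal block, gathered ghost values

   where xx and yy are the distributed input and output vectors.
*/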

/* ugly stuff added for Glenn; someday we should fix this up */
static PetscInt *auglyrmapd = NULL, *auglyrmapo = NULL; /* mappings from the "diagonal" and "off-diagonal" parts of the local matrix back into the local ordering */
static Vec auglydd = NULL, auglyoo = NULL;              /* work vectors used to scale the two parts of the local matrix */

PetscErrorCode MatMPISELLDiagonalScaleLocalSetUp(Mat inA, Vec scale)
{
  Mat_MPISELL *ina = (Mat_MPISELL *)inA->data; /* access the private part of the matrix */
  PetscInt     i, n, nt, cstart, cend, no, *garray = ina->garray, *lindices;
  PetscInt    *r_rmapd, *r_rmapo;

  PetscFunctionBegin;
  PetscCall(MatGetOwnershipRange(inA, &cstart, &cend));
  PetscCall(MatGetSize(ina->A, NULL, &n));
  PetscCall(PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapd));
  nt = 0;
  for (i = 0; i < inA->rmap->mapping->n; i++) {
    if (inA->rmap->mapping->indices[i] >= cstart && inA->rmap->mapping->indices[i] < cend) {
      nt++;
      r_rmapd[i] = inA->rmap->mapping->indices[i] + 1;
    }
  }

  PetscCall(PetscMalloc1(n + 1, &auglyrmapd));
  for (i = 0; i < inA->rmap->mapping->n; i++) {
    if (r_rmapd[i]) auglyrmapd[(r_rmapd[i] - 1) - cstart] = i;
  }
  PetscCall(PetscFree(r_rmapd));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, n, &auglydd));

  PetscCall(PetscCalloc1(inA->cmap->N + 1, &lindices));
  for (i = 0; i < ina->B->cmap->n; i++) lindices[garray[i]] = i + 1;
  no = inA->rmap->mapping->n - nt; /* upper bound on the number of off-process (ghost) entries */
  PetscCall(PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapo));
  nt = 0;
  for (i = 0; i < inA->rmap->mapping->n; i++) {
    if (lindices[inA->rmap->mapping->indices[i]]) {
      nt++;
      r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]];
    }
  }

  PetscCall(PetscFree(lindices));
  PetscCall(PetscMalloc1(nt + 1, &auglyrmapo));
  for (i = 0; i < inA->rmap->mapping->n; i++) {
    if (r_rmapo[i]) auglyrmapo[(r_rmapo[i] - 1)] = i;
  }
  PetscCall(PetscFree(r_rmapo));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, nt, &auglyoo));
  PetscFunctionReturn(0);
}
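
/*
   What the setup above produces (descriptive summary, derived from the code):
   auglyrmapd[k] is the position in the local (mapped) ordering of owned global column
   cstart+k, and auglyrmapo[k] is the position in the local ordering of the k-th ghost
   column listed in garray; auglydd and auglyoo are work vectors of the matching lengths
   into which the caller-supplied scale vector is gathered before scaling ina->A and ina->B.
*/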

PetscErrorCode MatDiagonalScaleLocal_MPISELL(Mat A, Vec scale)
{
  Mat_MPISELL       *a = (Mat_MPISELL *)A->data; /* access the private part of the matrix */
  PetscInt           n, i;
  PetscScalar       *d, *o;
  const PetscScalar *s;

  PetscFunctionBegin;
  if (!auglyrmapd) PetscCall(MatMPISELLDiagonalScaleLocalSetUp(A, scale));

  PetscCall(VecGetArrayRead(scale, &s));

  PetscCall(VecGetLocalSize(auglydd, &n));
  PetscCall(VecGetArray(auglydd, &d));
  /* copy the "diagonal" (true local) portion of scale into the dd vector */
  for (i = 0; i < n; i++) d[i] = s[auglyrmapd[i]];
  PetscCall(VecRestoreArray(auglydd, &d));
  /* column scale the "diagonal" portion of the local matrix */
  PetscCall(MatDiagonalScale(a->A, NULL, auglydd));

  PetscCall(VecGetLocalSize(auglyoo, &n));
  PetscCall(VecGetArray(auglyoo, &o));
  /* copy the "off-diagonal" portion of scale into the oo vector */
  for (i = 0; i < n; i++) o[i] = s[auglyrmapo[i]];
  PetscCall(VecRestoreArrayRead(scale, &s));
  PetscCall(VecRestoreArray(auglyoo, &o));
  /* column scale the "off-diagonal" portion of the local matrix */
  PetscCall(MatDiagonalScale(a->B, NULL, auglyoo));
  PetscFunctionReturn(0);
}
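
/*
   Illustrative usage (an assumption-laden sketch, not code from this file: it assumes
   MatDiagonalScaleLocal() dispatches here for MATMPISELL and that A carries a
   local-to-global mapping; all names are placeholders):

     MatSetLocalToGlobalMapping(A, map, map);
     ... assemble A ...
     VecCreateSeq(PETSC_COMM_SELF, nlocal_mapped, &lscale);
     ... fill lscale in the local (mapped) ordering ...
     MatDiagonalScaleLocal(A, lscale); // column-scales both local blocks of A

   Only columns are scaled, as shown by the NULL row-scaling argument in the two
   MatDiagonalScale() calls above.
*/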