Actual source code: fdaij.c

  1: #include <../src/mat/impls/aij/seq/aij.h>
  2: #include <../src/mat/impls/baij/seq/baij.h>
  3: #include <../src/mat/impls/sell/seq/sell.h>
  4: #include <petsc/private/isimpl.h>

  6: /*
  7:     This routine is shared by SeqAIJ and SeqBAIJ matrices,
  8:     since it operators only on the nonzero structure of the elements or blocks.
  9: */
 10: PetscErrorCode MatFDColoringCreate_SeqXAIJ(Mat mat, ISColoring iscoloring, MatFDColoring c)
 11: {
 12:   PetscInt  bs, nis = iscoloring->n, m = mat->rmap->n;
 13:   PetscBool isBAIJ, isSELL;

 15:   /* set default brows and bcols for speedup inserting the dense matrix into sparse Jacobian */
 16:   MatGetBlockSize(mat, &bs);
 17:   PetscObjectBaseTypeCompare((PetscObject)mat, MATSEQBAIJ, &isBAIJ);
 18:   PetscObjectTypeCompare((PetscObject)mat, MATSEQSELL, &isSELL);
 19:   if (isBAIJ) {
 20:     c->brows = m;
 21:     c->bcols = 1;
 22:   } else { /* seqaij matrix */
 23:     /* bcols is chosen s.t. dy-array takes 50% of memory space as mat */
 24:     PetscReal mem;
 25:     PetscInt  nz, brows, bcols;
 26:     if (isSELL) {
 27:       Mat_SeqSELL *spA = (Mat_SeqSELL *)mat->data;
 28:       nz               = spA->nz;
 29:     } else {
 30:       Mat_SeqAIJ *spA = (Mat_SeqAIJ *)mat->data;
 31:       nz              = spA->nz;
 32:     }

 34:     bs    = 1; /* only bs=1 is supported for SeqAIJ matrix */
 35:     mem   = nz * (sizeof(PetscScalar) + sizeof(PetscInt)) + 3 * m * sizeof(PetscInt);
 36:     bcols = (PetscInt)(0.5 * mem / (m * sizeof(PetscScalar)));
 37:     brows = 1000 / bcols;
 38:     if (bcols > nis) bcols = nis;
 39:     if (brows == 0 || brows > m) brows = m;
 40:     c->brows = brows;
 41:     c->bcols = bcols;
 42:   }

 44:   c->M       = mat->rmap->N / bs; /* set total rows, columns and local rows */
 45:   c->N       = mat->cmap->N / bs;
 46:   c->m       = mat->rmap->N / bs;
 47:   c->rstart  = 0;
 48:   c->ncolors = nis;
 49:   c->ctype   = iscoloring->ctype;
 50:   return 0;
 51: }

 53: /*
 54:  Reorder Jentry such that blocked brows*bols of entries from dense matrix are inserted into Jacobian for improved cache performance
 55:    Input Parameters:
 56: +  mat - the matrix containing the nonzero structure of the Jacobian
 57: .  color - the coloring context
 58: -  nz - number of local non-zeros in mat
 59: */
 60: PetscErrorCode MatFDColoringSetUpBlocked_AIJ_Private(Mat mat, MatFDColoring c, PetscInt nz)
 61: {
 62:   PetscInt  i, j, nrows, nbcols, brows = c->brows, bcols = c->bcols, mbs = c->m, nis = c->ncolors;
 63:   PetscInt *color_start, *row_start, *nrows_new, nz_new, row_end;

 65:   if (brows < 1 || brows > mbs) brows = mbs;
 66:   PetscMalloc2(bcols + 1, &color_start, bcols, &row_start);
 67:   PetscCalloc1(nis, &nrows_new);
 68:   PetscMalloc1(bcols * mat->rmap->n, &c->dy);

 70:   nz_new             = 0;
 71:   nbcols             = 0;
 72:   color_start[bcols] = 0;

 74:   if (c->htype[0] == 'd') { /* ----  c->htype == 'ds', use MatEntry --------*/
 75:     MatEntry *Jentry_new, *Jentry = c->matentry;

 77:     PetscMalloc1(nz, &Jentry_new);
 78:     for (i = 0; i < nis; i += bcols) { /* loop over colors */
 79:       if (i + bcols > nis) {
 80:         color_start[nis - i] = color_start[bcols];
 81:         bcols                = nis - i;
 82:       }

 84:       color_start[0] = color_start[bcols];
 85:       for (j = 0; j < bcols; j++) {
 86:         color_start[j + 1] = c->nrows[i + j] + color_start[j];
 87:         row_start[j]       = 0;
 88:       }

 90:       row_end = brows;
 91:       if (row_end > mbs) row_end = mbs;

 93:       while (row_end <= mbs) {        /* loop over block rows */
 94:         for (j = 0; j < bcols; j++) { /* loop over block columns */
 95:           nrows = c->nrows[i + j];
 96:           nz    = color_start[j];
 97:           while (row_start[j] < nrows) {
 98:             if (Jentry[nz].row >= row_end) {
 99:               color_start[j] = nz;
100:               break;
101:             } else {                                                 /* copy Jentry[nz] to Jentry_new[nz_new] */
102:               Jentry_new[nz_new].row     = Jentry[nz].row + j * mbs; /* index in dy-array */
103:               Jentry_new[nz_new].col     = Jentry[nz].col;
104:               Jentry_new[nz_new].valaddr = Jentry[nz].valaddr;
105:               nz_new++;
106:               nz++;
107:               row_start[j]++;
108:             }
109:           }
110:         }
111:         if (row_end == mbs) break;
112:         row_end += brows;
113:         if (row_end > mbs) row_end = mbs;
114:       }
115:       nrows_new[nbcols++] = nz_new;
116:     }
117:     PetscFree(Jentry);
118:     c->matentry = Jentry_new;
119:   } else { /* ---------  c->htype == 'wp', use MatEntry2 ------------------*/
120:     MatEntry2 *Jentry2_new, *Jentry2 = c->matentry2;

122:     PetscMalloc1(nz, &Jentry2_new);
123:     for (i = 0; i < nis; i += bcols) { /* loop over colors */
124:       if (i + bcols > nis) {
125:         color_start[nis - i] = color_start[bcols];
126:         bcols                = nis - i;
127:       }

129:       color_start[0] = color_start[bcols];
130:       for (j = 0; j < bcols; j++) {
131:         color_start[j + 1] = c->nrows[i + j] + color_start[j];
132:         row_start[j]       = 0;
133:       }

135:       row_end = brows;
136:       if (row_end > mbs) row_end = mbs;

138:       while (row_end <= mbs) {        /* loop over block rows */
139:         for (j = 0; j < bcols; j++) { /* loop over block columns */
140:           nrows = c->nrows[i + j];
141:           nz    = color_start[j];
142:           while (row_start[j] < nrows) {
143:             if (Jentry2[nz].row >= row_end) {
144:               color_start[j] = nz;
145:               break;
146:             } else {                                                   /* copy Jentry2[nz] to Jentry2_new[nz_new] */
147:               Jentry2_new[nz_new].row     = Jentry2[nz].row + j * mbs; /* index in dy-array */
148:               Jentry2_new[nz_new].valaddr = Jentry2[nz].valaddr;
149:               nz_new++;
150:               nz++;
151:               row_start[j]++;
152:             }
153:           }
154:         }
155:         if (row_end == mbs) break;
156:         row_end += brows;
157:         if (row_end > mbs) row_end = mbs;
158:       }
159:       nrows_new[nbcols++] = nz_new;
160:     }
161:     PetscFree(Jentry2);
162:     c->matentry2 = Jentry2_new;
163:   } /* ---------------------------------------------*/

165:   PetscFree2(color_start, row_start);

167:   for (i = nbcols - 1; i > 0; i--) nrows_new[i] -= nrows_new[i - 1];
168:   PetscFree(c->nrows);
169:   c->nrows = nrows_new;
170:   return 0;
171: }

173: PetscErrorCode MatFDColoringSetUp_SeqXAIJ(Mat mat, ISColoring iscoloring, MatFDColoring c)
174: {
175:   PetscInt           i, n, nrows, mbs = c->m, j, k, m, ncols, col, nis = iscoloring->n, *rowhit, bs, bs2, *spidx, nz, tmp;
176:   const PetscInt    *is, *row, *ci, *cj;
177:   PetscBool          isBAIJ, isSELL;
178:   const PetscScalar *A_val;
179:   PetscScalar      **valaddrhit;
180:   MatEntry          *Jentry;
181:   MatEntry2         *Jentry2;

183:   ISColoringGetIS(iscoloring, PETSC_OWN_POINTER, PETSC_IGNORE, &c->isa);

185:   MatGetBlockSize(mat, &bs);
186:   PetscObjectBaseTypeCompare((PetscObject)mat, MATSEQBAIJ, &isBAIJ);
187:   PetscObjectTypeCompare((PetscObject)mat, MATSEQSELL, &isSELL);
188:   if (isBAIJ) {
189:     Mat_SeqBAIJ *spA = (Mat_SeqBAIJ *)mat->data;

191:     A_val = spA->a;
192:     nz    = spA->nz;
193:   } else if (isSELL) {
194:     Mat_SeqSELL *spA = (Mat_SeqSELL *)mat->data;

196:     A_val = spA->val;
197:     nz    = spA->nz;
198:     bs    = 1; /* only bs=1 is supported for SeqSELL matrix */
199:   } else {
200:     Mat_SeqAIJ *spA = (Mat_SeqAIJ *)mat->data;

202:     A_val = spA->a;
203:     nz    = spA->nz;
204:     bs    = 1; /* only bs=1 is supported for SeqAIJ matrix */
205:   }

207:   PetscMalloc2(nis, &c->ncolumns, nis, &c->columns);
208:   PetscMalloc1(nis, &c->nrows); /* nrows is freed separately from ncolumns and columns */

210:   if (c->htype[0] == 'd') {
211:     PetscMalloc1(nz, &Jentry);
212:     c->matentry = Jentry;
213:   } else if (c->htype[0] == 'w') {
214:     PetscMalloc1(nz, &Jentry2);
215:     c->matentry2 = Jentry2;
216:   } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "htype is not supported");

218:   if (isBAIJ) {
219:     MatGetColumnIJ_SeqBAIJ_Color(mat, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &ci, &cj, &spidx, NULL);
220:   } else if (isSELL) {
221:     MatGetColumnIJ_SeqSELL_Color(mat, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &ci, &cj, &spidx, NULL);
222:   } else {
223:     MatGetColumnIJ_SeqAIJ_Color(mat, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &ci, &cj, &spidx, NULL);
224:   }

226:   PetscCalloc1(c->m, &rowhit);
227:   PetscMalloc1(c->m, &valaddrhit);

229:   nz = 0;
230:   for (i = 0; i < nis; i++) { /* loop over colors */
231:     ISGetLocalSize(c->isa[i], &n);
232:     ISGetIndices(c->isa[i], &is);

234:     c->ncolumns[i] = n;
235:     c->columns[i]  = (PetscInt *)is;
236:     /* note: we know that c->isa is going to be around as long at the c->columns values */
237:     ISRestoreIndices(c->isa[i], &is);

239:     /* fast, crude version requires O(N*N) work */
240:     bs2   = bs * bs;
241:     nrows = 0;
242:     for (j = 0; j < n; j++) { /* loop over columns */
243:       col = is[j];
244:       tmp = ci[col];
245:       row = cj + tmp;
246:       m   = ci[col + 1] - tmp;
247:       nrows += m;
248:       for (k = 0; k < m; k++) { /* loop over columns marking them in rowhit */
249:         rowhit[*row]       = col + 1;
250:         valaddrhit[*row++] = (PetscScalar *)&A_val[bs2 * spidx[tmp + k]];
251:       }
252:     }
253:     c->nrows[i] = nrows; /* total num of rows for this color */

255:     if (c->htype[0] == 'd') {
256:       for (j = 0; j < mbs; j++) { /* loop over rows */
257:         if (rowhit[j]) {
258:           Jentry[nz].row     = j;             /* local row index */
259:           Jentry[nz].col     = rowhit[j] - 1; /* local column index */
260:           Jentry[nz].valaddr = valaddrhit[j]; /* address of mat value for this entry */
261:           nz++;
262:           rowhit[j] = 0.0; /* zero rowhit for reuse */
263:         }
264:       }
265:     } else {                      /* c->htype == 'wp' */
266:       for (j = 0; j < mbs; j++) { /* loop over rows */
267:         if (rowhit[j]) {
268:           Jentry2[nz].row     = j;             /* local row index */
269:           Jentry2[nz].valaddr = valaddrhit[j]; /* address of mat value for this entry */
270:           nz++;
271:           rowhit[j] = 0.0; /* zero rowhit for reuse */
272:         }
273:       }
274:     }
275:   }

277:   if (c->bcols > 1) { /* reorder Jentry for faster MatFDColoringApply() */
278:     MatFDColoringSetUpBlocked_AIJ_Private(mat, c, nz);
279:   }

281:   if (isBAIJ) {
282:     MatRestoreColumnIJ_SeqBAIJ_Color(mat, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &ci, &cj, &spidx, NULL);
283:     PetscMalloc1(bs * mat->rmap->n, &c->dy);
284:   } else if (isSELL) {
285:     MatRestoreColumnIJ_SeqSELL_Color(mat, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &ci, &cj, &spidx, NULL);
286:   } else {
287:     MatRestoreColumnIJ_SeqAIJ_Color(mat, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &ci, &cj, &spidx, NULL);
288:   }
289:   PetscFree(rowhit);
290:   PetscFree(valaddrhit);
291:   ISColoringRestoreIS(iscoloring, PETSC_OWN_POINTER, &c->isa);

293:   VecCreateGhost(PetscObjectComm((PetscObject)mat), mat->rmap->n, PETSC_DETERMINE, 0, NULL, &c->vscale);
294:   PetscInfo(c, "ncolors %" PetscInt_FMT ", brows %" PetscInt_FMT " and bcols %" PetscInt_FMT " are used.\n", c->ncolors, c->brows, c->bcols);
295:   return 0;
296: }