Actual source code: fdmpiaij.c

  1: #include <../src/mat/impls/sell/mpi/mpisell.h>
  2: #include <../src/mat/impls/aij/mpi/mpiaij.h>
  3: #include <../src/mat/impls/baij/mpi/mpibaij.h>
  4: #include <petsc/private/isimpl.h>

  6: PetscErrorCode MatFDColoringApply_BAIJ(Mat J, MatFDColoring coloring, Vec x1, void *sctx)
  7: {
  8:   PetscErrorCode (*f)(void *, Vec, Vec, void *) = (PetscErrorCode(*)(void *, Vec, Vec, void *))coloring->f;
  9:   PetscInt           k, cstart, cend, l, row, col, nz, spidx, i, j;
 10:   PetscScalar        dx = 0.0, *w3_array, *dy_i, *dy = coloring->dy;
 11:   PetscScalar       *vscale_array;
 12:   const PetscScalar *xx;
 13:   PetscReal          epsilon = coloring->error_rel, umin = coloring->umin, unorm;
 14:   Vec                w1 = coloring->w1, w2 = coloring->w2, w3, vscale = coloring->vscale;
 15:   void              *fctx  = coloring->fctx;
 16:   PetscInt           ctype = coloring->ctype, nxloc, nrows_k;
 17:   PetscScalar       *valaddr;
 18:   MatEntry          *Jentry  = coloring->matentry;
 19:   MatEntry2         *Jentry2 = coloring->matentry2;
 20:   const PetscInt     ncolors = coloring->ncolors, *ncolumns = coloring->ncolumns, *nrows = coloring->nrows;
 21:   PetscInt           bs = J->rmap->bs;

 23:   VecBindToCPU(x1, PETSC_TRUE);
 24:   /* (1) Set w1 = F(x1) */
 25:   if (!coloring->fset) {
 26:     PetscLogEventBegin(MAT_FDColoringFunction, coloring, 0, 0, 0);
 27:     (*f)(sctx, x1, w1, fctx);
 28:     PetscLogEventEnd(MAT_FDColoringFunction, coloring, 0, 0, 0);
 29:   } else {
 30:     coloring->fset = PETSC_FALSE;
 31:   }

 33:   /* (2) Compute vscale = 1./dx - the local scale factors, including ghost points */
 34:   VecGetLocalSize(x1, &nxloc);
 35:   if (coloring->htype[0] == 'w') {
 36:     /* vscale = dx is a constant scalar */
 37:     VecNorm(x1, NORM_2, &unorm);
 38:     dx = 1.0 / (PetscSqrtReal(1.0 + unorm) * epsilon);
 39:   } else {
 40:     VecGetArrayRead(x1, &xx);
 41:     VecGetArray(vscale, &vscale_array);
 42:     for (col = 0; col < nxloc; col++) {
 43:       dx = xx[col];
 44:       if (PetscAbsScalar(dx) < umin) {
 45:         if (PetscRealPart(dx) >= 0.0) dx = umin;
 46:         else if (PetscRealPart(dx) < 0.0) dx = -umin;
 47:       }
 48:       dx *= epsilon;
 49:       vscale_array[col] = 1.0 / dx;
 50:     }
 51:     VecRestoreArrayRead(x1, &xx);
 52:     VecRestoreArray(vscale, &vscale_array);
 53:   }
 54:   if (ctype == IS_COLORING_GLOBAL && coloring->htype[0] == 'd') {
 55:     VecGhostUpdateBegin(vscale, INSERT_VALUES, SCATTER_FORWARD);
 56:     VecGhostUpdateEnd(vscale, INSERT_VALUES, SCATTER_FORWARD);
 57:   }

 59:   /* (3) Loop over each color */
 60:   if (!coloring->w3) {
 61:     VecDuplicate(x1, &coloring->w3);
 62:     /* Vec is used intensively in particular piece of scalar CPU code; won't benefit from bouncing back and forth to the GPU */
 63:     VecBindToCPU(coloring->w3, PETSC_TRUE);
 64:   }
 65:   w3 = coloring->w3;

 67:   VecGetOwnershipRange(x1, &cstart, &cend); /* used by ghosted vscale */
 68:   if (vscale) VecGetArray(vscale, &vscale_array);
 69:   nz = 0;
 70:   for (k = 0; k < ncolors; k++) {
 71:     coloring->currentcolor = k;

 73:     /*
 74:       (3-1) Loop over each column associated with color
 75:       adding the perturbation to the vector w3 = x1 + dx.
 76:     */
 77:     VecCopy(x1, w3);
 78:     dy_i = dy;
 79:     for (i = 0; i < bs; i++) { /* Loop over a block of columns */
 80:       VecGetArray(w3, &w3_array);
 81:       if (ctype == IS_COLORING_GLOBAL) w3_array -= cstart; /* shift pointer so global index can be used */
 82:       if (coloring->htype[0] == 'w') {
 83:         for (l = 0; l < ncolumns[k]; l++) {
 84:           col = i + bs * coloring->columns[k][l]; /* local column (in global index!) of the matrix we are probing for */
 85:           w3_array[col] += 1.0 / dx;
 86:           if (i) w3_array[col - 1] -= 1.0 / dx; /* resume original w3[col-1] */
 87:         }
 88:       } else {                  /* htype == 'ds' */
 89:         vscale_array -= cstart; /* shift pointer so global index can be used */
 90:         for (l = 0; l < ncolumns[k]; l++) {
 91:           col = i + bs * coloring->columns[k][l]; /* local column (in global index!) of the matrix we are probing for */
 92:           w3_array[col] += 1.0 / vscale_array[col];
 93:           if (i) w3_array[col - 1] -= 1.0 / vscale_array[col - 1]; /* resume original w3[col-1] */
 94:         }
 95:         vscale_array += cstart;
 96:       }
 97:       if (ctype == IS_COLORING_GLOBAL) w3_array += cstart;
 98:       VecRestoreArray(w3, &w3_array);

100:       /*
101:        (3-2) Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
102:                            w2 = F(x1 + dx) - F(x1)
103:        */
104:       PetscLogEventBegin(MAT_FDColoringFunction, 0, 0, 0, 0);
105:       VecPlaceArray(w2, dy_i); /* place w2 to the array dy_i */
106:       (*f)(sctx, w3, w2, fctx);
107:       PetscLogEventEnd(MAT_FDColoringFunction, 0, 0, 0, 0);
108:       VecAXPY(w2, -1.0, w1);
109:       VecResetArray(w2);
110:       dy_i += nxloc; /* points to dy+i*nxloc */
111:     }

113:     /*
114:      (3-3) Loop over rows of vector, putting results into Jacobian matrix
115:     */
116:     nrows_k = nrows[k];
117:     if (coloring->htype[0] == 'w') {
118:       for (l = 0; l < nrows_k; l++) {
119:         row     = bs * Jentry2[nz].row; /* local row index */
120:         valaddr = Jentry2[nz++].valaddr;
121:         spidx   = 0;
122:         dy_i    = dy;
123:         for (i = 0; i < bs; i++) {   /* column of the block */
124:           for (j = 0; j < bs; j++) { /* row of the block */
125:             valaddr[spidx++] = dy_i[row + j] * dx;
126:           }
127:           dy_i += nxloc; /* points to dy+i*nxloc */
128:         }
129:       }
130:     } else { /* htype == 'ds' */
131:       for (l = 0; l < nrows_k; l++) {
132:         row     = bs * Jentry[nz].row; /* local row index */
133:         col     = bs * Jentry[nz].col; /* local column index */
134:         valaddr = Jentry[nz++].valaddr;
135:         spidx   = 0;
136:         dy_i    = dy;
137:         for (i = 0; i < bs; i++) {   /* column of the block */
138:           for (j = 0; j < bs; j++) { /* row of the block */
139:             valaddr[spidx++] = dy_i[row + j] * vscale_array[col + i];
140:           }
141:           dy_i += nxloc; /* points to dy+i*nxloc */
142:         }
143:       }
144:     }
145:   }
146:   MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY);
147:   MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY);
148:   if (vscale) VecRestoreArray(vscale, &vscale_array);

150:   coloring->currentcolor = -1;
151:   VecBindToCPU(x1, PETSC_FALSE);
152:   return 0;
153: }

155: /* this is declared PETSC_EXTERN because it is used by MatFDColoringUseDM() which is in the DM library */
156: PetscErrorCode MatFDColoringApply_AIJ(Mat J, MatFDColoring coloring, Vec x1, void *sctx)
157: {
158:   PetscErrorCode (*f)(void *, Vec, Vec, void *) = (PetscErrorCode(*)(void *, Vec, Vec, void *))coloring->f;
159:   PetscInt           k, cstart, cend, l, row, col, nz;
160:   PetscScalar        dx = 0.0, *y, *w3_array;
161:   const PetscScalar *xx;
162:   PetscScalar       *vscale_array;
163:   PetscReal          epsilon = coloring->error_rel, umin = coloring->umin, unorm;
164:   Vec                w1 = coloring->w1, w2 = coloring->w2, w3, vscale = coloring->vscale;
165:   void              *fctx  = coloring->fctx;
166:   ISColoringType     ctype = coloring->ctype;
167:   PetscInt           nxloc, nrows_k;
168:   MatEntry          *Jentry  = coloring->matentry;
169:   MatEntry2         *Jentry2 = coloring->matentry2;
170:   const PetscInt     ncolors = coloring->ncolors, *ncolumns = coloring->ncolumns, *nrows = coloring->nrows;
171:   PetscBool          alreadyboundtocpu;

173:   VecBoundToCPU(x1, &alreadyboundtocpu);
174:   VecBindToCPU(x1, PETSC_TRUE);
176:   /* (1) Set w1 = F(x1) */
177:   if (!coloring->fset) {
178:     PetscLogEventBegin(MAT_FDColoringFunction, 0, 0, 0, 0);
179:     (*f)(sctx, x1, w1, fctx);
180:     PetscLogEventEnd(MAT_FDColoringFunction, 0, 0, 0, 0);
181:   } else {
182:     coloring->fset = PETSC_FALSE;
183:   }

185:   /* (2) Compute vscale = 1./dx - the local scale factors, including ghost points */
186:   if (coloring->htype[0] == 'w') {
187:     /* vscale = 1./dx is a constant scalar */
188:     VecNorm(x1, NORM_2, &unorm);
189:     dx = 1.0 / (PetscSqrtReal(1.0 + unorm) * epsilon);
190:   } else {
191:     VecGetLocalSize(x1, &nxloc);
192:     VecGetArrayRead(x1, &xx);
193:     VecGetArray(vscale, &vscale_array);
194:     for (col = 0; col < nxloc; col++) {
195:       dx = xx[col];
196:       if (PetscAbsScalar(dx) < umin) {
197:         if (PetscRealPart(dx) >= 0.0) dx = umin;
198:         else if (PetscRealPart(dx) < 0.0) dx = -umin;
199:       }
200:       dx *= epsilon;
201:       vscale_array[col] = 1.0 / dx;
202:     }
203:     VecRestoreArrayRead(x1, &xx);
204:     VecRestoreArray(vscale, &vscale_array);
205:   }
206:   if (ctype == IS_COLORING_GLOBAL && coloring->htype[0] == 'd') {
207:     VecGhostUpdateBegin(vscale, INSERT_VALUES, SCATTER_FORWARD);
208:     VecGhostUpdateEnd(vscale, INSERT_VALUES, SCATTER_FORWARD);
209:   }

211:   /* (3) Loop over each color */
212:   if (!coloring->w3) { VecDuplicate(x1, &coloring->w3); }
213:   w3 = coloring->w3;

215:   VecGetOwnershipRange(x1, &cstart, &cend); /* used by ghosted vscale */
216:   if (vscale) VecGetArray(vscale, &vscale_array);
217:   nz = 0;

219:   if (coloring->bcols > 1) { /* use blocked insertion of Jentry */
220:     PetscInt     i, m = J->rmap->n, nbcols, bcols = coloring->bcols;
221:     PetscScalar *dy = coloring->dy, *dy_k;

223:     nbcols = 0;
224:     for (k = 0; k < ncolors; k += bcols) {
225:       /*
226:        (3-1) Loop over each column associated with color
227:        adding the perturbation to the vector w3 = x1 + dx.
228:        */

230:       dy_k = dy;
231:       if (k + bcols > ncolors) bcols = ncolors - k;
232:       for (i = 0; i < bcols; i++) {
233:         coloring->currentcolor = k + i;

235:         VecCopy(x1, w3);
236:         VecGetArray(w3, &w3_array);
237:         if (ctype == IS_COLORING_GLOBAL) w3_array -= cstart; /* shift pointer so global index can be used */
238:         if (coloring->htype[0] == 'w') {
239:           for (l = 0; l < ncolumns[k + i]; l++) {
240:             col = coloring->columns[k + i][l]; /* local column (in global index!) of the matrix we are probing for */
241:             w3_array[col] += 1.0 / dx;
242:           }
243:         } else {                  /* htype == 'ds' */
244:           vscale_array -= cstart; /* shift pointer so global index can be used */
245:           for (l = 0; l < ncolumns[k + i]; l++) {
246:             col = coloring->columns[k + i][l]; /* local column (in global index!) of the matrix we are probing for */
247:             w3_array[col] += 1.0 / vscale_array[col];
248:           }
249:           vscale_array += cstart;
250:         }
251:         if (ctype == IS_COLORING_GLOBAL) w3_array += cstart;
252:         VecRestoreArray(w3, &w3_array);

254:         /*
255:          (3-2) Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
256:                            w2 = F(x1 + dx) - F(x1)
257:          */
258:         PetscLogEventBegin(MAT_FDColoringFunction, 0, 0, 0, 0);
259:         VecPlaceArray(w2, dy_k); /* place w2 to the array dy_i */
260:         (*f)(sctx, w3, w2, fctx);
261:         PetscLogEventEnd(MAT_FDColoringFunction, 0, 0, 0, 0);
262:         VecAXPY(w2, -1.0, w1);
263:         VecResetArray(w2);
264:         dy_k += m; /* points to dy+i*nxloc */
265:       }

267:       /*
268:        (3-3) Loop over block rows of vector, putting results into Jacobian matrix
269:        */
270:       nrows_k = nrows[nbcols++];

272:       if (coloring->htype[0] == 'w') {
273:         for (l = 0; l < nrows_k; l++) {
274:           row = Jentry2[nz].row; /* local row index */
275:                                  /* The 'useless' ifdef is due to a bug in NVIDIA nvc 21.11, which triggers a segfault on this line. We write it in
276:              another way, and it seems work. See https://lists.mcs.anl.gov/pipermail/petsc-users/2021-December/045158.html
277:            */
278: #if defined(PETSC_USE_COMPLEX)
279:           PetscScalar *tmp = Jentry2[nz].valaddr;
280:           *tmp             = dy[row] * dx;
281: #else
282:           *(Jentry2[nz].valaddr) = dy[row] * dx;
283: #endif
284:           nz++;
285:         }
286:       } else { /* htype == 'ds' */
287:         for (l = 0; l < nrows_k; l++) {
288:           row = Jentry[nz].row; /* local row index */
289: #if defined(PETSC_USE_COMPLEX)  /* See https://lists.mcs.anl.gov/pipermail/petsc-users/2021-December/045158.html */
290:           PetscScalar *tmp = Jentry[nz].valaddr;
291:           *tmp             = dy[row] * vscale_array[Jentry[nz].col];
292: #else
293:           *(Jentry[nz].valaddr)  = dy[row] * vscale_array[Jentry[nz].col];
294: #endif
295:           nz++;
296:         }
297:       }
298:     }
299:   } else { /* bcols == 1 */
300:     for (k = 0; k < ncolors; k++) {
301:       coloring->currentcolor = k;

303:       /*
304:        (3-1) Loop over each column associated with color
305:        adding the perturbation to the vector w3 = x1 + dx.
306:        */
307:       VecCopy(x1, w3);
308:       VecGetArray(w3, &w3_array);
309:       if (ctype == IS_COLORING_GLOBAL) w3_array -= cstart; /* shift pointer so global index can be used */
310:       if (coloring->htype[0] == 'w') {
311:         for (l = 0; l < ncolumns[k]; l++) {
312:           col = coloring->columns[k][l]; /* local column (in global index!) of the matrix we are probing for */
313:           w3_array[col] += 1.0 / dx;
314:         }
315:       } else {                  /* htype == 'ds' */
316:         vscale_array -= cstart; /* shift pointer so global index can be used */
317:         for (l = 0; l < ncolumns[k]; l++) {
318:           col = coloring->columns[k][l]; /* local column (in global index!) of the matrix we are probing for */
319:           w3_array[col] += 1.0 / vscale_array[col];
320:         }
321:         vscale_array += cstart;
322:       }
323:       if (ctype == IS_COLORING_GLOBAL) w3_array += cstart;
324:       VecRestoreArray(w3, &w3_array);

326:       /*
327:        (3-2) Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
328:                            w2 = F(x1 + dx) - F(x1)
329:        */
330:       PetscLogEventBegin(MAT_FDColoringFunction, 0, 0, 0, 0);
331:       (*f)(sctx, w3, w2, fctx);
332:       PetscLogEventEnd(MAT_FDColoringFunction, 0, 0, 0, 0);
333:       VecAXPY(w2, -1.0, w1);

335:       /*
336:        (3-3) Loop over rows of vector, putting results into Jacobian matrix
337:        */
338:       nrows_k = nrows[k];
339:       VecGetArray(w2, &y);
340:       if (coloring->htype[0] == 'w') {
341:         for (l = 0; l < nrows_k; l++) {
342:           row = Jentry2[nz].row; /* local row index */
343: #if defined(PETSC_USE_COMPLEX)   /* See https://lists.mcs.anl.gov/pipermail/petsc-users/2021-December/045158.html */
344:           PetscScalar *tmp = Jentry2[nz].valaddr;
345:           *tmp             = y[row] * dx;
346: #else
347:           *(Jentry2[nz].valaddr) = y[row] * dx;
348: #endif
349:           nz++;
350:         }
351:       } else { /* htype == 'ds' */
352:         for (l = 0; l < nrows_k; l++) {
353:           row = Jentry[nz].row; /* local row index */
354: #if defined(PETSC_USE_COMPLEX)  /* See https://lists.mcs.anl.gov/pipermail/petsc-users/2021-December/045158.html */
355:           PetscScalar *tmp = Jentry[nz].valaddr;
356:           *tmp             = y[row] * vscale_array[Jentry[nz].col];
357: #else
358:           *(Jentry[nz].valaddr)  = y[row] * vscale_array[Jentry[nz].col];
359: #endif
360:           nz++;
361:         }
362:       }
363:       VecRestoreArray(w2, &y);
364:     }
365:   }

367: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
368:   if (J->offloadmask != PETSC_OFFLOAD_UNALLOCATED) J->offloadmask = PETSC_OFFLOAD_CPU;
369: #endif
370:   MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY);
371:   MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY);
372:   if (vscale) VecRestoreArray(vscale, &vscale_array);
373:   coloring->currentcolor = -1;
374:   if (!alreadyboundtocpu) VecBindToCPU(x1, PETSC_FALSE);
375:   return 0;
376: }

378: PetscErrorCode MatFDColoringSetUp_MPIXAIJ(Mat mat, ISColoring iscoloring, MatFDColoring c)
379: {
380:   PetscMPIInt            size, *ncolsonproc, *disp, nn;
381:   PetscInt               i, n, nrows, nrows_i, j, k, m, ncols, col, *rowhit, cstart, cend, colb;
382:   const PetscInt        *is, *A_ci, *A_cj, *B_ci, *B_cj, *row = NULL, *ltog = NULL;
383:   PetscInt               nis = iscoloring->n, nctot, *cols, tmp = 0;
384:   ISLocalToGlobalMapping map   = mat->cmap->mapping;
385:   PetscInt               ctype = c->ctype, *spidxA, *spidxB, nz, bs, bs2, spidx;
386:   Mat                    A, B;
387:   PetscScalar           *A_val, *B_val, **valaddrhit;
388:   MatEntry              *Jentry;
389:   MatEntry2             *Jentry2;
390:   PetscBool              isBAIJ, isSELL;
391:   PetscInt               bcols = c->bcols;
392: #if defined(PETSC_USE_CTABLE)
393:   PetscTable colmap = NULL;
394: #else
395:   PetscInt *colmap = NULL;      /* local col number of off-diag col */
396: #endif

398:   if (ctype == IS_COLORING_LOCAL) {
400:     ISLocalToGlobalMappingGetIndices(map, &ltog);
401:   }

403:   MatGetBlockSize(mat, &bs);
404:   PetscObjectBaseTypeCompare((PetscObject)mat, MATMPIBAIJ, &isBAIJ);
405:   PetscObjectTypeCompare((PetscObject)mat, MATMPISELL, &isSELL);
406:   if (isBAIJ) {
407:     Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
408:     Mat_SeqBAIJ *spA, *spB;
409:     A     = baij->A;
410:     spA   = (Mat_SeqBAIJ *)A->data;
411:     A_val = spA->a;
412:     B     = baij->B;
413:     spB   = (Mat_SeqBAIJ *)B->data;
414:     B_val = spB->a;
415:     nz    = spA->nz + spB->nz; /* total nonzero entries of mat */
416:     if (!baij->colmap) MatCreateColmap_MPIBAIJ_Private(mat);
417:     colmap = baij->colmap;
418:     MatGetColumnIJ_SeqBAIJ_Color(A, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &A_ci, &A_cj, &spidxA, NULL);
419:     MatGetColumnIJ_SeqBAIJ_Color(B, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &B_ci, &B_cj, &spidxB, NULL);

421:     if (ctype == IS_COLORING_GLOBAL && c->htype[0] == 'd') { /* create vscale for storing dx */
422:       PetscInt *garray;
423:       PetscMalloc1(B->cmap->n, &garray);
424:       for (i = 0; i < baij->B->cmap->n / bs; i++) {
425:         for (j = 0; j < bs; j++) garray[i * bs + j] = bs * baij->garray[i] + j;
426:       }
427:       VecCreateGhost(PetscObjectComm((PetscObject)mat), mat->cmap->n, PETSC_DETERMINE, B->cmap->n, garray, &c->vscale);
428:       VecBindToCPU(c->vscale, PETSC_TRUE);
429:       PetscFree(garray);
430:     }
431:   } else if (isSELL) {
432:     Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;
433:     Mat_SeqSELL *spA, *spB;
434:     A     = sell->A;
435:     spA   = (Mat_SeqSELL *)A->data;
436:     A_val = spA->val;
437:     B     = sell->B;
438:     spB   = (Mat_SeqSELL *)B->data;
439:     B_val = spB->val;
440:     nz    = spA->nz + spB->nz; /* total nonzero entries of mat */
441:     if (!sell->colmap) {
442:       /* Allow access to data structures of local part of matrix
443:        - creates aij->colmap which maps global column number to local number in part B */
444:       MatCreateColmap_MPISELL_Private(mat);
445:     }
446:     colmap = sell->colmap;
447:     MatGetColumnIJ_SeqSELL_Color(A, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &A_ci, &A_cj, &spidxA, NULL);
448:     MatGetColumnIJ_SeqSELL_Color(B, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &B_ci, &B_cj, &spidxB, NULL);

450:     bs = 1; /* only bs=1 is supported for non MPIBAIJ matrix */

452:     if (ctype == IS_COLORING_GLOBAL && c->htype[0] == 'd') { /* create vscale for storing dx */
453:       VecCreateGhost(PetscObjectComm((PetscObject)mat), mat->cmap->n, PETSC_DETERMINE, B->cmap->n, sell->garray, &c->vscale);
454:       VecBindToCPU(c->vscale, PETSC_TRUE);
455:     }
456:   } else {
457:     Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
458:     Mat_SeqAIJ *spA, *spB;
459:     A     = aij->A;
460:     spA   = (Mat_SeqAIJ *)A->data;
461:     A_val = spA->a;
462:     B     = aij->B;
463:     spB   = (Mat_SeqAIJ *)B->data;
464:     B_val = spB->a;
465:     nz    = spA->nz + spB->nz; /* total nonzero entries of mat */
466:     if (!aij->colmap) {
467:       /* Allow access to data structures of local part of matrix
468:        - creates aij->colmap which maps global column number to local number in part B */
469:       MatCreateColmap_MPIAIJ_Private(mat);
470:     }
471:     colmap = aij->colmap;
472:     MatGetColumnIJ_SeqAIJ_Color(A, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &A_ci, &A_cj, &spidxA, NULL);
473:     MatGetColumnIJ_SeqAIJ_Color(B, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &B_ci, &B_cj, &spidxB, NULL);

475:     bs = 1; /* only bs=1 is supported for non MPIBAIJ matrix */

477:     if (ctype == IS_COLORING_GLOBAL && c->htype[0] == 'd') { /* create vscale for storing dx */
478:       VecCreateGhost(PetscObjectComm((PetscObject)mat), mat->cmap->n, PETSC_DETERMINE, B->cmap->n, aij->garray, &c->vscale);
479:       VecBindToCPU(c->vscale, PETSC_TRUE);
480:     }
481:   }

483:   m      = mat->rmap->n / bs;
484:   cstart = mat->cmap->rstart / bs;
485:   cend   = mat->cmap->rend / bs;

487:   PetscMalloc2(nis, &c->ncolumns, nis, &c->columns);
488:   PetscMalloc1(nis, &c->nrows);

490:   if (c->htype[0] == 'd') {
491:     PetscMalloc1(nz, &Jentry);
492:     c->matentry = Jentry;
493:   } else if (c->htype[0] == 'w') {
494:     PetscMalloc1(nz, &Jentry2);
495:     c->matentry2 = Jentry2;
496:   } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "htype is not supported");

498:   PetscMalloc2(m + 1, &rowhit, m + 1, &valaddrhit);
499:   nz = 0;
500:   ISColoringGetIS(iscoloring, PETSC_OWN_POINTER, PETSC_IGNORE, &c->isa);

502:   if (ctype == IS_COLORING_GLOBAL) {
503:     MPI_Comm_size(PetscObjectComm((PetscObject)mat), &size);
504:     PetscMalloc2(size, &ncolsonproc, size, &disp);
505:   }

507:   for (i = 0; i < nis; i++) { /* for each local color */
508:     ISGetLocalSize(c->isa[i], &n);
509:     ISGetIndices(c->isa[i], &is);

511:     c->ncolumns[i] = n; /* local number of columns of this color on this process */
512:     c->columns[i]  = (PetscInt *)is;

514:     if (ctype == IS_COLORING_GLOBAL) {
515:       /* Determine nctot, the total (parallel) number of columns of this color */
516:       /* ncolsonproc[j]: local ncolumns on proc[j] of this color */
517:       PetscMPIIntCast(n, &nn);
518:       MPI_Allgather(&nn, 1, MPI_INT, ncolsonproc, 1, MPI_INT, PetscObjectComm((PetscObject)mat));
519:       nctot = 0;
520:       for (j = 0; j < size; j++) nctot += ncolsonproc[j];
521:       if (!nctot) PetscInfo(mat, "Coloring of matrix has some unneeded colors with no corresponding rows\n");

523:       disp[0] = 0;
524:       for (j = 1; j < size; j++) disp[j] = disp[j - 1] + ncolsonproc[j - 1];

526:       /* Get cols, the complete list of columns for this color on each process */
527:       PetscMalloc1(nctot + 1, &cols);
528:       MPI_Allgatherv((void *)is, n, MPIU_INT, cols, ncolsonproc, disp, MPIU_INT, PetscObjectComm((PetscObject)mat));
529:     } else if (ctype == IS_COLORING_LOCAL) {
530:       /* Determine local number of columns of this color on this process, including ghost points */
531:       nctot = n;
532:       cols  = (PetscInt *)is;
533:     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not provided for this MatFDColoring type");

535:     /* Mark all rows affect by these columns */
536:     PetscArrayzero(rowhit, m);
537:     bs2     = bs * bs;
538:     nrows_i = 0;
539:     for (j = 0; j < nctot; j++) { /* loop over columns*/
540:       if (ctype == IS_COLORING_LOCAL) {
541:         col = ltog[cols[j]];
542:       } else {
543:         col = cols[j];
544:       }
545:       if (col >= cstart && col < cend) { /* column is in A, diagonal block of mat */
546:         tmp   = A_ci[col - cstart];
547:         row   = A_cj + tmp;
548:         nrows = A_ci[col - cstart + 1] - tmp;
549:         nrows_i += nrows;

551:         /* loop over columns of A marking them in rowhit */
552:         for (k = 0; k < nrows; k++) {
553:           /* set valaddrhit for part A */
554:           spidx            = bs2 * spidxA[tmp + k];
555:           valaddrhit[*row] = &A_val[spidx];
556:           rowhit[*row++]   = col - cstart + 1; /* local column index */
557:         }
558:       } else { /* column is in B, off-diagonal block of mat */
559: #if defined(PETSC_USE_CTABLE)
560:         PetscTableFind(colmap, col + 1, &colb);
561:         colb--;
562: #else
563:         colb = colmap[col] - 1; /* local column index */
564: #endif
565:         if (colb == -1) {
566:           nrows = 0;
567:         } else {
568:           colb  = colb / bs;
569:           tmp   = B_ci[colb];
570:           row   = B_cj + tmp;
571:           nrows = B_ci[colb + 1] - tmp;
572:         }
573:         nrows_i += nrows;
574:         /* loop over columns of B marking them in rowhit */
575:         for (k = 0; k < nrows; k++) {
576:           /* set valaddrhit for part B */
577:           spidx            = bs2 * spidxB[tmp + k];
578:           valaddrhit[*row] = &B_val[spidx];
579:           rowhit[*row++]   = colb + 1 + cend - cstart; /* local column index */
580:         }
581:       }
582:     }
583:     c->nrows[i] = nrows_i;

585:     if (c->htype[0] == 'd') {
586:       for (j = 0; j < m; j++) {
587:         if (rowhit[j]) {
588:           Jentry[nz].row     = j;             /* local row index */
589:           Jentry[nz].col     = rowhit[j] - 1; /* local column index */
590:           Jentry[nz].valaddr = valaddrhit[j]; /* address of mat value for this entry */
591:           nz++;
592:         }
593:       }
594:     } else { /* c->htype == 'wp' */
595:       for (j = 0; j < m; j++) {
596:         if (rowhit[j]) {
597:           Jentry2[nz].row     = j;             /* local row index */
598:           Jentry2[nz].valaddr = valaddrhit[j]; /* address of mat value for this entry */
599:           nz++;
600:         }
601:       }
602:     }
603:     if (ctype == IS_COLORING_GLOBAL) PetscFree(cols);
604:   }
605:   if (ctype == IS_COLORING_GLOBAL) PetscFree2(ncolsonproc, disp);

607:   if (bcols > 1) { /* reorder Jentry for faster MatFDColoringApply() */
608:     MatFDColoringSetUpBlocked_AIJ_Private(mat, c, nz);
609:   }

611:   if (isBAIJ) {
612:     MatRestoreColumnIJ_SeqBAIJ_Color(A, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &A_ci, &A_cj, &spidxA, NULL);
613:     MatRestoreColumnIJ_SeqBAIJ_Color(B, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &B_ci, &B_cj, &spidxB, NULL);
614:     PetscMalloc1(bs * mat->rmap->n, &c->dy);
615:   } else if (isSELL) {
616:     MatRestoreColumnIJ_SeqSELL_Color(A, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &A_ci, &A_cj, &spidxA, NULL);
617:     MatRestoreColumnIJ_SeqSELL_Color(B, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &B_ci, &B_cj, &spidxB, NULL);
618:   } else {
619:     MatRestoreColumnIJ_SeqAIJ_Color(A, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &A_ci, &A_cj, &spidxA, NULL);
620:     MatRestoreColumnIJ_SeqAIJ_Color(B, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &B_ci, &B_cj, &spidxB, NULL);
621:   }

623:   ISColoringRestoreIS(iscoloring, PETSC_OWN_POINTER, &c->isa);
624:   PetscFree2(rowhit, valaddrhit);

626:   if (ctype == IS_COLORING_LOCAL) ISLocalToGlobalMappingRestoreIndices(map, &ltog);
627:   PetscInfo(c, "ncolors %" PetscInt_FMT ", brows %" PetscInt_FMT " and bcols %" PetscInt_FMT " are used.\n", c->ncolors, c->brows, c->bcols);
628:   return 0;
629: }

631: PetscErrorCode MatFDColoringCreate_MPIXAIJ(Mat mat, ISColoring iscoloring, MatFDColoring c)
632: {
633:   PetscInt  bs, nis = iscoloring->n, m = mat->rmap->n;
634:   PetscBool isBAIJ, isSELL;

636:   /* set default brows and bcols for speedup inserting the dense matrix into sparse Jacobian;
637:    bcols is chosen s.t. dy-array takes 50% of memory space as mat */
638:   MatGetBlockSize(mat, &bs);
639:   PetscObjectBaseTypeCompare((PetscObject)mat, MATMPIBAIJ, &isBAIJ);
640:   PetscObjectTypeCompare((PetscObject)mat, MATMPISELL, &isSELL);
641:   if (isBAIJ || m == 0) {
642:     c->brows = m;
643:     c->bcols = 1;
644:   } else if (isSELL) {
645:     /* bcols is chosen s.t. dy-array takes 50% of local memory space as mat */
646:     Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;
647:     Mat_SeqSELL *spA, *spB;
648:     Mat          A, B;
649:     PetscInt     nz, brows, bcols;
650:     PetscReal    mem;

652:     bs = 1; /* only bs=1 is supported for MPISELL matrix */

654:     A     = sell->A;
655:     spA   = (Mat_SeqSELL *)A->data;
656:     B     = sell->B;
657:     spB   = (Mat_SeqSELL *)B->data;
658:     nz    = spA->nz + spB->nz; /* total local nonzero entries of mat */
659:     mem   = nz * (sizeof(PetscScalar) + sizeof(PetscInt)) + 3 * m * sizeof(PetscInt);
660:     bcols = (PetscInt)(0.5 * mem / (m * sizeof(PetscScalar)));
661:     brows = 1000 / bcols;
662:     if (bcols > nis) bcols = nis;
663:     if (brows == 0 || brows > m) brows = m;
664:     c->brows = brows;
665:     c->bcols = bcols;
666:   } else { /* mpiaij matrix */
667:     /* bcols is chosen s.t. dy-array takes 50% of local memory space as mat */
668:     Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
669:     Mat_SeqAIJ *spA, *spB;
670:     Mat         A, B;
671:     PetscInt    nz, brows, bcols;
672:     PetscReal   mem;

674:     bs = 1; /* only bs=1 is supported for MPIAIJ matrix */

676:     A     = aij->A;
677:     spA   = (Mat_SeqAIJ *)A->data;
678:     B     = aij->B;
679:     spB   = (Mat_SeqAIJ *)B->data;
680:     nz    = spA->nz + spB->nz; /* total local nonzero entries of mat */
681:     mem   = nz * (sizeof(PetscScalar) + sizeof(PetscInt)) + 3 * m * sizeof(PetscInt);
682:     bcols = (PetscInt)(0.5 * mem / (m * sizeof(PetscScalar)));
683:     brows = 1000 / bcols;
684:     if (bcols > nis) bcols = nis;
685:     if (brows == 0 || brows > m) brows = m;
686:     c->brows = brows;
687:     c->bcols = bcols;
688:   }

690:   c->M       = mat->rmap->N / bs; /* set the global rows and columns and local rows */
691:   c->N       = mat->cmap->N / bs;
692:   c->m       = mat->rmap->n / bs;
693:   c->rstart  = mat->rmap->rstart / bs;
694:   c->ncolors = nis;
695:   return 0;
696: }

698: /*@C

700:     MatFDColoringSetValues - takes a matrix in compressed color format and enters the matrix into a PETSc `Mat`

702:    Collective

704:    Input Parameters:
705: +    J - the sparse matrix
706: .    coloring - created with `MatFDColoringCreate()` and a local coloring
707: -    y - column major storage of matrix values with one color of values per column, the number of rows of y should match
708:          the number of local rows of J and the number of columns is the number of colors.

710:    Level: intermediate

712:    Notes: the matrix in compressed color format may come from an automatic differentiation code

714:    The code will be slightly faster if `MatFDColoringSetBlockSize`(coloring,`PETSC_DEFAULT`,nc); is called immediately after creating the coloring

716: .seealso: `MatFDColoringCreate()`, `ISColoring`, `ISColoringCreate()`, `ISColoringSetType()`, `IS_COLORING_LOCAL`, `MatFDColoringSetBlockSize()`
717: @*/
718: PetscErrorCode MatFDColoringSetValues(Mat J, MatFDColoring coloring, const PetscScalar *y)
719: {
720:   MatEntry2      *Jentry2;
721:   PetscInt        row, i, nrows_k, l, ncolors, nz = 0, bcols, nbcols = 0;
722:   const PetscInt *nrows;
723:   PetscBool       eq;

727:   PetscObjectCompareId((PetscObject)J, coloring->matid, &eq);
729:   Jentry2 = coloring->matentry2;
730:   nrows   = coloring->nrows;
731:   ncolors = coloring->ncolors;
732:   bcols   = coloring->bcols;

734:   for (i = 0; i < ncolors; i += bcols) {
735:     nrows_k = nrows[nbcols++];
736:     for (l = 0; l < nrows_k; l++) {
737:       row                      = Jentry2[nz].row; /* local row index */
738:       *(Jentry2[nz++].valaddr) = y[row];
739:     }
740:     y += bcols * coloring->m;
741:   }
742:   MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY);
743:   MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY);
744:   return 0;
745: }