Actual source code: nn.c


#include <../src/ksp/pc/impls/is/nn/nn.h>

/* -------------------------------------------------------------------------- */
/*
   PCSetUp_NN - Prepares for the use of the NN preconditioner
                    by setting data structures and options.

   Input Parameter:
.  pc - the preconditioner context

   Application Interface Routine: PCSetUp()

   Note:
   The interface routine PCSetUp() is not usually called directly by
   the user, but instead is called by PCApply() if necessary.
*/
static PetscErrorCode PCSetUp_NN(PC pc)
{
  if (!pc->setupcalled) {
    /* Set up all the "iterative substructuring" common block */
    PCISSetUp(pc, PETSC_TRUE, PETSC_TRUE);
    /* Create the coarse matrix. */
    PCNNCreateCoarseMatrix(pc);
  }
  return 0;
}

/* -------------------------------------------------------------------------- */
/*
   PCApply_NN - Applies the NN preconditioner to a vector.

   Input Parameters:
.  pc - the preconditioner context
.  r - input vector (global)

   Output Parameter:
.  z - output vector (global)

   Application Interface Routine: PCApply()
 */
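/*
   Reading aid (a sketch only, in the notation of the inline comments below; Q is
   shorthand for the interface preconditioner applied by
   PCNNApplyInterfacePreconditioner(), not a PETSc object):

      $ z_B = Q ( r_B - \sum_j \tilde R_j^T A_{BI}^{(j)} (B_I^{(j)}r_I^{(j)}) ) $
      $ z_I^{(i)} = B_I^{(i)}r_I^{(i)} - B_I^{(i)}t_I^{(i)} $, with $ t_I^{(i)} = A_{IB}^{(i)} \tilde R_i z_B $
*/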
static PetscErrorCode PCApply_NN(PC pc, Vec r, Vec z)
{
  PC_IS      *pcis  = (PC_IS *)(pc->data);
  PetscScalar m_one = -1.0;
  Vec         w     = pcis->vec1_global;

  /*
    Dirichlet solvers.
    Solving $ B_I^{(i)}r_I^{(i)} $ at each processor.
    Storing the local results in vec2_D.
  */
  VecScatterBegin(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD);
  VecScatterEnd(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD);
  KSPSolve(pcis->ksp_D, pcis->vec1_D, pcis->vec2_D);

  /*
    Computing $ r_B - \sum_j \tilde R_j^T A_{BI}^{(j)} (B_I^{(j)}r_I^{(j)}) $.
    Storing the result in the interface portion of the global vector w.
  */
  MatMult(pcis->A_BI, pcis->vec2_D, pcis->vec1_B);
  VecScale(pcis->vec1_B, m_one);
  VecCopy(r, w);
  VecScatterBegin(pcis->global_to_B, pcis->vec1_B, w, ADD_VALUES, SCATTER_REVERSE);
  VecScatterEnd(pcis->global_to_B, pcis->vec1_B, w, ADD_VALUES, SCATTER_REVERSE);

  /*
    Apply the interface preconditioner
  */
  PCNNApplyInterfacePreconditioner(pc, w, z, pcis->work_N, pcis->vec1_B, pcis->vec2_B, pcis->vec3_B, pcis->vec1_D, pcis->vec3_D, pcis->vec1_N, pcis->vec2_N);

  /*
    Computing $ t_I^{(i)} = A_{IB}^{(i)} \tilde R_i z_B $
    The result is stored in vec1_D.
  */
  VecScatterBegin(pcis->global_to_B, z, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD);
  VecScatterEnd(pcis->global_to_B, z, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD);
  MatMult(pcis->A_IB, pcis->vec1_B, pcis->vec1_D);

  /*
    Dirichlet solvers.
    Computing $ B_I^{(i)}t_I^{(i)} $ and inserting into the global vector the blocks
    $ B_I^{(i)}r_I^{(i)} - B_I^{(i)}t_I^{(i)} $.
  */
  VecScatterBegin(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE);
  VecScatterEnd(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE);
  KSPSolve(pcis->ksp_D, pcis->vec1_D, pcis->vec2_D);
  VecScale(pcis->vec2_D, m_one);
  VecScatterBegin(pcis->global_to_D, pcis->vec2_D, z, ADD_VALUES, SCATTER_REVERSE);
  VecScatterEnd(pcis->global_to_D, pcis->vec2_D, z, ADD_VALUES, SCATTER_REVERSE);
  return 0;
}

/* -------------------------------------------------------------------------- */
/*
   PCDestroy_NN - Destroys the private context for the NN preconditioner
   that was created with PCCreate_NN().

   Input Parameter:
.  pc - the preconditioner context

   Application Interface Routine: PCDestroy()
*/
static PetscErrorCode PCDestroy_NN(PC pc)
{
  PC_NN *pcnn = (PC_NN *)pc->data;

  PCISDestroy(pc);

  MatDestroy(&pcnn->coarse_mat);
  VecDestroy(&pcnn->coarse_x);
  VecDestroy(&pcnn->coarse_b);
  KSPDestroy(&pcnn->ksp_coarse);
  if (pcnn->DZ_IN) {
    PetscFree(pcnn->DZ_IN[0]);
    PetscFree(pcnn->DZ_IN);
  }

  /*
      Free the private data structure that was hanging off the PC
  */
  PetscFree(pc->data);
  return 0;
}

/*MC
   PCNN - Balancing Neumann-Neumann for scalar elliptic PDEs.

   Options Database Keys:
+    -pc_nn_turn_off_first_balancing - do not balance the residual before solving the local Neumann problems
                                       (this skips the first coarse grid solve in the preconditioner)
.    -pc_nn_turn_off_second_balancing - do not balance the solution after solving the local Neumann problems
                                       (this skips the second coarse grid solve in the preconditioner)
.    -pc_is_damp_fixed <fact> -
.    -pc_is_remove_nullspace_fixed -
.    -pc_is_set_damping_factor_floating <fact> -
.    -pc_is_not_damp_floating -
-    -pc_is_not_remove_nullspace_floating -

   Options Database prefixes for the subsolvers this preconditioner uses:
+  -nn_coarse_pc_ - for the coarse grid preconditioner
.  -is_localD_pc_ - for the Dirichlet subproblem preconditioner
-  -is_localN_pc_ - for the Neumann subproblem preconditioner

   Level: intermediate

   Notes:
    The matrix used with this preconditioner must be of type `MATIS`

          Unlike more 'conventional' Neumann-Neumann preconditioners, this iterates over ALL the
          degrees of freedom, NOT just those on the interface (this allows the use of approximate solvers
          on the subdomains, though in our experience using approximate solvers is slower).

   Contributed by Paulo Goldfeld

.seealso: `PCCreate()`, `PCSetType()`, `PCType`, `PC`, `MATIS`, `PCBDDC`
M*/
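/*
   A minimal usage sketch (an illustration only; the Mat A, the Vecs b and x, and the
   KSP ksp are assumed to have been created elsewhere, with A of type MATIS; run-time
   options such as -pc_nn_turn_off_first_balancing are then picked up by
   KSPSetFromOptions()):

     PC pc;
     KSPSetOperators(ksp, A, A);
     KSPGetPC(ksp, &pc);
     PCSetType(pc, PCNN);
     KSPSetFromOptions(ksp);
     KSPSolve(ksp, b, x);
*/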

PETSC_EXTERN PetscErrorCode PCCreate_NN(PC pc)
{
  PC_NN *pcnn;

  /*
     Creates the private data structure for this preconditioner and
     attaches it to the PC object.
  */
  PetscNew(&pcnn);
  pc->data = (void *)pcnn;

  PCISCreate(pc);
  pcnn->coarse_mat = NULL;
  pcnn->coarse_x   = NULL;
  pcnn->coarse_b   = NULL;
  pcnn->ksp_coarse = NULL;
  pcnn->DZ_IN      = NULL;

  /*
      Set the pointers for the functions that are provided above.
      Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
      are called, they will automatically call these functions.  Note we
      choose not to provide a couple of these functions since they are
      not needed.
  */
  pc->ops->apply               = PCApply_NN;
  pc->ops->applytranspose      = NULL;
  pc->ops->setup               = PCSetUp_NN;
  pc->ops->destroy             = PCDestroy_NN;
  pc->ops->view                = NULL;
  pc->ops->applyrichardson     = NULL;
  pc->ops->applysymmetricleft  = NULL;
  pc->ops->applysymmetricright = NULL;
  return 0;
}

/*
   PCNNCreateCoarseMatrix - Creates, assembles, and sets up the solver for the coarse
                            problem (one degree of freedom per subdomain) used for balancing.
*/
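/*
   A sketch of what is assembled below (informal notation, intended only as a reading
   aid): with Z_i the null space of the local Neumann problem of subdomain i, D_i the
   partition-of-unity scaling pcis->D, and S_loc the local Schur complement, each
   subdomain computes the chunks

      $ mat[i][j] = (D_i Z_i)^T S_loc (D_j Z_j) $,

   restricted to the interface nodes shared with neighbour i; summing these chunks
   over all subdomains with ADD_VALUES yields the coarse (balancing) matrix, which has
   one row and one column per subdomain.
*/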
PetscErrorCode PCNNCreateCoarseMatrix(PC pc)
{
  MPI_Request  *send_request, *recv_request;
  PetscInt      i, j, k;
  PetscScalar  *mat;    /* Sub-matrix with this subdomain's contribution to the coarse matrix             */
  PetscScalar **DZ_OUT; /* proc[k].DZ_OUT[i][] = bit of vector to be sent from processor k to processor i */

  /* aliasing some names */
  PC_IS        *pcis     = (PC_IS *)(pc->data);
  PC_NN        *pcnn     = (PC_NN *)pc->data;
  PetscInt      n_neigh  = pcis->n_neigh;
  PetscInt     *neigh    = pcis->neigh;
  PetscInt     *n_shared = pcis->n_shared;
  PetscInt    **shared   = pcis->shared;
  PetscScalar **DZ_IN; /* Must be initialized after memory allocation. */

  /* Allocate memory for mat (the +1 is to handle the case n_neigh equal to zero) */
  PetscMalloc1(n_neigh * n_neigh + 1, &mat);

  /* Allocate memory for DZ */
  /* Notice that DZ_OUT[0] is allocated some space that is never used. */
  /* This is done just so that DZ_OUT and DZ_IN have exactly the same layout. */
  {
    PetscInt size_of_Z = 0;
    PetscMalloc((n_neigh + 1) * sizeof(PetscScalar *), &pcnn->DZ_IN);
    DZ_IN = pcnn->DZ_IN;
    PetscMalloc((n_neigh + 1) * sizeof(PetscScalar *), &DZ_OUT);
    for (i = 0; i < n_neigh; i++) size_of_Z += n_shared[i];
    PetscMalloc((size_of_Z + 1) * sizeof(PetscScalar), &DZ_IN[0]);
    PetscMalloc((size_of_Z + 1) * sizeof(PetscScalar), &DZ_OUT[0]);
  }
  for (i = 1; i < n_neigh; i++) {
    DZ_IN[i]  = DZ_IN[i - 1] + n_shared[i - 1];
    DZ_OUT[i] = DZ_OUT[i - 1] + n_shared[i - 1];
  }

  /* Set the values of DZ_OUT, in order to send this info to the neighbours */
  /* First, set the auxiliary array pcis->work_N. */
  PCISScatterArrayNToVecB(pcis->work_N, pcis->D, INSERT_VALUES, SCATTER_REVERSE, pc);
  for (i = 1; i < n_neigh; i++) {
    for (j = 0; j < n_shared[i]; j++) DZ_OUT[i][j] = pcis->work_N[shared[i][j]];
  }

  /* Non-blocking send/receive the common-interface chunks of scaled nullspaces */
  /* Notice that send_request[] and recv_request[] could have one less element. */
  /* We make them longer to have request[i] corresponding to neigh[i].          */
  {
    PetscMPIInt tag;
    PetscObjectGetNewTag((PetscObject)pc, &tag);
    PetscMalloc2(n_neigh + 1, &send_request, n_neigh + 1, &recv_request);
    for (i = 1; i < n_neigh; i++) {
      MPI_Isend((void *)(DZ_OUT[i]), n_shared[i], MPIU_SCALAR, neigh[i], tag, PetscObjectComm((PetscObject)pc), &(send_request[i]));
      MPI_Irecv((void *)(DZ_IN[i]), n_shared[i], MPIU_SCALAR, neigh[i], tag, PetscObjectComm((PetscObject)pc), &(recv_request[i]));
    }
  }

  /* Set DZ_IN[0][] (recall that neigh[0]==rank, always) */
  for (j = 0; j < n_shared[0]; j++) DZ_IN[0][j] = pcis->work_N[shared[0][j]];

  /* Start computing with local D*Z while communication goes on.        */
  /* Apply the Schur complement: the result is stored in pcis->vec1_B   */
  /* and also scattered to pcis->work_N.                                */
  PCNNApplySchurToChunk(pc, n_shared[0], shared[0], DZ_IN[0], pcis->work_N, pcis->vec1_B, pcis->vec2_B, pcis->vec1_D, pcis->vec2_D);

  /* Compute the first column, while completing the receiving. */
  for (i = 0; i < n_neigh; i++) {
    MPI_Status  stat;
    PetscMPIInt ind = 0;
    if (i > 0) {
      MPI_Waitany(n_neigh - 1, recv_request + 1, &ind, &stat);
      ind++;
    }
    mat[ind * n_neigh + 0] = 0.0;
    for (k = 0; k < n_shared[ind]; k++) mat[ind * n_neigh + 0] += DZ_IN[ind][k] * pcis->work_N[shared[ind][k]];
  }

  /* Compute the remaining columns */
  for (j = 1; j < n_neigh; j++) {
    PCNNApplySchurToChunk(pc, n_shared[j], shared[j], DZ_IN[j], pcis->work_N, pcis->vec1_B, pcis->vec2_B, pcis->vec1_D, pcis->vec2_D);
    for (i = 0; i < n_neigh; i++) {
      mat[i * n_neigh + j] = 0.0;
      for (k = 0; k < n_shared[i]; k++) mat[i * n_neigh + j] += DZ_IN[i][k] * pcis->work_N[shared[i][k]];
    }
  }

  /* Complete the sending. */
  if (n_neigh > 1) {
    MPI_Status *stat;
    PetscMalloc1(n_neigh - 1, &stat);
    if (n_neigh - 1) MPI_Waitall(n_neigh - 1, &(send_request[1]), stat);
    PetscFree(stat);
  }

  /* Free the memory for the MPI requests */
  PetscFree2(send_request, recv_request);

  /* Free the memory for DZ_OUT */
  if (DZ_OUT) {
    PetscFree(DZ_OUT[0]);
    PetscFree(DZ_OUT);
  }

  {
    PetscMPIInt size;
    MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size);
    /* Create the global coarse vectors (rhs and solution). */
    VecCreateMPI(PetscObjectComm((PetscObject)pc), 1, size, &(pcnn->coarse_b));
    VecDuplicate(pcnn->coarse_b, &(pcnn->coarse_x));
    /* Create and set the global coarse AIJ matrix. */
    MatCreate(PetscObjectComm((PetscObject)pc), &(pcnn->coarse_mat));
    MatSetSizes(pcnn->coarse_mat, 1, 1, size, size);
    MatSetType(pcnn->coarse_mat, MATAIJ);
    MatSeqAIJSetPreallocation(pcnn->coarse_mat, 1, NULL);
    MatMPIAIJSetPreallocation(pcnn->coarse_mat, 1, NULL, n_neigh, NULL);
    MatSetOption(pcnn->coarse_mat, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE);
    MatSetOption(pcnn->coarse_mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE);
    MatSetValues(pcnn->coarse_mat, n_neigh, neigh, n_neigh, neigh, mat, ADD_VALUES);
    MatAssemblyBegin(pcnn->coarse_mat, MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(pcnn->coarse_mat, MAT_FINAL_ASSEMBLY);
  }

  {
    PetscMPIInt rank;
    PetscScalar one = 1.0;
    MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank);
    /* "Zero out" rows of not-purely-Neumann subdomains */
    if (pcis->pure_neumann) { /* does NOT zero the row; create an empty index set. The reason is that MatZeroRows() is collective. */
      MatZeroRows(pcnn->coarse_mat, 0, NULL, one, NULL, NULL);
    } else { /* here it DOES zero the row, since it's not a floating subdomain. */
      PetscInt row = (PetscInt)rank;
      MatZeroRows(pcnn->coarse_mat, 1, &row, one, NULL, NULL);
    }
  }

  /* Create the coarse linear solver context */
  {
    PC  pc_ctx, inner_pc;
    KSP inner_ksp;

    KSPCreate(PetscObjectComm((PetscObject)pc), &pcnn->ksp_coarse);
    PetscObjectIncrementTabLevel((PetscObject)pcnn->ksp_coarse, (PetscObject)pc, 2);
    KSPSetOperators(pcnn->ksp_coarse, pcnn->coarse_mat, pcnn->coarse_mat);
    KSPGetPC(pcnn->ksp_coarse, &pc_ctx);
    PCSetType(pc_ctx, PCREDUNDANT);
    KSPSetType(pcnn->ksp_coarse, KSPPREONLY);
    PCRedundantGetKSP(pc_ctx, &inner_ksp);
    KSPGetPC(inner_ksp, &inner_pc);
    PCSetType(inner_pc, PCLU);
    KSPSetOptionsPrefix(pcnn->ksp_coarse, "nn_coarse_");
    KSPSetFromOptions(pcnn->ksp_coarse);
    /* Set up the coarse solver now, during PCSetUp(), rather than on its first use. */
    KSPSetUp(pcnn->ksp_coarse);
  }
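  /* The coarse solver configured above can be changed at run time through the
     "nn_coarse_" options prefix set with KSPSetOptionsPrefix(), for example (an
     illustration only; any combination accepted by KSPSetFromOptions() works):
       -nn_coarse_ksp_type gmres -nn_coarse_pc_type asm
  */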

  /* Free the memory for mat */
  PetscFree(mat);

  /* for DEBUGGING, save the coarse matrix to a file. */
  {
    PetscBool flg = PETSC_FALSE;
    PetscOptionsGetBool(NULL, NULL, "-pc_nn_save_coarse_matrix", &flg, NULL);
    if (flg) {
      PetscViewer viewer;
      PetscViewerASCIIOpen(PETSC_COMM_WORLD, "coarse.m", &viewer);
      PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_MATLAB);
      MatView(pcnn->coarse_mat, viewer);
      PetscViewerPopFormat(viewer);
      PetscViewerDestroy(&viewer);
    }
  }

  /* Set the variable pcnn->factor_coarse_rhs. */
  pcnn->factor_coarse_rhs = (pcis->pure_neumann) ? 1.0 : 0.0;

  /* See historical note 02, at the bottom of this file. */
  return 0;
}

/*
   PCNNApplySchurToChunk - Applies the local Schur complement to a chunk of values
                           given on a subset of the interface nodes.

   Input parameters:
.  pc - the preconditioner context
.  n - size of chunk
.  idx - indices of chunk
.  chunk - values

   Output parameters:
.  array_N - result of Schur complement applied to chunk, scattered to big array
.  vec1_B  - result of Schur complement applied to chunk
.  vec2_B  - garbage (used as work space)
.  vec1_D  - garbage (used as work space)
.  vec2_D  - garbage (used as work space)

*/
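/*
   Schematically (a reading aid, not part of the PETSc API): with E the extension by
   zero of chunk from the indices idx to the whole set of local nodes and S the local
   Schur complement applied by PCISApplySchur(),

      $ vec1_B = S (E \, chunk) $,

   and array_N receives vec1_B scattered back into the local numbering.
*/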
PetscErrorCode PCNNApplySchurToChunk(PC pc, PetscInt n, PetscInt *idx, PetscScalar *chunk, PetscScalar *array_N, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
{
  PetscInt i;
  PC_IS   *pcis = (PC_IS *)(pc->data);

  PetscArrayzero(array_N, pcis->n);
  for (i = 0; i < n; i++) array_N[idx[i]] = chunk[i];
  PCISScatterArrayNToVecB(array_N, vec2_B, INSERT_VALUES, SCATTER_FORWARD, pc);
  PCISApplySchur(pc, vec2_B, vec1_B, (Vec)0, vec1_D, vec2_D);
  PCISScatterArrayNToVecB(array_N, vec1_B, INSERT_VALUES, SCATTER_REVERSE, pc);
  return 0;
}

/*
   PCNNApplyInterfacePreconditioner - Applies the interface preconditioner, i.e.,
                                      the preconditioner for the Schur complement.

   Input parameter:
.  r - global vector of interior and interface nodes. The values on the interior nodes are NOT used.

   Output parameters:
.  z - global vector of interior and interface nodes. The values on the interface are the result of
       the application of the interface preconditioner to the interface part of r. The values on the
       interior nodes are garbage.
.  work_N - array of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
.  vec1_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
.  vec2_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
.  vec3_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
.  vec1_D - vector of local interior nodes; returns garbage (used as work space)
.  vec2_D - vector of local interior nodes; returns garbage (used as work space)
.  vec1_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
.  vec2_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space)

*/
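/*
   Schematically (an informal summary; this ignores the two balancing steps): the
   operator applied to the interface residual below is the classical Neumann-Neumann
   preconditioner

      $ \sum_i \tilde R_i^T D_i S_i^{-1} D_i \tilde R_i $,

   where the (approximate) local Neumann solves $ S_i^{-1} $ are performed by
   PCISApplyInvSchur() and $ D_i $ is the partition-of-unity scaling pcis->D.  The two
   calls to PCNNBalancing() wrap this operator with coarse-grid corrections.
*/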
PetscErrorCode PCNNApplyInterfacePreconditioner(PC pc, Vec r, Vec z, PetscScalar *work_N, Vec vec1_B, Vec vec2_B, Vec vec3_B, Vec vec1_D, Vec vec2_D, Vec vec1_N, Vec vec2_N)
{
  PC_IS *pcis = (PC_IS *)(pc->data);

  /*
    First balancing step.
  */
  {
    PetscBool flg = PETSC_FALSE;
    PetscOptionsGetBool(NULL, NULL, "-pc_nn_turn_off_first_balancing", &flg, NULL);
    if (!flg) {
      PCNNBalancing(pc, r, (Vec)0, z, vec1_B, vec2_B, (Vec)0, vec1_D, vec2_D, work_N);
    } else {
      VecCopy(r, z);
    }
  }

  /*
    Extract the local interface part of z and scale it by D
  */
  VecScatterBegin(pcis->global_to_B, z, vec1_B, INSERT_VALUES, SCATTER_FORWARD);
  VecScatterEnd(pcis->global_to_B, z, vec1_B, INSERT_VALUES, SCATTER_FORWARD);
  VecPointwiseMult(vec2_B, pcis->D, vec1_B);

  /* Neumann Solver */
  PCISApplyInvSchur(pc, vec2_B, vec1_B, vec1_N, vec2_N);

  /*
    Second balancing step.
  */
  {
    PetscBool flg = PETSC_FALSE;
    PetscOptionsGetBool(NULL, NULL, "-pc_nn_turn_off_second_balancing", &flg, NULL);
    if (!flg) {
      PCNNBalancing(pc, r, vec1_B, z, vec2_B, vec3_B, (Vec)0, vec1_D, vec2_D, work_N);
    } else {
      VecPointwiseMult(vec2_B, pcis->D, vec1_B);
      VecSet(z, 0.0);
      VecScatterBegin(pcis->global_to_B, vec2_B, z, ADD_VALUES, SCATTER_REVERSE);
      VecScatterEnd(pcis->global_to_B, vec2_B, z, ADD_VALUES, SCATTER_REVERSE);
    }
  }
  return 0;
}

/*
   PCNNBalancing - Computes z, as given in equations (15) and (16) (if the
                   input argument u is provided), or s, as given in equations
                   (12) and (13), if the input argument u is NULL.
                   Notice that the input argument u plays the role of u_i in
                   equation (14). The equation numbers refer to [Man93].

   Input Parameters:
.  pc - the preconditioner context.
.  r - MPI vector of all nodes (interior and interface). It is preserved.
.  u - (optional) sequential vector of local interface nodes. It is preserved UNLESS vec3_B is NULL.

   Output Parameters:
.  z - MPI vector of interior and interface nodes. Returns s or z (see description above).
.  vec1_B - Sequential vector of local interface nodes. Workspace.
.  vec2_B - Sequential vector of local interface nodes. Workspace.
.  vec3_B - (Optional) sequential vector of local interface nodes. Workspace.
.  vec1_D - Sequential vector of local interior nodes. Workspace.
.  vec2_D - Sequential vector of local interior nodes. Workspace.
.  work_N - Array of all local nodes (interior and interface). Workspace.

*/
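/*
   A rough sketch of the computation below (the precise formulas are (12)-(16) in
   [Man93]; this comment is only a reading aid): the weighted contribution of the
   interface values against the scaled local null space,

      $ value = factor\_coarse\_rhs \sum_k DZ\_IN[0][k] \; work_N[shared[0][k]] $,

   becomes this subdomain's entry of the coarse right-hand side; the coarse problem is
   then solved with ksp_coarse, and the resulting coefficient lambda scales the
   null-space component D*Z used in the subsequent correction of z.
*/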
PetscErrorCode PCNNBalancing(PC pc, Vec r, Vec u, Vec z, Vec vec1_B, Vec vec2_B, Vec vec3_B, Vec vec1_D, Vec vec2_D, PetscScalar *work_N)
{
  PetscInt     k;
  PetscScalar  value;
  PetscScalar *lambda;
  PC_NN       *pcnn = (PC_NN *)(pc->data);
  PC_IS       *pcis = (PC_IS *)(pc->data);

  PetscLogEventBegin(PC_ApplyCoarse, pc, 0, 0, 0);
  if (u) {
    if (!vec3_B) vec3_B = u;
    VecPointwiseMult(vec1_B, pcis->D, u);
    VecSet(z, 0.0);
    VecScatterBegin(pcis->global_to_B, vec1_B, z, ADD_VALUES, SCATTER_REVERSE);
    VecScatterEnd(pcis->global_to_B, vec1_B, z, ADD_VALUES, SCATTER_REVERSE);
    VecScatterBegin(pcis->global_to_B, z, vec2_B, INSERT_VALUES, SCATTER_FORWARD);
    VecScatterEnd(pcis->global_to_B, z, vec2_B, INSERT_VALUES, SCATTER_FORWARD);
    PCISApplySchur(pc, vec2_B, vec3_B, (Vec)0, vec1_D, vec2_D);
    VecScale(vec3_B, -1.0);
    VecCopy(r, z);
    VecScatterBegin(pcis->global_to_B, vec3_B, z, ADD_VALUES, SCATTER_REVERSE);
    VecScatterEnd(pcis->global_to_B, vec3_B, z, ADD_VALUES, SCATTER_REVERSE);
  } else {
    VecCopy(r, z);
  }
  VecScatterBegin(pcis->global_to_B, z, vec2_B, INSERT_VALUES, SCATTER_FORWARD);
  VecScatterEnd(pcis->global_to_B, z, vec2_B, INSERT_VALUES, SCATTER_FORWARD);
  PCISScatterArrayNToVecB(work_N, vec2_B, INSERT_VALUES, SCATTER_REVERSE, pc);
  for (k = 0, value = 0.0; k < pcis->n_shared[0]; k++) value += pcnn->DZ_IN[0][k] * work_N[pcis->shared[0][k]];
  value *= pcnn->factor_coarse_rhs; /* This factor is set in PCNNCreateCoarseMatrix(). */
  {
    PetscMPIInt rank;
    MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank);
    VecSetValue(pcnn->coarse_b, rank, value, INSERT_VALUES);
    /*
       Since we are only inserting local values (one value actually) we don't need to do the
       reduction that tells us there is no data that needs to be moved. Hence we comment out these
       VecAssemblyBegin(pcnn->coarse_b);
       VecAssemblyEnd  (pcnn->coarse_b);
    */
  }
  KSPSolve(pcnn->ksp_coarse, pcnn->coarse_b, pcnn->coarse_x);
  if (!u) VecScale(pcnn->coarse_x, -1.0);
  VecGetArray(pcnn->coarse_x, &lambda);
  for (k = 0; k < pcis->n_shared[0]; k++) work_N[pcis->shared[0][k]] = *lambda * pcnn->DZ_IN[0][k];
  VecRestoreArray(pcnn->coarse_x, &lambda);
  PCISScatterArrayNToVecB(work_N, vec2_B, INSERT_VALUES, SCATTER_FORWARD, pc);
  VecSet(z, 0.0);
  VecScatterBegin(pcis->global_to_B, vec2_B, z, ADD_VALUES, SCATTER_REVERSE);
  VecScatterEnd(pcis->global_to_B, vec2_B, z, ADD_VALUES, SCATTER_REVERSE);
  if (!u) {
    VecScatterBegin(pcis->global_to_B, z, vec2_B, INSERT_VALUES, SCATTER_FORWARD);
    VecScatterEnd(pcis->global_to_B, z, vec2_B, INSERT_VALUES, SCATTER_FORWARD);
    PCISApplySchur(pc, vec2_B, vec1_B, (Vec)0, vec1_D, vec2_D);
    VecCopy(r, z);
  }
  VecScatterBegin(pcis->global_to_B, vec1_B, z, ADD_VALUES, SCATTER_REVERSE);
  VecScatterEnd(pcis->global_to_B, vec1_B, z, ADD_VALUES, SCATTER_REVERSE);
  PetscLogEventEnd(PC_ApplyCoarse, pc, 0, 0, 0);
  return 0;
}

/*                                                     */
/*  From now on, "footnotes" (or "historical notes").  */
/*                                                     */
/*
   Historical note 01

   We considered the possibility of an alternative D_i that would still
   provide a partition of unity (i.e., $ \sum_i N_i D_i N_i^T = I $).
   The basic principle was still the pseudo-inverse of the counting
   function; the difference was that we would not count subdomains
   that do not contribute to the coarse space (i.e., not pure-Neumann
   subdomains).

   This turned out to be a bad idea: we would solve trivial Neumann
   problems in the subdomains that are not pure Neumann, since we would be
   scaling the balanced residual by zero.
*/

/*
   Historical note 02

   We tried an alternative coarse problem that would eliminate exactly a
   constant error. It turned out not to improve the overall convergence.
*/