Actual source code: nn.c
2: #include <../src/ksp/pc/impls/is/nn/nn.h>
4: /* -------------------------------------------------------------------------- */
5: /*
6: PCSetUp_NN - Prepares for the use of the NN preconditioner
7: by setting data structures and options.
9: Input Parameter:
10: . pc - the preconditioner context
12: Application Interface Routine: PCSetUp()
14: Note:
15: The interface routine PCSetUp() is not usually called directly by
16: the user, but instead is called by PCApply() if necessary.
17: */
18: static PetscErrorCode PCSetUp_NN(PC pc)
19: {
20: if (!pc->setupcalled) {
21: /* Set up all the "iterative substructuring" common block */
22: PCISSetUp(pc, PETSC_TRUE, PETSC_TRUE);
23: /* Create the coarse matrix. */
24: PCNNCreateCoarseMatrix(pc);
25: }
26: return 0;
27: }
29: /* -------------------------------------------------------------------------- */
30: /*
31: PCApply_NN - Applies the NN preconditioner to a vector.
33: Input Parameters:
34: . pc - the preconditioner context
35: . r - input vector (global)
37: Output Parameter:
38: . z - output vector (global)
40: Application Interface Routine: PCApply()
41: */
42: static PetscErrorCode PCApply_NN(PC pc, Vec r, Vec z)
43: {
44: PC_IS *pcis = (PC_IS *)(pc->data);
45: PetscScalar m_one = -1.0;
46: Vec w = pcis->vec1_global;
48: /*
49: Dirichlet solvers.
50: Solving $ B_I^{(i)}r_I^{(i)} $ at each processor.
51: Storing the local results at vec2_D
52: */
53: VecScatterBegin(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD);
54: VecScatterEnd(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD);
55: KSPSolve(pcis->ksp_D, pcis->vec1_D, pcis->vec2_D);
57: /*
58: Computing $ r_B - \sum_j \tilde R_j^T A_{BI}^{(j)} (B_I^{(j)}r_I^{(j)}) $ .
59: Storing the result in the interface portion of the global vector w.
60: */
61: MatMult(pcis->A_BI, pcis->vec2_D, pcis->vec1_B);
62: VecScale(pcis->vec1_B, m_one);
63: VecCopy(r, w);
64: VecScatterBegin(pcis->global_to_B, pcis->vec1_B, w, ADD_VALUES, SCATTER_REVERSE);
65: VecScatterEnd(pcis->global_to_B, pcis->vec1_B, w, ADD_VALUES, SCATTER_REVERSE);
67: /*
68: Apply the interface preconditioner
69: */
70: PCNNApplyInterfacePreconditioner(pc, w, z, pcis->work_N, pcis->vec1_B, pcis->vec2_B, pcis->vec3_B, pcis->vec1_D, pcis->vec3_D, pcis->vec1_N, pcis->vec2_N);
72: /*
73: Computing $ t_I^{(i)} = A_{IB}^{(i)} \tilde R_i z_B $
74: The result is stored in vec1_D.
75: */
76: VecScatterBegin(pcis->global_to_B, z, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD);
77: VecScatterEnd(pcis->global_to_B, z, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD);
78: MatMult(pcis->A_IB, pcis->vec1_B, pcis->vec1_D);
80: /*
81: Dirichlet solvers.
82: Computing $ B_I^{(i)}t_I^{(i)} $ and sticking into the global vector the blocks
83: $ B_I^{(i)}r_I^{(i)} - B_I^{(i)}t_I^{(i)} $.
84: */
85: VecScatterBegin(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE);
86: VecScatterEnd(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE);
87: KSPSolve(pcis->ksp_D, pcis->vec1_D, pcis->vec2_D);
88: VecScale(pcis->vec2_D, m_one);
89: VecScatterBegin(pcis->global_to_D, pcis->vec2_D, z, ADD_VALUES, SCATTER_REVERSE);
90: VecScatterEnd(pcis->global_to_D, pcis->vec2_D, z, ADD_VALUES, SCATTER_REVERSE);
91: return 0;
92: }
94: /* -------------------------------------------------------------------------- */
95: /*
96: PCDestroy_NN - Destroys the private context for the NN preconditioner
97: that was created with PCCreate_NN().
99: Input Parameter:
100: . pc - the preconditioner context
102: Application Interface Routine: PCDestroy()
103: */
104: static PetscErrorCode PCDestroy_NN(PC pc)
105: {
106: PC_NN *pcnn = (PC_NN *)pc->data;
108: PCISDestroy(pc);
110: MatDestroy(&pcnn->coarse_mat);
111: VecDestroy(&pcnn->coarse_x);
112: VecDestroy(&pcnn->coarse_b);
113: KSPDestroy(&pcnn->ksp_coarse);
114: if (pcnn->DZ_IN) {
115: PetscFree(pcnn->DZ_IN[0]);
116: PetscFree(pcnn->DZ_IN);
117: }
119: /*
120: Free the private data structure that was hanging off the PC
121: */
122: PetscFree(pc->data);
123: return 0;
124: }
126: /*MC
127: PCNN - Balancing Neumann-Neumann for scalar elliptic PDEs.
129: Options Database Keys:
130: + -pc_nn_turn_off_first_balancing - do not balance the residual before solving the local Neumann problems
131: (this skips the first coarse grid solve in the preconditioner)
132: . -pc_nn_turn_off_second_balancing - do not balance the solution solving the local Neumann problems
133: (this skips the second coarse grid solve in the preconditioner)
134: . -pc_is_damp_fixed <fact> -
135: . -pc_is_remove_nullspace_fixed -
136: . -pc_is_set_damping_factor_floating <fact> -
137: . -pc_is_not_damp_floating -
138: - -pc_is_not_remove_nullspace_floating -
140: Options Database prefixes for the subsolvers this preconditioner uses:
141: + -nn_coarse_pc_ - for the coarse grid preconditioner
142: . -is_localD_pc_ - for the Dirichlet subproblem preconditioner
143: - -is_localN_pc_ - for the Neumann subproblem preconditioner
145: Level: intermediate
147: Notes:
148: The matrix used with this preconditioner must be of type `MATIS`
150: Unlike more 'conventional' Neumann-Neumann preconditioners this iterates over ALL the
151: degrees of freedom, NOT just those on the interface (this allows the use of approximate solvers
152: on the subdomains; though in our experience using approximate solvers is slower.).
154: Contributed by Paulo Goldfeld
156: .seealso: `PCCreate()`, `PCSetType()`, `PCType`, `PC`, `MATIS`, `PCBDDC`
157: M*/
159: PETSC_EXTERN PetscErrorCode PCCreate_NN(PC pc)
160: {
161: PC_NN *pcnn;
163: /*
164: Creates the private data structure for this preconditioner and
165: attach it to the PC object.
166: */
167: PetscNew(&pcnn);
168: pc->data = (void *)pcnn;
170: PCISCreate(pc);
171: pcnn->coarse_mat = NULL;
172: pcnn->coarse_x = NULL;
173: pcnn->coarse_b = NULL;
174: pcnn->ksp_coarse = NULL;
175: pcnn->DZ_IN = NULL;
177: /*
178: Set the pointers for the functions that are provided above.
179: Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
180: are called, they will automatically call these functions. Note we
181: choose not to provide a couple of these functions since they are
182: not needed.
183: */
184: pc->ops->apply = PCApply_NN;
185: pc->ops->applytranspose = NULL;
186: pc->ops->setup = PCSetUp_NN;
187: pc->ops->destroy = PCDestroy_NN;
188: pc->ops->view = NULL;
189: pc->ops->applyrichardson = NULL;
190: pc->ops->applysymmetricleft = NULL;
191: pc->ops->applysymmetricright = NULL;
192: return 0;
193: }
195: /*
196: PCNNCreateCoarseMatrix -
197: */
198: PetscErrorCode PCNNCreateCoarseMatrix(PC pc)
199: {
200: MPI_Request *send_request, *recv_request;
201: PetscInt i, j, k;
202: PetscScalar *mat; /* Sub-matrix with this subdomain's contribution to the coarse matrix */
203: PetscScalar **DZ_OUT; /* proc[k].DZ_OUT[i][] = bit of vector to be sent from processor k to processor i */
205: /* aliasing some names */
206: PC_IS *pcis = (PC_IS *)(pc->data);
207: PC_NN *pcnn = (PC_NN *)pc->data;
208: PetscInt n_neigh = pcis->n_neigh;
209: PetscInt *neigh = pcis->neigh;
210: PetscInt *n_shared = pcis->n_shared;
211: PetscInt **shared = pcis->shared;
212: PetscScalar **DZ_IN; /* Must be initialized after memory allocation. */
214: /* Allocate memory for mat (the +1 is to handle the case n_neigh equal to zero) */
215: PetscMalloc1(n_neigh * n_neigh + 1, &mat);
217: /* Allocate memory for DZ */
218: /* Notice that DZ_OUT[0] is allocated some space that is never used. */
219: /* This is just in order to DZ_OUT and DZ_IN to have exactly the same form. */
220: {
221: PetscInt size_of_Z = 0;
222: PetscMalloc((n_neigh + 1) * sizeof(PetscScalar *), &pcnn->DZ_IN);
223: DZ_IN = pcnn->DZ_IN;
224: PetscMalloc((n_neigh + 1) * sizeof(PetscScalar *), &DZ_OUT);
225: for (i = 0; i < n_neigh; i++) size_of_Z += n_shared[i];
226: PetscMalloc((size_of_Z + 1) * sizeof(PetscScalar), &DZ_IN[0]);
227: PetscMalloc((size_of_Z + 1) * sizeof(PetscScalar), &DZ_OUT[0]);
228: }
229: for (i = 1; i < n_neigh; i++) {
230: DZ_IN[i] = DZ_IN[i - 1] + n_shared[i - 1];
231: DZ_OUT[i] = DZ_OUT[i - 1] + n_shared[i - 1];
232: }
234: /* Set the values of DZ_OUT, in order to send this info to the neighbours */
235: /* First, set the auxiliary array pcis->work_N. */
236: PCISScatterArrayNToVecB(pcis->work_N, pcis->D, INSERT_VALUES, SCATTER_REVERSE, pc);
237: for (i = 1; i < n_neigh; i++) {
238: for (j = 0; j < n_shared[i]; j++) DZ_OUT[i][j] = pcis->work_N[shared[i][j]];
239: }
241: /* Non-blocking send/receive the common-interface chunks of scaled nullspaces */
242: /* Notice that send_request[] and recv_request[] could have one less element. */
243: /* We make them longer to have request[i] corresponding to neigh[i]. */
244: {
245: PetscMPIInt tag;
246: PetscObjectGetNewTag((PetscObject)pc, &tag);
247: PetscMalloc2(n_neigh + 1, &send_request, n_neigh + 1, &recv_request);
248: for (i = 1; i < n_neigh; i++) {
249: MPI_Isend((void *)(DZ_OUT[i]), n_shared[i], MPIU_SCALAR, neigh[i], tag, PetscObjectComm((PetscObject)pc), &(send_request[i]));
250: MPI_Irecv((void *)(DZ_IN[i]), n_shared[i], MPIU_SCALAR, neigh[i], tag, PetscObjectComm((PetscObject)pc), &(recv_request[i]));
251: }
252: }
254: /* Set DZ_IN[0][] (recall that neigh[0]==rank, always) */
255: for (j = 0; j < n_shared[0]; j++) DZ_IN[0][j] = pcis->work_N[shared[0][j]];
257: /* Start computing with local D*Z while communication goes on. */
258: /* Apply Schur complement. The result is "stored" in vec (more */
259: /* precisely, vec points to the result, stored in pc_nn->vec1_B) */
260: /* and also scattered to pcnn->work_N. */
261: PCNNApplySchurToChunk(pc, n_shared[0], shared[0], DZ_IN[0], pcis->work_N, pcis->vec1_B, pcis->vec2_B, pcis->vec1_D, pcis->vec2_D);
263: /* Compute the first column, while completing the receiving. */
264: for (i = 0; i < n_neigh; i++) {
265: MPI_Status stat;
266: PetscMPIInt ind = 0;
267: if (i > 0) {
268: MPI_Waitany(n_neigh - 1, recv_request + 1, &ind, &stat);
269: ind++;
270: }
271: mat[ind * n_neigh + 0] = 0.0;
272: for (k = 0; k < n_shared[ind]; k++) mat[ind * n_neigh + 0] += DZ_IN[ind][k] * pcis->work_N[shared[ind][k]];
273: }
275: /* Compute the remaining of the columns */
276: for (j = 1; j < n_neigh; j++) {
277: PCNNApplySchurToChunk(pc, n_shared[j], shared[j], DZ_IN[j], pcis->work_N, pcis->vec1_B, pcis->vec2_B, pcis->vec1_D, pcis->vec2_D);
278: for (i = 0; i < n_neigh; i++) {
279: mat[i * n_neigh + j] = 0.0;
280: for (k = 0; k < n_shared[i]; k++) mat[i * n_neigh + j] += DZ_IN[i][k] * pcis->work_N[shared[i][k]];
281: }
282: }
284: /* Complete the sending. */
285: if (n_neigh > 1) {
286: MPI_Status *stat;
287: PetscMalloc1(n_neigh - 1, &stat);
288: if (n_neigh - 1) MPI_Waitall(n_neigh - 1, &(send_request[1]), stat);
289: PetscFree(stat);
290: }
292: /* Free the memory for the MPI requests */
293: PetscFree2(send_request, recv_request);
295: /* Free the memory for DZ_OUT */
296: if (DZ_OUT) {
297: PetscFree(DZ_OUT[0]);
298: PetscFree(DZ_OUT);
299: }
301: {
302: PetscMPIInt size;
303: MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size);
304: /* Create the global coarse vectors (rhs and solution). */
305: VecCreateMPI(PetscObjectComm((PetscObject)pc), 1, size, &(pcnn->coarse_b));
306: VecDuplicate(pcnn->coarse_b, &(pcnn->coarse_x));
307: /* Create and set the global coarse AIJ matrix. */
308: MatCreate(PetscObjectComm((PetscObject)pc), &(pcnn->coarse_mat));
309: MatSetSizes(pcnn->coarse_mat, 1, 1, size, size);
310: MatSetType(pcnn->coarse_mat, MATAIJ);
311: MatSeqAIJSetPreallocation(pcnn->coarse_mat, 1, NULL);
312: MatMPIAIJSetPreallocation(pcnn->coarse_mat, 1, NULL, n_neigh, NULL);
313: MatSetOption(pcnn->coarse_mat, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE);
314: MatSetOption(pcnn->coarse_mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE);
315: MatSetValues(pcnn->coarse_mat, n_neigh, neigh, n_neigh, neigh, mat, ADD_VALUES);
316: MatAssemblyBegin(pcnn->coarse_mat, MAT_FINAL_ASSEMBLY);
317: MatAssemblyEnd(pcnn->coarse_mat, MAT_FINAL_ASSEMBLY);
318: }
320: {
321: PetscMPIInt rank;
322: PetscScalar one = 1.0;
323: MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank);
324: /* "Zero out" rows of not-purely-Neumann subdomains */
325: if (pcis->pure_neumann) { /* does NOT zero the row; create an empty index set. The reason is that MatZeroRows() is collective. */
326: MatZeroRows(pcnn->coarse_mat, 0, NULL, one, NULL, NULL);
327: } else { /* here it DOES zero the row, since it's not a floating subdomain. */
328: PetscInt row = (PetscInt)rank;
329: MatZeroRows(pcnn->coarse_mat, 1, &row, one, NULL, NULL);
330: }
331: }
333: /* Create the coarse linear solver context */
334: {
335: PC pc_ctx, inner_pc;
336: KSP inner_ksp;
338: KSPCreate(PetscObjectComm((PetscObject)pc), &pcnn->ksp_coarse);
339: PetscObjectIncrementTabLevel((PetscObject)pcnn->ksp_coarse, (PetscObject)pc, 2);
340: KSPSetOperators(pcnn->ksp_coarse, pcnn->coarse_mat, pcnn->coarse_mat);
341: KSPGetPC(pcnn->ksp_coarse, &pc_ctx);
342: PCSetType(pc_ctx, PCREDUNDANT);
343: KSPSetType(pcnn->ksp_coarse, KSPPREONLY);
344: PCRedundantGetKSP(pc_ctx, &inner_ksp);
345: KSPGetPC(inner_ksp, &inner_pc);
346: PCSetType(inner_pc, PCLU);
347: KSPSetOptionsPrefix(pcnn->ksp_coarse, "nn_coarse_");
348: KSPSetFromOptions(pcnn->ksp_coarse);
349: /* the vectors in the following line are dummy arguments, just telling the KSP the vector size. Values are not used */
350: KSPSetUp(pcnn->ksp_coarse);
351: }
353: /* Free the memory for mat */
354: PetscFree(mat);
356: /* for DEBUGGING, save the coarse matrix to a file. */
357: {
358: PetscBool flg = PETSC_FALSE;
359: PetscOptionsGetBool(NULL, NULL, "-pc_nn_save_coarse_matrix", &flg, NULL);
360: if (flg) {
361: PetscViewer viewer;
362: PetscViewerASCIIOpen(PETSC_COMM_WORLD, "coarse.m", &viewer);
363: PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_MATLAB);
364: MatView(pcnn->coarse_mat, viewer);
365: PetscViewerPopFormat(viewer);
366: PetscViewerDestroy(&viewer);
367: }
368: }
370: /* Set the variable pcnn->factor_coarse_rhs. */
371: pcnn->factor_coarse_rhs = (pcis->pure_neumann) ? 1.0 : 0.0;
373: /* See historical note 02, at the bottom of this file. */
374: return 0;
375: }
377: /*
378: PCNNApplySchurToChunk -
380: Input parameters:
381: . pcnn
382: . n - size of chunk
383: . idx - indices of chunk
384: . chunk - values
386: Output parameters:
387: . array_N - result of Schur complement applied to chunk, scattered to big array
388: . vec1_B - result of Schur complement applied to chunk
389: . vec2_B - garbage (used as work space)
390: . vec1_D - garbage (used as work space)
391: . vec2_D - garbage (used as work space)
393: */
394: PetscErrorCode PCNNApplySchurToChunk(PC pc, PetscInt n, PetscInt *idx, PetscScalar *chunk, PetscScalar *array_N, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
395: {
396: PetscInt i;
397: PC_IS *pcis = (PC_IS *)(pc->data);
399: PetscArrayzero(array_N, pcis->n);
400: for (i = 0; i < n; i++) array_N[idx[i]] = chunk[i];
401: PCISScatterArrayNToVecB(array_N, vec2_B, INSERT_VALUES, SCATTER_FORWARD, pc);
402: PCISApplySchur(pc, vec2_B, vec1_B, (Vec)0, vec1_D, vec2_D);
403: PCISScatterArrayNToVecB(array_N, vec1_B, INSERT_VALUES, SCATTER_REVERSE, pc);
404: return 0;
405: }
407: /*
408: PCNNApplyInterfacePreconditioner - Apply the interface preconditioner, i.e.,
409: the preconditioner for the Schur complement.
411: Input parameter:
412: . r - global vector of interior and interface nodes. The values on the interior nodes are NOT used.
414: Output parameters:
415: . z - global vector of interior and interface nodes. The values on the interface are the result of
416: the application of the interface preconditioner to the interface part of r. The values on the
417: interior nodes are garbage.
418: . work_N - array of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
419: . vec1_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
420: . vec2_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
421: . vec3_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
422: . vec1_D - vector of local interior nodes; returns garbage (used as work space)
423: . vec2_D - vector of local interior nodes; returns garbage (used as work space)
424: . vec1_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
425: . vec2_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
427: */
428: PetscErrorCode PCNNApplyInterfacePreconditioner(PC pc, Vec r, Vec z, PetscScalar *work_N, Vec vec1_B, Vec vec2_B, Vec vec3_B, Vec vec1_D, Vec vec2_D, Vec vec1_N, Vec vec2_N)
429: {
430: PC_IS *pcis = (PC_IS *)(pc->data);
432: /*
433: First balancing step.
434: */
435: {
436: PetscBool flg = PETSC_FALSE;
437: PetscOptionsGetBool(NULL, NULL, "-pc_nn_turn_off_first_balancing", &flg, NULL);
438: if (!flg) {
439: PCNNBalancing(pc, r, (Vec)0, z, vec1_B, vec2_B, (Vec)0, vec1_D, vec2_D, work_N);
440: } else {
441: VecCopy(r, z);
442: }
443: }
445: /*
446: Extract the local interface part of z and scale it by D
447: */
448: VecScatterBegin(pcis->global_to_B, z, vec1_B, INSERT_VALUES, SCATTER_FORWARD);
449: VecScatterEnd(pcis->global_to_B, z, vec1_B, INSERT_VALUES, SCATTER_FORWARD);
450: VecPointwiseMult(vec2_B, pcis->D, vec1_B);
452: /* Neumann Solver */
453: PCISApplyInvSchur(pc, vec2_B, vec1_B, vec1_N, vec2_N);
455: /*
456: Second balancing step.
457: */
458: {
459: PetscBool flg = PETSC_FALSE;
460: PetscOptionsGetBool(NULL, NULL, "-pc_turn_off_second_balancing", &flg, NULL);
461: if (!flg) {
462: PCNNBalancing(pc, r, vec1_B, z, vec2_B, vec3_B, (Vec)0, vec1_D, vec2_D, work_N);
463: } else {
464: VecPointwiseMult(vec2_B, pcis->D, vec1_B);
465: VecSet(z, 0.0);
466: VecScatterBegin(pcis->global_to_B, vec2_B, z, ADD_VALUES, SCATTER_REVERSE);
467: VecScatterEnd(pcis->global_to_B, vec2_B, z, ADD_VALUES, SCATTER_REVERSE);
468: }
469: }
470: return 0;
471: }
473: /*
474: PCNNBalancing - Computes z, as given in equations (15) and (16) (if the
475: input argument u is provided), or s, as given in equations
476: (12) and (13), if the input argument u is a null vector.
477: Notice that the input argument u plays the role of u_i in
478: equation (14). The equation numbers refer to [Man93].
480: Input Parameters:
481: . pcnn - NN preconditioner context.
482: . r - MPI vector of all nodes (interior and interface). It's preserved.
483: . u - (Optional) sequential vector of local interface nodes. It's preserved UNLESS vec3_B is null.
485: Output Parameters:
486: . z - MPI vector of interior and interface nodes. Returns s or z (see description above).
487: . vec1_B - Sequential vector of local interface nodes. Workspace.
488: . vec2_B - Sequential vector of local interface nodes. Workspace.
489: . vec3_B - (Optional) sequential vector of local interface nodes. Workspace.
490: . vec1_D - Sequential vector of local interior nodes. Workspace.
491: . vec2_D - Sequential vector of local interior nodes. Workspace.
492: . work_N - Array of all local nodes (interior and interface). Workspace.
494: */
495: PetscErrorCode PCNNBalancing(PC pc, Vec r, Vec u, Vec z, Vec vec1_B, Vec vec2_B, Vec vec3_B, Vec vec1_D, Vec vec2_D, PetscScalar *work_N)
496: {
497: PetscInt k;
498: PetscScalar value;
499: PetscScalar *lambda;
500: PC_NN *pcnn = (PC_NN *)(pc->data);
501: PC_IS *pcis = (PC_IS *)(pc->data);
503: PetscLogEventBegin(PC_ApplyCoarse, pc, 0, 0, 0);
504: if (u) {
505: if (!vec3_B) vec3_B = u;
506: VecPointwiseMult(vec1_B, pcis->D, u);
507: VecSet(z, 0.0);
508: VecScatterBegin(pcis->global_to_B, vec1_B, z, ADD_VALUES, SCATTER_REVERSE);
509: VecScatterEnd(pcis->global_to_B, vec1_B, z, ADD_VALUES, SCATTER_REVERSE);
510: VecScatterBegin(pcis->global_to_B, z, vec2_B, INSERT_VALUES, SCATTER_FORWARD);
511: VecScatterEnd(pcis->global_to_B, z, vec2_B, INSERT_VALUES, SCATTER_FORWARD);
512: PCISApplySchur(pc, vec2_B, vec3_B, (Vec)0, vec1_D, vec2_D);
513: VecScale(vec3_B, -1.0);
514: VecCopy(r, z);
515: VecScatterBegin(pcis->global_to_B, vec3_B, z, ADD_VALUES, SCATTER_REVERSE);
516: VecScatterEnd(pcis->global_to_B, vec3_B, z, ADD_VALUES, SCATTER_REVERSE);
517: } else {
518: VecCopy(r, z);
519: }
520: VecScatterBegin(pcis->global_to_B, z, vec2_B, INSERT_VALUES, SCATTER_FORWARD);
521: VecScatterEnd(pcis->global_to_B, z, vec2_B, INSERT_VALUES, SCATTER_FORWARD);
522: PCISScatterArrayNToVecB(work_N, vec2_B, INSERT_VALUES, SCATTER_REVERSE, pc);
523: for (k = 0, value = 0.0; k < pcis->n_shared[0]; k++) value += pcnn->DZ_IN[0][k] * work_N[pcis->shared[0][k]];
524: value *= pcnn->factor_coarse_rhs; /* This factor is set in CreateCoarseMatrix(). */
525: {
526: PetscMPIInt rank;
527: MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank);
528: VecSetValue(pcnn->coarse_b, rank, value, INSERT_VALUES);
529: /*
530: Since we are only inserting local values (one value actually) we don't need to do the
531: reduction that tells us there is no data that needs to be moved. Hence we comment out these
532: VecAssemblyBegin(pcnn->coarse_b);
533: VecAssemblyEnd (pcnn->coarse_b);
534: */
535: }
536: KSPSolve(pcnn->ksp_coarse, pcnn->coarse_b, pcnn->coarse_x);
537: if (!u) VecScale(pcnn->coarse_x, -1.0);
538: VecGetArray(pcnn->coarse_x, &lambda);
539: for (k = 0; k < pcis->n_shared[0]; k++) work_N[pcis->shared[0][k]] = *lambda * pcnn->DZ_IN[0][k];
540: VecRestoreArray(pcnn->coarse_x, &lambda);
541: PCISScatterArrayNToVecB(work_N, vec2_B, INSERT_VALUES, SCATTER_FORWARD, pc);
542: VecSet(z, 0.0);
543: VecScatterBegin(pcis->global_to_B, vec2_B, z, ADD_VALUES, SCATTER_REVERSE);
544: VecScatterEnd(pcis->global_to_B, vec2_B, z, ADD_VALUES, SCATTER_REVERSE);
545: if (!u) {
546: VecScatterBegin(pcis->global_to_B, z, vec2_B, INSERT_VALUES, SCATTER_FORWARD);
547: VecScatterEnd(pcis->global_to_B, z, vec2_B, INSERT_VALUES, SCATTER_FORWARD);
548: PCISApplySchur(pc, vec2_B, vec1_B, (Vec)0, vec1_D, vec2_D);
549: VecCopy(r, z);
550: }
551: VecScatterBegin(pcis->global_to_B, vec1_B, z, ADD_VALUES, SCATTER_REVERSE);
552: VecScatterEnd(pcis->global_to_B, vec1_B, z, ADD_VALUES, SCATTER_REVERSE);
553: PetscLogEventEnd(PC_ApplyCoarse, pc, 0, 0, 0);
554: return 0;
555: }
557: /* */
558: /* From now on, "footnotes" (or "historical notes"). */
559: /* */
560: /*
561: Historical note 01
563: We considered the possibility of an alternative D_i that would still
564: provide a partition of unity (i.e., $ \sum_i N_i D_i N_i^T = I $).
565: The basic principle was still the pseudo-inverse of the counting
566: function; the difference was that we would not count subdomains
567: that do not contribute to the coarse space (i.e., not pure-Neumann
568: subdomains).
570: This turned out to be a bad idea: we would solve trivial Neumann
571: problems in the not pure-Neumann subdomains, since we would be scaling
572: the balanced residual by zero.
573: */
575: /*
576: Historical note 02
578: We tried an alternative coarse problem, that would eliminate exactly a
579: constant error. Turned out not to improve the overall convergence.
580: */