Actual source code: bddcscalingbasic.c
1: #include <petsc/private/pcbddcimpl.h>
2: #include <petsc/private/pcbddcprivateimpl.h>
3: #include <petscblaslapack.h>
4: #include <../src/mat/impls/dense/seq/dense.h>
6: /* prototypes for deluxe functions */
7: static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC);
8: static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC);
9: static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC);
10: static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Private(PC);
11: static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling);
13: static PetscErrorCode PCBDDCMatTransposeMatSolve_SeqDense(Mat A, Mat B, Mat X)
14: {
15: Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
16: const PetscScalar *b;
17: PetscScalar *x;
18: PetscInt n;
19: PetscBLASInt nrhs, info, m;
20: PetscBool flg;
22: PetscBLASIntCast(A->rmap->n, &m);
23: PetscObjectTypeCompareAny((PetscObject)B, &flg, MATSEQDENSE, MATMPIDENSE, NULL);
25: PetscObjectTypeCompareAny((PetscObject)X, &flg, MATSEQDENSE, MATMPIDENSE, NULL);
28: MatGetSize(B, NULL, &n);
29: PetscBLASIntCast(n, &nrhs);
30: MatDenseGetArrayRead(B, &b);
31: MatDenseGetArray(X, &x);
32: PetscArraycpy(x, b, m * nrhs);
33: MatDenseRestoreArrayRead(B, &b);
35: if (A->factortype == MAT_FACTOR_LU) {
36: PetscCallBLAS("LAPACKgetrs", LAPACKgetrs_("T", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info));
38: } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only LU factor supported");
40: MatDenseRestoreArray(X, &x);
41: PetscLogFlops(nrhs * (2.0 * m * m - m));
42: return 0;
43: }
45: static PetscErrorCode PCBDDCScalingExtension_Basic(PC pc, Vec local_interface_vector, Vec global_vector)
46: {
47: PC_IS *pcis = (PC_IS *)pc->data;
48: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
50: /* Apply partition of unity */
51: VecPointwiseMult(pcbddc->work_scaling, pcis->D, local_interface_vector);
52: VecSet(global_vector, 0.0);
53: VecScatterBegin(pcis->global_to_B, pcbddc->work_scaling, global_vector, ADD_VALUES, SCATTER_REVERSE);
54: VecScatterEnd(pcis->global_to_B, pcbddc->work_scaling, global_vector, ADD_VALUES, SCATTER_REVERSE);
55: return 0;
56: }
58: static PetscErrorCode PCBDDCScalingExtension_Deluxe(PC pc, Vec x, Vec y)
59: {
60: PC_IS *pcis = (PC_IS *)pc->data;
61: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
62: PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx;
64: VecSet(pcbddc->work_scaling, 0.0);
65: VecSet(y, 0.0);
66: if (deluxe_ctx->n_simple) { /* scale deluxe vertices using diagonal scaling */
67: PetscInt i;
68: const PetscScalar *array_x, *array_D;
69: PetscScalar *array;
70: VecGetArrayRead(x, &array_x);
71: VecGetArrayRead(pcis->D, &array_D);
72: VecGetArray(pcbddc->work_scaling, &array);
73: for (i = 0; i < deluxe_ctx->n_simple; i++) array[deluxe_ctx->idx_simple_B[i]] = array_x[deluxe_ctx->idx_simple_B[i]] * array_D[deluxe_ctx->idx_simple_B[i]];
74: VecRestoreArray(pcbddc->work_scaling, &array);
75: VecRestoreArrayRead(pcis->D, &array_D);
76: VecRestoreArrayRead(x, &array_x);
77: }
78: /* sequential part : all problems and Schur applications collapsed into a single matrix vector multiplication or a matvec and a solve */
79: if (deluxe_ctx->seq_mat) {
80: PetscInt i;
81: for (i = 0; i < deluxe_ctx->seq_n; i++) {
82: if (deluxe_ctx->change) {
83: VecScatterBegin(deluxe_ctx->seq_scctx[i], x, deluxe_ctx->seq_work2[i], INSERT_VALUES, SCATTER_FORWARD);
84: VecScatterEnd(deluxe_ctx->seq_scctx[i], x, deluxe_ctx->seq_work2[i], INSERT_VALUES, SCATTER_FORWARD);
85: if (deluxe_ctx->change_with_qr) {
86: Mat change;
88: KSPGetOperators(deluxe_ctx->change[i], &change, NULL);
89: MatMultTranspose(change, deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i]);
90: } else {
91: KSPSolve(deluxe_ctx->change[i], deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i]);
92: }
93: } else {
94: VecScatterBegin(deluxe_ctx->seq_scctx[i], x, deluxe_ctx->seq_work1[i], INSERT_VALUES, SCATTER_FORWARD);
95: VecScatterEnd(deluxe_ctx->seq_scctx[i], x, deluxe_ctx->seq_work1[i], INSERT_VALUES, SCATTER_FORWARD);
96: }
97: MatMultTranspose(deluxe_ctx->seq_mat[i], deluxe_ctx->seq_work1[i], deluxe_ctx->seq_work2[i]);
98: if (deluxe_ctx->seq_mat_inv_sum[i]) {
99: PetscScalar *x;
101: VecGetArray(deluxe_ctx->seq_work2[i], &x);
102: VecPlaceArray(deluxe_ctx->seq_work1[i], x);
103: VecRestoreArray(deluxe_ctx->seq_work2[i], &x);
104: MatSolveTranspose(deluxe_ctx->seq_mat_inv_sum[i], deluxe_ctx->seq_work1[i], deluxe_ctx->seq_work2[i]);
105: VecResetArray(deluxe_ctx->seq_work1[i]);
106: }
107: if (deluxe_ctx->change) {
108: Mat change;
110: KSPGetOperators(deluxe_ctx->change[i], &change, NULL);
111: MatMult(change, deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i]);
112: VecScatterBegin(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work1[i], pcbddc->work_scaling, INSERT_VALUES, SCATTER_REVERSE);
113: VecScatterEnd(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work1[i], pcbddc->work_scaling, INSERT_VALUES, SCATTER_REVERSE);
114: } else {
115: VecScatterBegin(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work2[i], pcbddc->work_scaling, INSERT_VALUES, SCATTER_REVERSE);
116: VecScatterEnd(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work2[i], pcbddc->work_scaling, INSERT_VALUES, SCATTER_REVERSE);
117: }
118: }
119: }
120: /* put local boundary part in global vector */
121: VecScatterBegin(pcis->global_to_B, pcbddc->work_scaling, y, ADD_VALUES, SCATTER_REVERSE);
122: VecScatterEnd(pcis->global_to_B, pcbddc->work_scaling, y, ADD_VALUES, SCATTER_REVERSE);
123: return 0;
124: }
126: PetscErrorCode PCBDDCScalingExtension(PC pc, Vec local_interface_vector, Vec global_vector)
127: {
128: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
134: PetscUseMethod(pc, "PCBDDCScalingExtension_C", (PC, Vec, Vec), (pc, local_interface_vector, global_vector));
135: return 0;
136: }
138: static PetscErrorCode PCBDDCScalingRestriction_Basic(PC pc, Vec global_vector, Vec local_interface_vector)
139: {
140: PC_IS *pcis = (PC_IS *)pc->data;
142: VecScatterBegin(pcis->global_to_B, global_vector, local_interface_vector, INSERT_VALUES, SCATTER_FORWARD);
143: VecScatterEnd(pcis->global_to_B, global_vector, local_interface_vector, INSERT_VALUES, SCATTER_FORWARD);
144: /* Apply partition of unity */
145: VecPointwiseMult(local_interface_vector, pcis->D, local_interface_vector);
146: return 0;
147: }
149: static PetscErrorCode PCBDDCScalingRestriction_Deluxe(PC pc, Vec x, Vec y)
150: {
151: PC_IS *pcis = (PC_IS *)pc->data;
152: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
153: PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx;
155: /* get local boundary part of global vector */
156: VecScatterBegin(pcis->global_to_B, x, y, INSERT_VALUES, SCATTER_FORWARD);
157: VecScatterEnd(pcis->global_to_B, x, y, INSERT_VALUES, SCATTER_FORWARD);
158: if (deluxe_ctx->n_simple) { /* scale deluxe vertices using diagonal scaling */
159: PetscInt i;
160: PetscScalar *array_y;
161: const PetscScalar *array_D;
162: VecGetArray(y, &array_y);
163: VecGetArrayRead(pcis->D, &array_D);
164: for (i = 0; i < deluxe_ctx->n_simple; i++) array_y[deluxe_ctx->idx_simple_B[i]] *= array_D[deluxe_ctx->idx_simple_B[i]];
165: VecRestoreArrayRead(pcis->D, &array_D);
166: VecRestoreArray(y, &array_y);
167: }
168: /* sequential part : all problems and Schur applications collapsed into a single matrix vector multiplication or a matvec and a solve */
169: if (deluxe_ctx->seq_mat) {
170: PetscInt i;
171: for (i = 0; i < deluxe_ctx->seq_n; i++) {
172: if (deluxe_ctx->change) {
173: Mat change;
175: VecScatterBegin(deluxe_ctx->seq_scctx[i], y, deluxe_ctx->seq_work2[i], INSERT_VALUES, SCATTER_FORWARD);
176: VecScatterEnd(deluxe_ctx->seq_scctx[i], y, deluxe_ctx->seq_work2[i], INSERT_VALUES, SCATTER_FORWARD);
177: KSPGetOperators(deluxe_ctx->change[i], &change, NULL);
178: MatMultTranspose(change, deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i]);
179: } else {
180: VecScatterBegin(deluxe_ctx->seq_scctx[i], y, deluxe_ctx->seq_work1[i], INSERT_VALUES, SCATTER_FORWARD);
181: VecScatterEnd(deluxe_ctx->seq_scctx[i], y, deluxe_ctx->seq_work1[i], INSERT_VALUES, SCATTER_FORWARD);
182: }
183: if (deluxe_ctx->seq_mat_inv_sum[i]) {
184: PetscScalar *x;
186: VecGetArray(deluxe_ctx->seq_work1[i], &x);
187: VecPlaceArray(deluxe_ctx->seq_work2[i], x);
188: VecRestoreArray(deluxe_ctx->seq_work1[i], &x);
189: MatSolve(deluxe_ctx->seq_mat_inv_sum[i], deluxe_ctx->seq_work1[i], deluxe_ctx->seq_work2[i]);
190: VecResetArray(deluxe_ctx->seq_work2[i]);
191: }
192: MatMult(deluxe_ctx->seq_mat[i], deluxe_ctx->seq_work1[i], deluxe_ctx->seq_work2[i]);
193: if (deluxe_ctx->change) {
194: if (deluxe_ctx->change_with_qr) {
195: Mat change;
197: KSPGetOperators(deluxe_ctx->change[i], &change, NULL);
198: MatMult(change, deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i]);
199: } else {
200: KSPSolveTranspose(deluxe_ctx->change[i], deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i]);
201: }
202: VecScatterBegin(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work1[i], y, INSERT_VALUES, SCATTER_REVERSE);
203: VecScatterEnd(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work1[i], y, INSERT_VALUES, SCATTER_REVERSE);
204: } else {
205: VecScatterBegin(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work2[i], y, INSERT_VALUES, SCATTER_REVERSE);
206: VecScatterEnd(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work2[i], y, INSERT_VALUES, SCATTER_REVERSE);
207: }
208: }
209: }
210: return 0;
211: }
213: PetscErrorCode PCBDDCScalingRestriction(PC pc, Vec global_vector, Vec local_interface_vector)
214: {
215: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
221: PetscUseMethod(pc, "PCBDDCScalingRestriction_C", (PC, Vec, Vec), (pc, global_vector, local_interface_vector));
222: return 0;
223: }
225: PetscErrorCode PCBDDCScalingSetUp(PC pc)
226: {
227: PC_IS *pcis = (PC_IS *)pc->data;
228: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
231: PetscLogEventBegin(PC_BDDC_Scaling[pcbddc->current_level], pc, 0, 0, 0);
232: /* create work vector for the operator */
233: VecDestroy(&pcbddc->work_scaling);
234: VecDuplicate(pcis->vec1_B, &pcbddc->work_scaling);
235: /* always rebuild pcis->D */
236: if (pcis->use_stiffness_scaling) {
237: PetscScalar *a;
238: PetscInt i, n;
240: MatGetDiagonal(pcbddc->local_mat, pcis->vec1_N);
241: VecScatterBegin(pcis->N_to_B, pcis->vec1_N, pcis->D, INSERT_VALUES, SCATTER_FORWARD);
242: VecScatterEnd(pcis->N_to_B, pcis->vec1_N, pcis->D, INSERT_VALUES, SCATTER_FORWARD);
243: VecAbs(pcis->D);
244: VecGetLocalSize(pcis->D, &n);
245: VecGetArray(pcis->D, &a);
246: for (i = 0; i < n; i++)
247: if (PetscAbsScalar(a[i]) < PETSC_SMALL) a[i] = 1.0;
248: VecRestoreArray(pcis->D, &a);
249: }
250: VecSet(pcis->vec1_global, 0.0);
251: VecScatterBegin(pcis->global_to_B, pcis->D, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE);
252: VecScatterEnd(pcis->global_to_B, pcis->D, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE);
253: VecScatterBegin(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD);
254: VecScatterEnd(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD);
255: VecPointwiseDivide(pcis->D, pcis->D, pcis->vec1_B);
256: /* now setup */
257: if (pcbddc->use_deluxe_scaling) {
258: if (!pcbddc->deluxe_ctx) PCBDDCScalingCreate_Deluxe(pc);
259: PCBDDCScalingSetUp_Deluxe(pc);
260: PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingRestriction_C", PCBDDCScalingRestriction_Deluxe);
261: PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingExtension_C", PCBDDCScalingExtension_Deluxe);
262: } else {
263: PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingRestriction_C", PCBDDCScalingRestriction_Basic);
264: PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingExtension_C", PCBDDCScalingExtension_Basic);
265: }
266: PetscLogEventEnd(PC_BDDC_Scaling[pcbddc->current_level], pc, 0, 0, 0);
268: /* test */
269: if (pcbddc->dbg_flag) {
270: Mat B0_B = NULL;
271: Vec B0_Bv = NULL, B0_Bv2 = NULL;
272: Vec vec2_global;
273: PetscViewer viewer = pcbddc->dbg_viewer;
274: PetscReal error;
276: /* extension -> from local to parallel */
277: VecSet(pcis->vec1_global, 0.0);
278: VecSetRandom(pcis->vec1_B, NULL);
279: VecScatterBegin(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE);
280: VecScatterEnd(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE);
281: VecDuplicate(pcis->vec1_global, &vec2_global);
282: VecCopy(pcis->vec1_global, vec2_global);
283: VecScatterBegin(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD);
284: VecScatterEnd(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD);
285: if (pcbddc->benign_n) {
286: IS is_dummy;
288: ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy);
289: MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B);
290: ISDestroy(&is_dummy);
291: MatCreateVecs(B0_B, NULL, &B0_Bv);
292: VecDuplicate(B0_Bv, &B0_Bv2);
293: MatMult(B0_B, pcis->vec1_B, B0_Bv);
294: }
295: PCBDDCScalingExtension(pc, pcis->vec1_B, pcis->vec1_global);
296: if (pcbddc->benign_saddle_point) {
297: PetscReal errorl = 0.;
298: VecScatterBegin(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD);
299: VecScatterEnd(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD);
300: if (pcbddc->benign_n) {
301: MatMult(B0_B, pcis->vec1_B, B0_Bv2);
302: VecAXPY(B0_Bv, -1.0, B0_Bv2);
303: VecNorm(B0_Bv, NORM_INFINITY, &errorl);
304: }
305: MPI_Allreduce(&errorl, &error, 1, MPIU_REAL, MPI_SUM, PetscObjectComm((PetscObject)pc));
306: PetscViewerASCIIPrintf(viewer, "Error benign extension %1.14e\n", (double)error);
307: }
308: VecAXPY(pcis->vec1_global, -1.0, vec2_global);
309: VecNorm(pcis->vec1_global, NORM_INFINITY, &error);
310: PetscViewerASCIIPrintf(viewer, "Error scaling extension %1.14e\n", (double)error);
311: VecDestroy(&vec2_global);
313: /* restriction -> from parallel to local */
314: VecSet(pcis->vec1_global, 0.0);
315: VecSetRandom(pcis->vec1_B, NULL);
316: VecScatterBegin(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE);
317: VecScatterEnd(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE);
318: PCBDDCScalingRestriction(pc, pcis->vec1_global, pcis->vec1_B);
319: VecScale(pcis->vec1_B, -1.0);
320: VecScatterBegin(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE);
321: VecScatterEnd(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE);
322: VecNorm(pcis->vec1_global, NORM_INFINITY, &error);
323: PetscViewerASCIIPrintf(viewer, "Error scaling restriction %1.14e\n", (double)error);
324: MatDestroy(&B0_B);
325: VecDestroy(&B0_Bv);
326: VecDestroy(&B0_Bv2);
327: }
328: return 0;
329: }
331: PetscErrorCode PCBDDCScalingDestroy(PC pc)
332: {
333: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
335: if (pcbddc->deluxe_ctx) PCBDDCScalingDestroy_Deluxe(pc);
336: VecDestroy(&pcbddc->work_scaling);
337: PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingRestriction_C", NULL);
338: PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingExtension_C", NULL);
339: return 0;
340: }
342: static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC pc)
343: {
344: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
345: PCBDDCDeluxeScaling deluxe_ctx;
347: PetscNew(&deluxe_ctx);
348: pcbddc->deluxe_ctx = deluxe_ctx;
349: return 0;
350: }
352: static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC pc)
353: {
354: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
356: PCBDDCScalingReset_Deluxe_Solvers(pcbddc->deluxe_ctx);
357: PetscFree(pcbddc->deluxe_ctx);
358: return 0;
359: }
361: static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling deluxe_ctx)
362: {
363: PetscInt i;
365: PetscFree(deluxe_ctx->idx_simple_B);
366: deluxe_ctx->n_simple = 0;
367: for (i = 0; i < deluxe_ctx->seq_n; i++) {
368: VecScatterDestroy(&deluxe_ctx->seq_scctx[i]);
369: VecDestroy(&deluxe_ctx->seq_work1[i]);
370: VecDestroy(&deluxe_ctx->seq_work2[i]);
371: MatDestroy(&deluxe_ctx->seq_mat[i]);
372: MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]);
373: }
374: PetscFree5(deluxe_ctx->seq_scctx, deluxe_ctx->seq_work1, deluxe_ctx->seq_work2, deluxe_ctx->seq_mat, deluxe_ctx->seq_mat_inv_sum);
375: PetscFree(deluxe_ctx->workspace);
376: deluxe_ctx->seq_n = 0;
377: return 0;
378: }
380: static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC pc)
381: {
382: PC_IS *pcis = (PC_IS *)pc->data;
383: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
384: PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx;
385: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
387: /* reset data structures if the topology has changed */
388: if (pcbddc->recompute_topography) PCBDDCScalingReset_Deluxe_Solvers(deluxe_ctx);
390: /* Compute data structures to solve sequential problems */
391: PCBDDCScalingSetUp_Deluxe_Private(pc);
393: /* diagonal scaling on interface dofs not contained in cc */
394: if (sub_schurs->is_vertices || sub_schurs->is_dir) {
395: PetscInt n_com, n_dir;
396: n_com = 0;
397: if (sub_schurs->is_vertices) ISGetLocalSize(sub_schurs->is_vertices, &n_com);
398: n_dir = 0;
399: if (sub_schurs->is_dir) ISGetLocalSize(sub_schurs->is_dir, &n_dir);
400: if (!deluxe_ctx->n_simple) {
401: deluxe_ctx->n_simple = n_dir + n_com;
402: PetscMalloc1(deluxe_ctx->n_simple, &deluxe_ctx->idx_simple_B);
403: if (sub_schurs->is_vertices) {
404: PetscInt nmap;
405: const PetscInt *idxs;
407: ISGetIndices(sub_schurs->is_vertices, &idxs);
408: ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, n_com, idxs, &nmap, deluxe_ctx->idx_simple_B);
410: ISRestoreIndices(sub_schurs->is_vertices, &idxs);
411: }
412: if (sub_schurs->is_dir) {
413: PetscInt nmap;
414: const PetscInt *idxs;
416: ISGetIndices(sub_schurs->is_dir, &idxs);
417: ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, n_dir, idxs, &nmap, deluxe_ctx->idx_simple_B + n_com);
419: ISRestoreIndices(sub_schurs->is_dir, &idxs);
420: }
421: PetscSortInt(deluxe_ctx->n_simple, deluxe_ctx->idx_simple_B);
422: } else {
424: }
425: } else {
426: deluxe_ctx->n_simple = 0;
427: deluxe_ctx->idx_simple_B = NULL;
428: }
429: return 0;
430: }
432: static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Private(PC pc)
433: {
434: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
435: PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx;
436: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
437: PetscScalar *matdata, *matdata2;
438: PetscInt i, max_subset_size, cum, cum2;
439: const PetscInt *idxs;
440: PetscBool newsetup = PETSC_FALSE;
443: if (!sub_schurs->n_subs) return 0;
445: /* Allocate arrays for subproblems */
446: if (!deluxe_ctx->seq_n) {
447: deluxe_ctx->seq_n = sub_schurs->n_subs;
448: PetscCalloc5(deluxe_ctx->seq_n, &deluxe_ctx->seq_scctx, deluxe_ctx->seq_n, &deluxe_ctx->seq_work1, deluxe_ctx->seq_n, &deluxe_ctx->seq_work2, deluxe_ctx->seq_n, &deluxe_ctx->seq_mat, deluxe_ctx->seq_n, &deluxe_ctx->seq_mat_inv_sum);
449: newsetup = PETSC_TRUE;
452: /* the change of basis is just a reference to sub_schurs->change (if any) */
453: deluxe_ctx->change = sub_schurs->change;
454: deluxe_ctx->change_with_qr = sub_schurs->change_with_qr;
456: /* Create objects for deluxe */
457: max_subset_size = 0;
458: for (i = 0; i < sub_schurs->n_subs; i++) {
459: PetscInt subset_size;
460: ISGetLocalSize(sub_schurs->is_subs[i], &subset_size);
461: max_subset_size = PetscMax(subset_size, max_subset_size);
462: }
463: if (newsetup) PetscMalloc1(2 * max_subset_size, &deluxe_ctx->workspace);
464: cum = cum2 = 0;
465: ISGetIndices(sub_schurs->is_Ej_all, &idxs);
466: MatSeqAIJGetArray(sub_schurs->S_Ej_all, &matdata);
467: MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &matdata2);
468: for (i = 0; i < deluxe_ctx->seq_n; i++) {
469: PetscInt subset_size;
471: ISGetLocalSize(sub_schurs->is_subs[i], &subset_size);
472: if (newsetup) {
473: IS sub;
474: /* work vectors */
475: VecCreateSeqWithArray(PETSC_COMM_SELF, 1, subset_size, deluxe_ctx->workspace, &deluxe_ctx->seq_work1[i]);
476: VecCreateSeqWithArray(PETSC_COMM_SELF, 1, subset_size, deluxe_ctx->workspace + subset_size, &deluxe_ctx->seq_work2[i]);
478: /* scatters */
479: ISCreateGeneral(PETSC_COMM_SELF, subset_size, idxs + cum, PETSC_COPY_VALUES, &sub);
480: VecScatterCreate(pcbddc->work_scaling, sub, deluxe_ctx->seq_work1[i], NULL, &deluxe_ctx->seq_scctx[i]);
481: ISDestroy(&sub);
482: }
484: /* S_E_j */
485: MatDestroy(&deluxe_ctx->seq_mat[i]);
486: MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, matdata + cum2, &deluxe_ctx->seq_mat[i]);
488: /* \sum_k S^k_E_j */
489: MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]);
490: MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, matdata2 + cum2, &deluxe_ctx->seq_mat_inv_sum[i]);
491: MatSetOption(deluxe_ctx->seq_mat_inv_sum[i], MAT_SPD, sub_schurs->is_posdef);
492: MatSetOption(deluxe_ctx->seq_mat_inv_sum[i], MAT_HERMITIAN, sub_schurs->is_hermitian);
493: if (sub_schurs->is_hermitian) {
494: MatCholeskyFactor(deluxe_ctx->seq_mat_inv_sum[i], NULL, NULL);
495: } else {
496: MatLUFactor(deluxe_ctx->seq_mat_inv_sum[i], NULL, NULL, NULL);
497: }
498: if (pcbddc->deluxe_singlemat) {
499: Mat X, Y;
500: if (!sub_schurs->is_hermitian) {
501: MatTranspose(deluxe_ctx->seq_mat[i], MAT_INITIAL_MATRIX, &X);
502: } else {
503: PetscObjectReference((PetscObject)deluxe_ctx->seq_mat[i]);
504: X = deluxe_ctx->seq_mat[i];
505: }
506: MatDuplicate(X, MAT_DO_NOT_COPY_VALUES, &Y);
507: if (!sub_schurs->is_hermitian) {
508: PCBDDCMatTransposeMatSolve_SeqDense(deluxe_ctx->seq_mat_inv_sum[i], X, Y);
509: } else {
510: MatMatSolve(deluxe_ctx->seq_mat_inv_sum[i], X, Y);
511: }
513: MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]);
514: MatDestroy(&deluxe_ctx->seq_mat[i]);
515: MatDestroy(&X);
516: if (deluxe_ctx->change) {
517: Mat C, CY;
519: KSPGetOperators(deluxe_ctx->change[i], &C, NULL);
520: MatMatMult(C, Y, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &CY);
521: MatMatTransposeMult(CY, C, MAT_REUSE_MATRIX, PETSC_DEFAULT, &Y);
522: MatDestroy(&CY);
523: MatProductClear(Y); /* clear internal matproduct structure of Y since CY is destroyed */
524: }
525: MatTranspose(Y, MAT_INPLACE_MATRIX, &Y);
526: deluxe_ctx->seq_mat[i] = Y;
527: }
528: cum += subset_size;
529: cum2 += subset_size * subset_size;
530: }
531: ISRestoreIndices(sub_schurs->is_Ej_all, &idxs);
532: MatSeqAIJRestoreArray(sub_schurs->S_Ej_all, &matdata);
533: MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all, &matdata2);
534: if (pcbddc->deluxe_singlemat) {
535: deluxe_ctx->change = NULL;
536: deluxe_ctx->change_with_qr = PETSC_FALSE;
537: }
539: if (deluxe_ctx->change && !deluxe_ctx->change_with_qr) {
540: for (i = 0; i < deluxe_ctx->seq_n; i++) {
541: if (newsetup) {
542: PC pc;
544: KSPGetPC(deluxe_ctx->change[i], &pc);
545: PCSetType(pc, PCLU);
546: KSPSetFromOptions(deluxe_ctx->change[i]);
547: }
548: KSPSetUp(deluxe_ctx->change[i]);
549: }
550: }
551: return 0;
552: }