Actual source code: ex13.c

static char help[] = "Benchmark Poisson Problem in 2d and 3d with finite elements.\n\
We solve the Poisson problem in a rectangular domain\n\
using a parallel unstructured mesh (DMPLEX) to discretize it.\n\n\n";

#include <petscdmplex.h>
#include <petscsnes.h>
#include <petscds.h>
#include <petscconvest.h>
#if defined(PETSC_HAVE_AMGX)
  #include <amgx_c.h>
#endif

typedef struct {
  PetscInt  nit;    /* Number of benchmark iterations */
  PetscBool strong; /* Do not integrate the Laplacian by parts */
} AppCtx;

/* Exact solution for the default (weak-form) problem */
static PetscErrorCode trig_u(PetscInt dim, PetscReal time, const PetscReal x[], PetscInt Nc, PetscScalar *u, void *ctx)
{
  PetscInt d;
  *u = 0.0;
  for (d = 0; d < dim; ++d) *u += PetscSinReal(2.0 * PETSC_PI * x[d]);
  return PETSC_SUCCESS;
}

/* f0 = -f, the negative of the forcing term for the trigonometric solution */
static void f0_trig_u(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f0[])
{
  PetscInt d;
  for (d = 0; d < dim; ++d) f0[0] += -4.0 * PetscSqr(PETSC_PI) * PetscSinReal(2.0 * PETSC_PI * x[d]);
}

/* f1 = grad u */
static void f1_u(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f1[])
{
  PetscInt d;
  for (d = 0; d < dim; ++d) f1[d] = u_x[d];
}

/* g3 = identity, the coefficient of grad v . grad u in the Jacobian */
static void g3_uu(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, PetscReal u_tShift, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar g3[])
{
  PetscInt d;
  for (d = 0; d < dim; ++d) g3[d * dim + d] = 1.0;
}
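
/* Note on conventions: PetscDSSetResidual() assembles the residual as
   F(u) = \int_Omega v f0(u, grad u) + grad v . f1(u, grad u), and the g3 term of
   PetscDSSetJacobian() multiplies grad v . g3 . grad u. For the manufactured solution

     u(x) = sum_d sin(2 pi x_d),  so that  -Delta u = f  with  f = 4 pi^2 sum_d sin(2 pi x_d),

   f0_trig_u supplies -f, f1_u supplies grad u, and g3_uu supplies the identity,
   which together give the Poisson problem integrated by parts. */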

/* Exact solution for the strong-form problem */
static PetscErrorCode quadratic_u(PetscInt dim, PetscReal time, const PetscReal x[], PetscInt Nc, PetscScalar *u, void *ctx)
{
  *u = PetscSqr(x[0]) + PetscSqr(x[1]);
  return PETSC_SUCCESS;
}

/* Strong-form residual: -Laplacian(u) + 4, using second derivatives from the jet */
static void f0_strong_u(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f0[])
{
  PetscInt d;
  for (d = 0; d < dim; ++d) f0[0] -= u_x[dim + d * dim + d];
  f0[0] += 4.0;
}
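
/* Note on the strong form: with -dm_ds_jet_degree 2 (see the "strong" test below),
   u_x also carries second derivatives, laid out as dim gradient entries followed by
   the dim x dim Hessian, so u_x[dim + d * dim + d] is d^2u/dx_d^2. The residual is
   therefore -Delta u + 4, which vanishes for the exact solution u = x^2 + y^2 in 2D,
   since Delta u = 4. */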

static PetscErrorCode ProcessOptions(MPI_Comm comm, AppCtx *options)
{
  PetscFunctionBeginUser;
  options->nit    = 10;
  options->strong = PETSC_FALSE;
  PetscOptionsBegin(comm, "", "Poisson Problem Options", "DMPLEX");
  PetscCall(PetscOptionsInt("-benchmark_it", "Solve the benchmark problem this many times", "ex13.c", options->nit, &options->nit, NULL));
  PetscCall(PetscOptionsBool("-strong", "Do not integrate the Laplacian by parts", "ex13.c", options->strong, &options->strong, NULL));
  PetscOptionsEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode CreateMesh(MPI_Comm comm, AppCtx *user, DM *dm)
{
  PetscFunctionBeginUser;
  PetscCall(DMCreate(comm, dm));
  PetscCall(DMSetType(*dm, DMPLEX));
  PetscCall(DMSetFromOptions(*dm));
  PetscCall(DMSetApplicationContext(*dm, user));
  PetscCall(DMViewFromOptions(*dm, NULL, "-dm_view"));
  PetscFunctionReturn(PETSC_SUCCESS);
}
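
/* The mesh is configured entirely from the command line; for example (taken from
   the "bench" test below)

     -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_faces 2,2,1 -dm_refine 2

   builds a 2x2x1 hexahedral box mesh and regularly refines it twice, and
   -dm_view prints a summary of what was created. */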

static PetscErrorCode SetupPrimalProblem(DM dm, AppCtx *user)
{
  PetscDS        ds;
  DMLabel        label;
  const PetscInt id = 1;

  PetscFunctionBeginUser;
  PetscCall(DMGetDS(dm, &ds));
  PetscCall(DMGetLabel(dm, "marker", &label));
  if (user->strong) {
    PetscCall(PetscDSSetResidual(ds, 0, f0_strong_u, NULL));
    PetscCall(PetscDSSetExactSolution(ds, 0, quadratic_u, user));
    PetscCall(DMAddBoundary(dm, DM_BC_ESSENTIAL, "wall", label, 1, &id, 0, 0, NULL, (void (*)(void))quadratic_u, NULL, user, NULL));
  } else {
    PetscCall(PetscDSSetResidual(ds, 0, f0_trig_u, f1_u));
    PetscCall(PetscDSSetJacobian(ds, 0, 0, NULL, NULL, NULL, g3_uu));
    PetscCall(PetscDSSetExactSolution(ds, 0, trig_u, user));
    PetscCall(DMAddBoundary(dm, DM_BC_ESSENTIAL, "wall", label, 1, &id, 0, 0, NULL, (void (*)(void))trig_u, NULL, user, NULL));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode SetupDiscretization(DM dm, const char name[], PetscErrorCode (*setup)(DM, AppCtx *), AppCtx *user)
{
  DM             cdm = dm;
  PetscFE        fe;
  DMPolytopeType ct;
  PetscBool      simplex;
  PetscInt       dim, cStart;
  char           prefix[PETSC_MAX_PATH_LEN];

  PetscFunctionBeginUser;
  PetscCall(DMGetDimension(dm, &dim));
  PetscCall(DMPlexGetHeightStratum(dm, 0, &cStart, NULL));
  PetscCall(DMPlexGetCellType(dm, cStart, &ct));
  simplex = DMPolytopeTypeGetNumVertices(ct) == DMPolytopeTypeGetDim(ct) + 1 ? PETSC_TRUE : PETSC_FALSE; /* a simplex has dim+1 vertices */
  /* Create finite element */
  PetscCall(PetscSNPrintf(prefix, PETSC_MAX_PATH_LEN, "%s_", name));
  PetscCall(PetscFECreateDefault(PETSC_COMM_SELF, dim, 1, simplex, name ? prefix : NULL, -1, &fe));
  PetscCall(PetscObjectSetName((PetscObject)fe, name));
  /* Set discretization and boundary conditions for each mesh */
  PetscCall(DMSetField(dm, 0, NULL, (PetscObject)fe));
  PetscCall(DMCreateDS(dm));
  PetscCall((*setup)(dm, user));
  while (cdm) {
    PetscCall(DMCopyDisc(dm, cdm));
    /* TODO: Check whether the boundary of coarse meshes is marked */
    PetscCall(DMGetCoarseDM(cdm, &cdm));
  }
  PetscCall(PetscFEDestroy(&fe));
  PetscFunctionReturn(PETSC_SUCCESS);
}
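
/* The field name determines the options prefix passed to PetscFECreateDefault(),
   so the "potential" field is configured with, e.g., -potential_petscspace_degree 2
   for quadratic elements (as in the tests below). DMCopyDisc() then propagates the
   discretization to any coarse DMs, so a multigrid hierarchy discretizes the same
   problem on every level. */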

int main(int argc, char **argv)
{
  DM             dm;   /* Problem specification */
  SNES           snes; /* Nonlinear solver */
  Vec            u;    /* Solutions */
  AppCtx         user; /* User-defined work context */
  PetscLogDouble time;
  Mat            Amat;

  PetscFunctionBeginUser;
  PetscCall(PetscInitialize(&argc, &argv, NULL, help));
  PetscCall(ProcessOptions(PETSC_COMM_WORLD, &user));
  /* Create the system */
  PetscCall(SNESCreate(PETSC_COMM_WORLD, &snes));
  PetscCall(CreateMesh(PETSC_COMM_WORLD, &user, &dm));
  PetscCall(SNESSetDM(snes, dm));
  PetscCall(SetupDiscretization(dm, "potential", SetupPrimalProblem, &user));
  PetscCall(DMCreateGlobalVector(dm, &u));
  {
    PetscInt N;

    PetscCall(VecGetSize(u, &N));
    PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Number of equations N = %" PetscInt_FMT "\n", N));
  }
  PetscCall(SNESSetFromOptions(snes));
  PetscCall(PetscObjectSetName((PetscObject)u, "potential"));
  PetscCall(DMPlexSetSNESLocalFEM(dm, &user, &user, &user));
  PetscCall(DMSNESCheckFromOptions(snes, u));
  PetscCall(PetscTime(&time));
  PetscCall(SNESSetUp(snes));
#if defined(PETSC_HAVE_AMGX)
  KSP                   ksp;
  PC                    pc;
  PetscBool             flg;
  AMGX_resources_handle rsc;

  PetscCall(SNESGetKSP(snes, &ksp));
  PetscCall(KSPGetPC(ksp, &pc));
  PetscCall(PetscObjectTypeCompare((PetscObject)pc, PCAMGX, &flg));
  if (flg) {
    PetscCall(PCAmgXGetResources(pc, (void *)&rsc));
    /* do ... with resource */
  }
#endif
  /* Declare the matrix SPD, which solvers such as CG and GAMG can exploit */
  PetscCall(SNESGetJacobian(snes, &Amat, NULL, NULL, NULL));
  PetscCall(MatSetOption(Amat, MAT_SPD, PETSC_TRUE));
  PetscCall(MatSetOption(Amat, MAT_SPD_ETERNAL, PETSC_TRUE));
  PetscCall(SNESSolve(snes, NULL, u));
  PetscCall(PetscTimeSubtract(&time));
  /* Benchmark system: repeat the solve under its own log stage */
  if (user.nit) {
    Vec      b;
    PetscInt i;
#if defined(PETSC_USE_LOG)
    PetscLogStage kspstage;
#endif
    PetscCall(PetscLogStageRegister("Solve only", &kspstage));
    PetscCall(PetscLogStagePush(kspstage));
    PetscCall(SNESGetSolution(snes, &u));
    PetscCall(SNESGetFunction(snes, &b, NULL, NULL));
    for (i = 0; i < user.nit; i++) {
      PetscCall(VecZeroEntries(u));
      PetscCall(SNESSolve(snes, NULL, u));
    }
    PetscCall(PetscLogStagePop());
  }
  PetscCall(SNESGetSolution(snes, &u));
  PetscCall(VecViewFromOptions(u, NULL, "-potential_view"));
  /* Cleanup */
  PetscCall(VecDestroy(&u));
  PetscCall(SNESDestroy(&snes));
  PetscCall(DMDestroy(&dm));
  PetscCall(PetscFinalize());
  return 0;
}
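
/* A typical build-and-run sequence, assuming PETSC_DIR and PETSC_ARCH are set and
   the usual PETSc example makefile is available (names may differ between versions):

     make ex13
     mpiexec -n 4 ./ex13 -dm_plex_dim 2 -dm_plex_simplex 0 -dm_plex_box_faces 4,4 \
       -dm_refine 3 -potential_petscspace_degree 2 -snes_type ksponly \
       -ksp_type cg -ksp_converged_reason -pc_type gamg -benchmark_it 10

   This is similar to the "comparison" test below: one initial solve, then ten timed
   solves in the "Solve only" log stage (viewable with -log_view). */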

/*TEST

  test:
    suffix: strong
    requires: triangle
    args: -dm_plex_dim 2 -dm_refine 1 -benchmark_it 0 -dmsnes_check -potential_petscspace_degree 2 -dm_ds_jet_degree 2 -strong

  test:
    suffix: bench
    nsize: 4
    args: -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_faces 2,2,1 -dm_refine 2 -dm_view -ksp_monitor \
       -benchmark_it 1 -dm_plex_box_upper 2,2,1 -dm_plex_box_lower 0,0,0 -ksp_converged_reason \
       -ksp_norm_type unpreconditioned -ksp_rtol 1.e-6 -ksp_type cg -mg_levels_ksp_chebyshev_esteig 0,0.2,0,1.1 \
       -mg_levels_ksp_max_it 1 -mg_levels_ksp_type chebyshev -mg_levels_pc_type jacobi -pc_gamg_coarse_eq_limit 200 \
       -pc_gamg_coarse_grid_layout_type compact -pc_gamg_esteig_ksp_max_it 5 -pc_gamg_process_eq_limit 200 \
       -pc_gamg_repartition false -pc_gamg_reuse_interpolation true -pc_gamg_aggressive_coarsening 0 -pc_gamg_threshold 0.001 -pc_gamg_threshold_scale .5 \
       -pc_gamg_type agg -pc_type gamg -petscpartitioner_simple_node_grid 1,2,1 -petscpartitioner_simple_process_grid 2,1,1 \
       -petscpartitioner_type simple -potential_petscspace_degree 2 -snes_lag_jacobian -2 -snes_max_it 1 -snes_rtol 1.e-8 -snes_type ksponly -use_gpu_aware_mpi true

  testset:
    nsize: 4
    output_file: output/ex13_comparison.out
    args: -dm_plex_dim 2 -benchmark_it 10 -dm_plex_box_faces 4,4 -dm_refine 3 -petscpartitioner_simple_process_grid 2,2 \
      -petscpartitioner_simple_node_grid 1,1 -potential_petscspace_degree 2 -petscpartitioner_type simple \
      -dm_plex_simplex 0 -snes_type ksponly -dm_view -ksp_type cg -pc_type gamg -pc_gamg_process_eq_limit 400 \
      -ksp_norm_type unpreconditioned -ksp_converged_reason
    test:
      suffix: comparison
    test:
      suffix: cuda
      requires: cuda
      args: -dm_mat_type aijcusparse -dm_vec_type cuda
    test:
      suffix: kokkos
      requires: sycl kokkos_kernels
      args: -dm_mat_type aijkokkos -dm_vec_type kokkos
    test:
      suffix: aijmkl_comp
      requires: mkl_sparse
      args: -dm_mat_type aijmkl

  test:
    suffix: aijmkl_seq
    nsize: 1
    requires: mkl_sparse
    TODO: broken (INDEFINITE PC)
    args: -dm_plex_dim 3 -dm_plex_box_faces 4,4,4 -dm_refine 1 -petscpartitioner_type simple -potential_petscspace_degree 1 -dm_plex_simplex 0 \
          -snes_type ksponly -dm_view -pc_type gamg -pc_gamg_threshold -1 -pc_gamg_square_graph 10 -pc_gamg_process_eq_limit 400 \
          -pc_gamg_reuse_interpolation -pc_gamg_coarse_eq_limit 10 -pc_gamg_esteig_ksp_type cg -ksp_type cg -ksp_norm_type unpreconditioned \
          -ksp_converged_reason -snes_rtol 1.e-4 -dm_mat_type aijmkl -dm_vec_type standard

  testset:
    requires: cuda amgx
    filter: grep -v Built | grep -v "AMGX version" | grep -v "CUDA Runtime"
    output_file: output/ex13_amgx.out
    args: -dm_plex_dim 2 -dm_plex_box_faces 2,2 -dm_refine 2 -petscpartitioner_type simple -potential_petscspace_degree 2 -dm_plex_simplex 0 -ksp_monitor \
          -snes_type ksponly -dm_view -ksp_type cg -ksp_norm_type unpreconditioned -ksp_converged_reason -snes_rtol 1.e-4 -pc_type amgx -benchmark_it 1 -pc_amgx_verbose false
    nsize: 4
    test:
      suffix: amgx
      args: -dm_mat_type aijcusparse -dm_vec_type cuda
    test:
      suffix: amgx_cpu
      args: -dm_mat_type aij

TEST*/