Actual source code: ex60.c

  1: static char help[] = "Working out corner cases of the ASM preconditioner.\n\n";

  3: #include <petscksp.h>

  5: int main(int argc, char **args)
  6: {
  7:   KSP         ksp;
  8:   PC          pc;
  9:   Mat         A;
 10:   Vec         u, x, b;
 11:   PetscReal   error;
 12:   PetscMPIInt rank, size, sized;
 13:   PetscInt    M = 8, N = 8, m, n, rstart, rend, r;
 14:   PetscBool   userSubdomains = PETSC_FALSE;

 17:   PetscInitialize(&argc, &args, NULL, help);
 18:   PetscOptionsGetInt(NULL, NULL, "-M", &M, NULL);
 19:   PetscOptionsGetInt(NULL, NULL, "-N", &N, NULL);
 20:   PetscOptionsGetBool(NULL, NULL, "-user_subdomains", &userSubdomains, NULL);
 21:   /* Do parallel decomposition */
 22:   MPI_Comm_rank(PETSC_COMM_WORLD, &rank);
 23:   MPI_Comm_size(PETSC_COMM_WORLD, &size);
 24:   sized = (PetscMPIInt)PetscSqrtReal((PetscReal)size);
 28:   /* Assemble the matrix for the five point stencil, YET AGAIN
 29:        Every other process will be empty */
 30:   MatCreate(PETSC_COMM_WORLD, &A);
 31:   m = (sized > 1) ? (rank % 2) ? 0 : 2 * M / sized : M;
 32:   n = N / sized;
 33:   MatSetSizes(A, m * n, m * n, M * N, M * N);
 34:   MatSetFromOptions(A);
 35:   MatSetUp(A);
 36:   MatGetOwnershipRange(A, &rstart, &rend);
 37:   for (r = rstart; r < rend; ++r) {
 38:     const PetscScalar diag = 4.0, offdiag = -1.0;
 39:     const PetscInt    i = r / N;
 40:     const PetscInt    j = r - i * N;
 41:     PetscInt          c;

 43:     if (i > 0) {
 44:       c = r - n;
 45:       MatSetValues(A, 1, &r, 1, &c, &offdiag, INSERT_VALUES);
 46:     }
 47:     if (i < M - 1) {
 48:       c = r + n;
 49:       MatSetValues(A, 1, &r, 1, &c, &offdiag, INSERT_VALUES);
 50:     }
 51:     if (j > 0) {
 52:       c = r - 1;
 53:       MatSetValues(A, 1, &r, 1, &c, &offdiag, INSERT_VALUES);
 54:     }
 55:     if (j < N - 1) {
 56:       c = r + 1;
 57:       MatSetValues(A, 1, &r, 1, &c, &offdiag, INSERT_VALUES);
 58:     }
 59:     MatSetValues(A, 1, &r, 1, &r, &diag, INSERT_VALUES);
 60:   }
 61:   MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
 62:   MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
 63:   /* Setup Solve */
 64:   MatCreateVecs(A, &x, &b);
 65:   VecDuplicate(x, &u);
 66:   VecSet(u, 1.0);
 67:   MatMult(A, u, b);
 68:   KSPCreate(PETSC_COMM_WORLD, &ksp);
 69:   KSPSetOperators(ksp, A, A);
 70:   KSPGetPC(ksp, &pc);
 71:   PCSetType(pc, PCASM);
 72:   /* Setup ASM by hand */
 73:   if (userSubdomains) {
 74:     IS        is;
 75:     PetscInt *rows;

 77:     /* Use no overlap for now */
 78:     PetscMalloc1(rend - rstart, &rows);
 79:     for (r = rstart; r < rend; ++r) rows[r - rstart] = r;
 80:     ISCreateGeneral(PETSC_COMM_SELF, rend - rstart, rows, PETSC_OWN_POINTER, &is);
 81:     PCASMSetLocalSubdomains(pc, 1, &is, &is);
 82:     ISDestroy(&is);
 83:   }
 84:   KSPSetFromOptions(ksp);
 85:   /* Solve and Compare */
 86:   KSPSolve(ksp, b, x);
 87:   VecAXPY(x, -1.0, u);
 88:   VecNorm(x, NORM_INFINITY, &error);
 89:   PetscPrintf(PETSC_COMM_WORLD, "Infinity norm of the error: %g\n", (double)error);
 90:   /* Cleanup */
 91:   KSPDestroy(&ksp);
 92:   MatDestroy(&A);
 93:   VecDestroy(&u);
 94:   VecDestroy(&x);
 95:   VecDestroy(&b);
 96:   PetscFinalize();
 97:   return 0;
 98: }

100: /*TEST

102:    test:
103:       suffix: 0
104:       args: -ksp_view

106:    test:
107:       requires: kokkos_kernels
108:       suffix: 0_kokkos
109:       args: -ksp_view -mat_type aijkokkos

111:    test:
112:       requires: cuda
113:       suffix: 0_cuda
114:       args: -ksp_view -mat_type aijcusparse -sub_pc_factor_mat_solver_type cusparse

116:    test:
117:       suffix: 1
118:       nsize: 4
119:       args: -ksp_view

121:    test:
122:       requires: kokkos_kernels
123:       suffix: 1_kokkos
124:       nsize: 4
125:       args: -ksp_view -mat_type aijkokkos

127:    test:
128:       requires: cuda
129:       suffix: 1_cuda
130:       nsize: 4
131:       args: -ksp_view -mat_type aijcusparse -sub_pc_factor_mat_solver_type cusparse

133:    test:
134:       suffix: 2
135:       nsize: 4
136:       args: -user_subdomains -ksp_view

138:    test:
139:       requires: kokkos_kernels
140:       suffix: 2_kokkos
141:       nsize: 4
142:       args: -user_subdomains -ksp_view -mat_type aijkokkos

144:    test:
145:       requires: cuda
146:       suffix: 2_cuda
147:       nsize: 4
148:       args: -user_subdomains -ksp_view -mat_type aijcusparse -sub_pc_factor_mat_solver_type cusparse

150: TEST*/