Actual source code: ex1.c


  2: static char help[] = "Tests LU, Cholesky, and QR factorization and MatMatSolve() for a sequential dense matrix. \n\
  3:                       For MATSEQDENSE matrix, the factorization is just a thin wrapper to LAPACK.       \n\
  4:                       For MATSEQDENSECUDA, it uses cusolverDn routines \n\n";

  6: #include <petscmat.h>

  8: static PetscErrorCode createMatsAndVecs(PetscInt m, PetscInt n, PetscInt nrhs, PetscBool full, Mat *_mat, Mat *_RHS, Mat *_SOLU, Vec *_x, Vec *_y, Vec *_b)
  9: {
 10:   PetscRandom rand;
 11:   Mat         mat, RHS, SOLU;
 12:   PetscInt    rstart, rend;
 13:   PetscInt    cstart, cend;
 14:   PetscScalar value = 1.0;
 15:   Vec         x, y, b;

 17:   /* create multiple vectors RHS and SOLU */
 18:   MatCreate(PETSC_COMM_WORLD, &RHS);
 19:   MatSetSizes(RHS, PETSC_DECIDE, PETSC_DECIDE, m, nrhs);
 20:   MatSetType(RHS, MATDENSE);
 21:   MatSetOptionsPrefix(RHS, "rhs_");
 22:   MatSetFromOptions(RHS);
 23:   MatSeqDenseSetPreallocation(RHS, NULL);

 25:   PetscRandomCreate(PETSC_COMM_WORLD, &rand);
 26:   PetscRandomSetFromOptions(rand);
 27:   MatSetRandom(RHS, rand);

 29:   if (m == n) {
 30:     MatDuplicate(RHS, MAT_DO_NOT_COPY_VALUES, &SOLU);
 31:   } else {
 32:     MatCreate(PETSC_COMM_WORLD, &SOLU);
 33:     MatSetSizes(SOLU, PETSC_DECIDE, PETSC_DECIDE, n, nrhs);
 34:     MatSetType(SOLU, MATDENSE);
 35:     MatSeqDenseSetPreallocation(SOLU, NULL);
 36:   }
 37:   MatSetRandom(SOLU, rand);

 39:   /* create matrix */
 40:   MatCreate(PETSC_COMM_WORLD, &mat);
 41:   MatSetSizes(mat, PETSC_DECIDE, PETSC_DECIDE, m, n);
 42:   MatSetType(mat, MATDENSE);
 43:   MatSetFromOptions(mat);
 44:   MatSetUp(mat);
 45:   MatGetOwnershipRange(mat, &rstart, &rend);
 46:   MatGetOwnershipRangeColumn(mat, &cstart, &cend);
 47:   if (!full) {
 48:     for (PetscInt i = rstart; i < rend; i++) {
 49:       if (m == n) {
 50:         value = (PetscReal)i + 1;
 51:         MatSetValues(mat, 1, &i, 1, &i, &value, INSERT_VALUES);
 52:       } else {
 53:         for (PetscInt j = cstart; j < cend; j++) {
 54:           value = ((PetscScalar)i + 1.) / (PetscSqr(i - j) + 1.);
 55:           MatSetValues(mat, 1, &i, 1, &j, &value, INSERT_VALUES);
 56:         }
 57:       }
 58:     }
 59:     MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY);
 60:     MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY);
 61:   } else {
 62:     MatSetRandom(mat, rand);
 63:     if (m == n) {
 64:       Mat T;

 66:       MatMatTransposeMult(mat, mat, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &T);
 67:       MatDestroy(&mat);
 68:       mat = T;
 69:     }
 70:   }

 72:   /* create single vectors */
 73:   MatCreateVecs(mat, &x, &b);
 74:   VecDuplicate(x, &y);
 75:   VecSet(x, value);
 76:   PetscRandomDestroy(&rand);
 77:   *_mat  = mat;
 78:   *_RHS  = RHS;
 79:   *_SOLU = SOLU;
 80:   *_x    = x;
 81:   *_y    = y;
 82:   *_b    = b;
 83:   return 0;
 84: }

 86: int main(int argc, char **argv)
 87: {
 88:   Mat         mat, F, RHS, SOLU;
 89:   MatInfo     info;
 90:   PetscInt    m = 15, n = 10, i, j, nrhs = 2;
 91:   Vec         x, y, b, ytmp;
 92:   IS          perm;
 93:   PetscReal   norm, tol = PETSC_SMALL;
 94:   PetscMPIInt size;
 95:   char        solver[64];
 96:   PetscBool   inplace, full = PETSC_FALSE, ldl = PETSC_TRUE, qr = PETSC_TRUE;

 99:   PetscInitialize(&argc, &argv, (char *)0, help);
100:   MPI_Comm_size(PETSC_COMM_WORLD, &size);
102:   PetscStrcpy(solver, "petsc");
103:   PetscOptionsGetInt(NULL, NULL, "-m", &m, NULL);
104:   PetscOptionsGetInt(NULL, NULL, "-n", &n, NULL);
105:   PetscOptionsGetInt(NULL, NULL, "-nrhs", &nrhs, NULL);
106:   PetscOptionsGetBool(NULL, NULL, "-ldl", &ldl, NULL);
107:   PetscOptionsGetBool(NULL, NULL, "-qr", &qr, NULL);
108:   PetscOptionsGetBool(NULL, NULL, "-full", &full, NULL);
109:   PetscOptionsGetString(NULL, NULL, "-solver_type", solver, sizeof(solver), NULL);

111:   createMatsAndVecs(n, n, nrhs, full, &mat, &RHS, &SOLU, &x, &y, &b);
112:   VecDuplicate(y, &ytmp);

114:   /* Only SeqDense* support in-place factorizations and NULL permutations */
115:   PetscObjectBaseTypeCompare((PetscObject)mat, MATSEQDENSE, &inplace);
116:   MatGetLocalSize(mat, &i, NULL);
117:   MatGetOwnershipRange(mat, &j, NULL);
118:   ISCreateStride(PETSC_COMM_WORLD, i, j, 1, &perm);

120:   MatGetInfo(mat, MAT_LOCAL, &info);
121:   PetscPrintf(PETSC_COMM_WORLD, "matrix nonzeros = %" PetscInt_FMT ", allocated nonzeros = %" PetscInt_FMT "\n", (PetscInt)info.nz_used, (PetscInt)info.nz_allocated);
122:   MatMult(mat, x, b);

124:   /* Cholesky factorization - perm and factinfo are ignored by LAPACK */
125:   /* in-place Cholesky */
126:   if (inplace) {
127:     Mat RHS2;

129:     MatDuplicate(mat, MAT_COPY_VALUES, &F);
130:     if (!ldl) MatSetOption(F, MAT_SPD, PETSC_TRUE);
131:     MatCholeskyFactor(F, perm, 0);
132:     MatSolve(F, b, y);
133:     VecAXPY(y, -1.0, x);
134:     VecNorm(y, NORM_2, &norm);
135:     if (norm > tol) PetscPrintf(PETSC_COMM_WORLD, "Warning: Norm of error for in-place Cholesky %g\n", (double)norm);

137:     MatMatSolve(F, RHS, SOLU);
138:     MatMatMult(mat, SOLU, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &RHS2);
139:     MatAXPY(RHS, -1.0, RHS2, SAME_NONZERO_PATTERN);
140:     MatNorm(RHS, NORM_FROBENIUS, &norm);
141:     if (norm > tol) PetscPrintf(PETSC_COMM_WORLD, "Error: Norm of residual for in-place Cholesky (MatMatSolve) %g\n", (double)norm);
142:     MatDestroy(&F);
143:     MatDestroy(&RHS2);
144:   }

146:   /* out-of-place Cholesky */
147:   MatGetFactor(mat, solver, MAT_FACTOR_CHOLESKY, &F);
148:   if (!ldl) MatSetOption(F, MAT_SPD, PETSC_TRUE);
149:   MatCholeskyFactorSymbolic(F, mat, perm, 0);
150:   MatCholeskyFactorNumeric(F, mat, 0);
151:   MatSolve(F, b, y);
152:   VecAXPY(y, -1.0, x);
153:   VecNorm(y, NORM_2, &norm);
154:   if (norm > tol) PetscPrintf(PETSC_COMM_WORLD, "Warning: Norm of error for out-of-place Cholesky %g\n", (double)norm);
155:   MatDestroy(&F);

157:   /* LU factorization - perms and factinfo are ignored by LAPACK */
158:   i = n - 1;
159:   MatZeroRows(mat, 1, &i, -1.0, NULL, NULL);
160:   MatMult(mat, x, b);

162:   /* in-place LU */
163:   if (inplace) {
164:     Mat RHS2;

166:     MatDuplicate(mat, MAT_COPY_VALUES, &F);
167:     MatLUFactor(F, perm, perm, 0);
168:     MatSolve(F, b, y);
169:     VecAXPY(y, -1.0, x);
170:     VecNorm(y, NORM_2, &norm);
171:     if (norm > tol) PetscPrintf(PETSC_COMM_WORLD, "Warning: Norm of error for in-place LU %g\n", (double)norm);
172:     MatMatSolve(F, RHS, SOLU);
173:     MatMatMult(mat, SOLU, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &RHS2);
174:     MatAXPY(RHS, -1.0, RHS2, SAME_NONZERO_PATTERN);
175:     MatNorm(RHS, NORM_FROBENIUS, &norm);
176:     if (norm > tol) PetscPrintf(PETSC_COMM_WORLD, "Error: Norm of residual for in-place LU (MatMatSolve) %g\n", (double)norm);
177:     MatDestroy(&F);
178:     MatDestroy(&RHS2);
179:   }

181:   /* out-of-place LU */
182:   MatGetFactor(mat, solver, MAT_FACTOR_LU, &F);
183:   MatLUFactorSymbolic(F, mat, perm, perm, 0);
184:   MatLUFactorNumeric(F, mat, 0);
185:   MatSolve(F, b, y);
186:   VecAXPY(y, -1.0, x);
187:   VecNorm(y, NORM_2, &norm);
188:   if (norm > tol) PetscPrintf(PETSC_COMM_WORLD, "Warning: Norm of error for out-of-place LU %g\n", (double)norm);

190:   /* free space */
191:   ISDestroy(&perm);
192:   MatDestroy(&F);
193:   MatDestroy(&mat);
194:   MatDestroy(&RHS);
195:   MatDestroy(&SOLU);
196:   VecDestroy(&x);
197:   VecDestroy(&b);
198:   VecDestroy(&y);
199:   VecDestroy(&ytmp);

201:   if (qr) {
202:     /* setup rectangular */
203:     createMatsAndVecs(m, n, nrhs, full, &mat, &RHS, &SOLU, &x, &y, &b);
204:     VecDuplicate(y, &ytmp);

206:     /* QR factorization - perms and factinfo are ignored by LAPACK */
207:     MatMult(mat, x, b);

209:     /* in-place QR */
210:     if (inplace) {
211:       Mat SOLU2;

213:       MatDuplicate(mat, MAT_COPY_VALUES, &F);
214:       MatQRFactor(F, NULL, 0);
215:       MatSolve(F, b, y);
216:       VecAXPY(y, -1.0, x);
217:       VecNorm(y, NORM_2, &norm);
218:       if (norm > tol) PetscPrintf(PETSC_COMM_WORLD, "Warning: Norm of error for in-place QR %g\n", (double)norm);
219:       MatMatMult(mat, SOLU, MAT_REUSE_MATRIX, PETSC_DEFAULT, &RHS);
220:       MatDuplicate(SOLU, MAT_DO_NOT_COPY_VALUES, &SOLU2);
221:       MatMatSolve(F, RHS, SOLU2);
222:       MatAXPY(SOLU2, -1.0, SOLU, SAME_NONZERO_PATTERN);
223:       MatNorm(SOLU2, NORM_FROBENIUS, &norm);
224:       if (norm > tol) PetscPrintf(PETSC_COMM_WORLD, "Error: Norm of error for in-place QR (MatMatSolve) %g\n", (double)norm);
225:       MatDestroy(&F);
226:       MatDestroy(&SOLU2);
227:     }

229:     /* out-of-place QR */
230:     MatGetFactor(mat, solver, MAT_FACTOR_QR, &F);
231:     MatQRFactorSymbolic(F, mat, NULL, NULL);
232:     MatQRFactorNumeric(F, mat, NULL);
233:     MatSolve(F, b, y);
234:     VecAXPY(y, -1.0, x);
235:     VecNorm(y, NORM_2, &norm);
236:     if (norm > tol) PetscPrintf(PETSC_COMM_WORLD, "Warning: Norm of error for out-of-place QR %g\n", (double)norm);

238:     if (m == n) {
239:       /* out-of-place MatSolveTranspose */
240:       MatMultTranspose(mat, x, b);
241:       MatSolveTranspose(F, b, y);
242:       VecAXPY(y, -1.0, x);
243:       VecNorm(y, NORM_2, &norm);
244:       if (norm > tol) PetscPrintf(PETSC_COMM_WORLD, "Warning: Norm of error for out-of-place QR %g\n", (double)norm);
245:     }

247:     /* free space */
248:     MatDestroy(&F);
249:     MatDestroy(&mat);
250:     MatDestroy(&RHS);
251:     MatDestroy(&SOLU);
252:     VecDestroy(&x);
253:     VecDestroy(&b);
254:     VecDestroy(&y);
255:     VecDestroy(&ytmp);
256:   }
257:   PetscFinalize();
258:   return 0;
259: }

261: /*TEST

263:    test:

265:    test:
266:      requires: cuda
267:      suffix: seqdensecuda
268:      args: -mat_type seqdensecuda -rhs_mat_type seqdensecuda -ldl 0 -solver_type {{petsc cuda}}
269:      output_file: output/ex1_1.out

271:    test:
272:      requires: cuda
273:      suffix: seqdensecuda_2
274:      args: -ldl 0 -solver_type cuda
275:      output_file: output/ex1_1.out

277:    test:
278:      requires: cuda
279:      suffix: seqdensecuda_seqaijcusparse
280:      args: -mat_type seqaijcusparse -rhs_mat_type seqdensecuda -qr 0
281:      output_file: output/ex1_2.out

283:    test:
284:      requires: cuda viennacl
285:      suffix: seqdensecuda_seqaijviennacl
286:      args: -mat_type seqaijviennacl -rhs_mat_type seqdensecuda -qr 0
287:      output_file: output/ex1_2.out

289:    test:
290:      suffix: 4
291:      args: -m 10 -n 10
292:      output_file: output/ex1_1.out

294: TEST*/