Actual source code: ex32.c


  /* Help text for this test program; main() hands it to PetscInitialize(). */
  2: static char help[] = "Tests MATSEQDENSECUDA\n\n";

  4: #include <petscmat.h>

  6: int main(int argc, char **argv)
  7: {
  8:   Mat       A, AC, B;
  9:   PetscInt  m = 10, n = 10;
 10:   PetscReal r, tol    = 10 * PETSC_SMALL;

 13:   PetscInitialize(&argc, &argv, (char *)0, help);
 14:   PetscOptionsGetInt(NULL, NULL, "-m", &m, NULL);
 15:   PetscOptionsGetInt(NULL, NULL, "-n", &n, NULL);
 16:   MatCreate(PETSC_COMM_SELF, &A);
 17:   MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, m, n);
 18:   MatSetType(A, MATSEQDENSE);
 19:   MatSetFromOptions(A);
 20:   MatSeqDenseSetPreallocation(A, NULL);
 21:   MatSetRandom(A, NULL);
 22: #if 0
 23:   PetscInt       i,j;
 24:   PetscScalar    val;
 25:   for (i=0; i<m; i++) {
 26:     for (j=0; j<n; j++) {
 27:       val = (PetscScalar)(i+j);
 28:       MatSetValues(A,1,&i,1,&j,&val,INSERT_VALUES);
 29:     }
 30:   }
 31:   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
 32:   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
 33: #endif

 35:   /* Create a CUDA version of A */
 36: #if defined(PETSC_HAVE_CUDA)
 37:   MatConvert(A, MATSEQDENSECUDA, MAT_INITIAL_MATRIX, &AC);
 38: #else
 39:   MatDuplicate(A, MAT_COPY_VALUES, &AC);
 40: #endif
 41:   MatDuplicate(AC, MAT_COPY_VALUES, &B);

 43:   /* full CUDA AXPY */
 44:   MatAXPY(B, -1.0, AC, SAME_NONZERO_PATTERN);
 45:   MatNorm(B, NORM_INFINITY, &r);

 48:   /* test Copy */
 49:   MatCopy(AC, B, SAME_NONZERO_PATTERN);

 51:   /* call MatAXPY_Basic since B is CUDA, A is CPU,  */
 52:   MatAXPY(B, -1.0, A, SAME_NONZERO_PATTERN);
 53:   MatNorm(B, NORM_INFINITY, &r);

 56:   if (m == n) {
 57:     Mat B1, B2;

 59:     MatCopy(AC, B, SAME_NONZERO_PATTERN);
 60:     /* full CUDA PtAP */
 61:     MatPtAP(B, AC, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &B1);

 63:     /* CPU PtAP since A is on the CPU only */
 64:     MatPtAP(B, A, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &B2);

 66:     MatAXPY(B2, -1.0, B1, SAME_NONZERO_PATTERN);
 67:     MatNorm(B2, NORM_INFINITY, &r);

 70:     /* test reuse */
 71:     MatPtAP(B, AC, MAT_REUSE_MATRIX, PETSC_DEFAULT, &B1);
 72:     MatPtAP(B, A, MAT_REUSE_MATRIX, PETSC_DEFAULT, &B2);
 73:     MatAXPY(B2, -1.0, B1, SAME_NONZERO_PATTERN);
 74:     MatNorm(B2, NORM_INFINITY, &r);

 77:     MatDestroy(&B1);
 78:     MatDestroy(&B2);
 79:   }

 81:   MatDestroy(&B);
 82:   MatDestroy(&AC);
 83:   MatDestroy(&A);
 84:   PetscFinalize();
 85:   return 0;
 86: }

 88: /*TEST

 90:    build:
 91:      requires: cuda

 93:    test:
 94:      output_file: output/ex32_1.out
 95:      args: -m {{3 5 12}} -n {{3 5 12}}
 96:      suffix: seqdensecuda

 98: TEST*/