Actual source code: ex236.c

  1: static char help[] = "Test CPU/GPU memory leaks, MatMult and MatMultTransposeAdd during successive matrix assemblies\n\n";

  3: #include <petscmat.h>

  5: int main(int argc, char **argv)
  6: {
  7:   PetscMPIInt rank, size;
  8:   Mat         A;
  9:   PetscInt    i, j, k, n = 3, vstart, rstart, rend, margin;
 10:   Vec         x, y;

 13:   PetscInitialize(&argc, &argv, (char *)0, help);
 14:   MPI_Comm_rank(PETSC_COMM_WORLD, &rank);
 15:   MPI_Comm_size(PETSC_COMM_WORLD, &size);

 17:   MatCreate(PETSC_COMM_WORLD, &A);
 18:   MatSetSizes(A, n, n, PETSC_DECIDE, PETSC_DECIDE);
 19:   MatSetFromOptions(A);

 21:   MatMPIAIJSetPreallocation(A, n, NULL, 0, NULL);
 22:   MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE);
 23:   MatGetOwnershipRange(A, &rstart, &rend);
 24:   MatCreateVecs(A, &x, &y);
 25:   VecSet(x, 1.0);

 27:   /*
 28:     Matrix A only has nonzeros in the diagonal block, which is of size 3x3.
 29:     We do three successive assemblies on A. The first two have the same non-zero
 30:     pattern but different values, and the third breaks the non-zero pattern. The
 31:     first two assemblies have enough zero-rows that triggers compressed-row storage
 32:     in MATAIJ and MATAIJCUSPARSE.

 34:     These settings are used to test memory management and correctness in MatMult
 35:     and MatMultTransposeAdd.
 36:   */

 38:   for (k = 0; k < 3; k++) { /* Three assemblies */
 39:     vstart = (size * k + rank) * n * n + 1;
 40:     margin = (k == 2) ? 0 : 2; /* Create two zero-rows in the first two assemblies */
 41:     for (i = rstart; i < rend - margin; i++) {
 42:       for (j = rstart; j < rend; j++) {
 43:         MatSetValue(A, i, j, (PetscScalar)vstart, INSERT_VALUES);
 44:         vstart++;
 45:       }
 46:     }
 47:     MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
 48:     MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
 49:     MatMult(A, x, y);
 50:     MatMultTransposeAdd(A, x, y, y); /* y[i] = sum of row i and column i of A */
 51:     VecView(y, PETSC_VIEWER_STDOUT_WORLD);
 52:   }

 54:   MatDestroy(&A);
 55:   VecDestroy(&x);
 56:   VecDestroy(&y);
 57:   PetscFinalize();

 59:   /* Uncomment this line if you want to use "cuda-memcheck --leaf-check full" to check this program */
 60:   /*cudaDeviceReset();*/
 61:   return 0;
 62: }

 64: /*TEST

 66:    testset:
 67:      nsize: 2
 68:      output_file: output/ex236_1.out
 69:      filter: grep -v type

 71:      test:
 72:        args: -mat_type aij

 74:      test:
 75:        requires: cuda
 76:        suffix: cuda
 77:        args: -mat_type aijcusparse
 78: TEST*/