Actual source code: ex2cu.cu

  1: static char help[] = "Benchmarking cudaPointerGetAttributes() time\n"; /* help string handed to PetscInitialize() in main() */
  2: /*
  3:   Running example on Summit at OLCF:
  4:   # run with total 1 resource set (RS) (-n1), 1 RS per node (-r1), 1 MPI rank (-a1), 7 cores (-c7) and 1 GPU (-g1) per RS
  5:   $ jsrun -n1 -a1 -c7 -g1 -r1  ./ex2cu
  6:     Average cudaPointerGetAttributes() time = 0.31 microseconds
  7: */
  8: #include <petscsys.h>
  9: #include <petscdevice_cuda.h>

 11: int main(int argc, char **argv)
 12: {
 13:   PetscInt                     i, n = 4000;
 14:   cudaError_t                  cerr;
 15:   PetscScalar                **ptrs;
 16:   PetscLogDouble               tstart, tend, time;
 17:   struct cudaPointerAttributes attr;

 20:   PetscInitialize(&argc, &argv, (char *)0, help);
 21:   PetscOptionsGetInt(NULL, NULL, "-n", &n, NULL);
 22:   cudaStreamSynchronize(NULL); /* Initialize CUDA runtime to get more accurate timing below */

 24:   PetscMalloc1(n, &ptrs);
 25:   for (i = 0; i < n; i++) {
 26:     if (i % 2) PetscMalloc1(i + 16, &ptrs[i]);
 27:     else cudaMalloc((void **)&ptrs[i], (i + 16) * sizeof(PetscScalar));
 28:   }

 30:   PetscTime(&tstart);
 31:   for (i = 0; i < n; i++) {
 32:     cerr = cudaPointerGetAttributes(&attr, ptrs[i]);
 33:     if (cerr) cerr = cudaGetLastError();
 34:   }
 35:   PetscTime(&tend);
 36:   time = (tend - tstart) * 1e6 / n;

 38:   PetscPrintf(PETSC_COMM_WORLD, "Average cudaPointerGetAttributes() time = %.2f microseconds\n", time);

 40:   for (i = 0; i < n; i++) {
 41:     if (i % 2) PetscFree(ptrs[i]);
 42:     else cudaFree(ptrs[i]);
 43:   }
 44:   PetscFree(ptrs);

 46:   PetscFinalize();
 47:   return 0;
 48: }

 50: /*TEST
 51:   build:
 52:     requires: cuda

 54:   test:
 55:     requires: cuda
 56:     args: -n 2
 57:     output_file: output/empty.out
 58:     filter: grep "DOES_NOT_EXIST"

 60: TEST*/