Actual source code: ex2cu.cu
1: static char help[] = "Benchmarking cudaPointerGetAttributes() time\n";
2: /*
3: Running example on Summit at OLCF:
4: # run with total 1 resource set (RS) (-n1), 1 RS per node (-r1), 1 MPI rank (-a1), 7 cores (-c7) and 1 GPU (-g1) per RS
5: $ jsrun -n1 -a1 -c7 -g1 -r1 ./ex2cu
6: Average cudaPointerGetAttributes() time = 0.31 microseconds
7: */
8: #include <petscsys.h>
9: #include <petscdevice_cuda.h>
/*
  main - Measure the average wall-clock cost of one cudaPointerGetAttributes() call.

  Allocates n buffers, alternating between device memory (even i, via cudaMalloc)
  and host memory (odd i, via PetscMalloc1), queries the attributes of each pointer
  once inside a timed loop, and prints the mean time per query in microseconds.

  Options:
    -n <int>  number of pointers to allocate and query (default 4000, must be > 0)

  Returns 0 on success; any PETSc or CUDA failure outside the timed loop is
  reported through the PETSc error handler and a nonzero error code is returned.
*/
int main(int argc, char **argv)
{
  PetscInt                     i, n = 4000;
  cudaError_t                  cerr;
  PetscScalar                **ptrs;
  PetscLogDouble               tstart, tend, avg_us;
  struct cudaPointerAttributes attr;

  PetscFunctionBeginUser;
  PetscCall(PetscInitialize(&argc, &argv, (char *)0, help));
  PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &n, NULL));
  /* n is the divisor of the average and the allocation count, so reject n <= 0 up front */
  PetscCheck(n > 0, PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, "-n must be positive, got %" PetscInt_FMT, n);
  PetscCallCUDA(cudaStreamSynchronize(NULL)); /* Initialize CUDA runtime to get more accurate timing below */

  /* Interleave host and device allocations so the benchmark queries both kinds of pointer */
  PetscCall(PetscMalloc1(n, &ptrs));
  for (i = 0; i < n; i++) {
    if (i % 2) {
      PetscCall(PetscMalloc1(i + 16, &ptrs[i]));
    } else {
      PetscCallCUDA(cudaMalloc((void **)&ptrs[i], (i + 16) * sizeof(PetscScalar)));
    }
  }

  PetscCall(PetscTime(&tstart));
  for (i = 0; i < n; i++) {
    /*
      Errors are deliberately non-fatal here: depending on the CUDA version, querying a
      plain (unregistered) host pointer may return an error. cudaGetLastError() resets
      the runtime's last-error state so the failure does not leak into later CUDA calls.
    */
    cerr = cudaPointerGetAttributes(&attr, ptrs[i]);
    if (cerr) cerr = cudaGetLastError();
  }
  PetscCall(PetscTime(&tend));
  avg_us = (tend - tstart) * 1e6 / n; /* PetscTime() reports seconds; convert to microseconds per call */

  PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Average cudaPointerGetAttributes() time = %.2f microseconds\n", avg_us));

  /* Release each buffer with the deallocator that matches its allocator */
  for (i = 0; i < n; i++) {
    if (i % 2) {
      PetscCall(PetscFree(ptrs[i]));
    } else {
      PetscCallCUDA(cudaFree(ptrs[i]));
    }
  }
  PetscCall(PetscFree(ptrs));

  PetscCall(PetscFinalize());
  return 0;
}
50: /*TEST
51: build:
52: requires: cuda
54: test:
55: requires: cuda
56: args: -n 2
57: output_file: output/empty.out
58: filter: grep "DOES_NOT_EXIST"
60: TEST*/