Actual source code: performance.c
1: static char help[] = "Time vector operations on GPU\n";
2: /* This program produces the results for Argonne Technical Report ANL-19/41.
3: The technical report and resources for generating data can be found in the
4: repository: https://gitlab.com/hannah_mairs/summit-performance */
6: #include <petscvec.h>
8: int main(int argc, char **argv)
9: {
10: Vec v, w, x;
11: PetscInt n = 15;
12: PetscScalar val;
13: PetscReal norm1, norm2;
14: PetscRandom rctx;
15: #if defined(PETSC_USE_LOG)
16: PetscLogStage stage;
17: #endif
20: PetscInitialize(&argc, &argv, (char *)0, help);
21: PetscOptionsGetInt(NULL, NULL, "-n", &n, NULL);
22: PetscRandomCreate(PETSC_COMM_WORLD, &rctx);
23: PetscRandomSetFromOptions(rctx);
24: VecCreate(PETSC_COMM_WORLD, &v);
25: VecSetSizes(v, PETSC_DECIDE, n);
26: VecSetFromOptions(v);
27: VecDuplicate(v, &w);
28: VecSetRandom(v, rctx);
29: VecSetRandom(w, rctx);
31: /* create dummy vector to clear cache */
32: VecCreate(PETSC_COMM_WORLD, &x);
33: VecSetSizes(x, PETSC_DECIDE, 10000000);
34: VecSetFromOptions(x);
35: VecSetRandom(x, rctx);
37: /* send v to GPU */
38: PetscBarrier(NULL);
39: VecNorm(v, NORM_1, &norm1);
41: /* register a stage work on GPU */
42: PetscLogStageRegister("Work on GPU", &stage);
43: PetscLogStagePush(stage);
44: VecNorm(w, NORM_1, &norm1); /* send w to GPU */
45: VecNorm(x, NORM_1, &norm1); /* clear cache */
46: PetscBarrier(NULL);
47: VecAXPY(w, 1.0, v);
48: VecNorm(x, NORM_INFINITY, &norm1);
49: PetscBarrier(NULL);
50: VecDot(w, v, &val);
51: VecNorm(x, NORM_1, &norm1);
52: PetscBarrier(NULL);
53: VecSet(v, 0.0);
54: VecNorm(x, NORM_2, &norm2);
55: PetscBarrier(NULL);
56: VecCopy(v, w);
57: PetscLogStagePop();
59: PetscPrintf(PETSC_COMM_WORLD, "Test completed successfully!\n");
60: VecDestroy(&v);
61: VecDestroy(&w);
62: VecDestroy(&x);
63: PetscRandomDestroy(&rctx);
64: PetscFinalize();
65: return 0;
66: }
68: /*TEST
70: testset:
71: nsize: 2
72: output_file: output/performance_cuda.out
74: test:
75: suffix: cuda
76: args: -vec_type mpicuda
77: requires: cuda
79: test:
80: suffix: hip
81: args: -vec_type mpihip
82: requires: hip
84: TEST*/