Actual source code: ex7.c
// Help text handed to PetscInitialize() in main().
1: static const char help[] = "Tests PetscDeviceAllocate().\n\n";
3: #include "petscdevicetestcommon.h"
// Forwards to PetscPrintf() with "[DEBUG OUTPUT] " prepended to the format string. The prefix
// is joined by string-literal adjacency, so the first variadic argument must be a string
// literal. The "no_info" tests in the TEST block at the end of this file filter these lines
// out with grep -v.
5: #define DebugPrintf(comm, ...) PetscPrintf((comm), "[DEBUG OUTPUT] " __VA_ARGS__)
7: static PetscErrorCode IncrementSize(PetscRandom rand, PetscInt *value)
8: {
9: PetscReal rval;
11: // set the interval such that *value += rval never goes below 0 or above 500
12: PetscRandomSetInterval(rand, -(*value), 500 - (*value));
13: PetscRandomGetValueReal(rand, &rval);
14: *value += (PetscInt)rval;
15: DebugPrintf(PetscObjectComm((PetscObject)rand), "n: %" PetscInt_FMT "\n", *value);
16: return 0;
17: }
19: static PetscErrorCode TestAllocate(PetscDeviceContext dctx, PetscRandom rand, PetscMemType mtype)
20: {
21: PetscScalar *ptr, *tmp_ptr;
22: PetscInt n = 10;
24: if (PetscMemTypeDevice(mtype)) {
25: PetscDeviceType dtype;
27: PetscDeviceContextGetDeviceType(dctx, &dtype);
28: // host device context cannot handle this
29: if (dtype == PETSC_DEVICE_HOST) return 0;
30: }
31: // test basic allocation, deallocation
32: IncrementSize(rand, &n);
33: PetscDeviceMalloc(dctx, mtype, n, &ptr);
35: // this ensures the host pointer is at least valid
36: if (PetscMemTypeHost(mtype)) {
37: for (PetscInt i = 0; i < n; ++i) ptr[i] = (PetscScalar)i;
38: }
39: PetscDeviceFree(dctx, ptr);
41: // test alignment of various types
42: {
43: char *char_ptr;
44: short *short_ptr;
45: int *int_ptr;
46: double *double_ptr;
47: long int *long_int_ptr;
49: PetscDeviceMalloc(dctx, mtype, 1, &char_ptr);
50: PetscDeviceMalloc(dctx, mtype, 1, &short_ptr);
51: PetscDeviceMalloc(dctx, mtype, 1, &int_ptr);
52: PetscDeviceMalloc(dctx, mtype, 1, &double_ptr);
53: PetscDeviceMalloc(dctx, mtype, 1, &long_int_ptr);
55: // if an error occurs here, it means the alignment system is broken!
56: PetscDeviceFree(dctx, char_ptr);
57: PetscDeviceFree(dctx, short_ptr);
58: PetscDeviceFree(dctx, int_ptr);
59: PetscDeviceFree(dctx, double_ptr);
60: PetscDeviceFree(dctx, long_int_ptr);
61: }
63: // test that calloc() produces cleared memory
64: IncrementSize(rand, &n);
65: PetscDeviceCalloc(dctx, mtype, n, &ptr);
67: if (PetscMemTypeHost(mtype)) {
68: tmp_ptr = ptr;
69: } else {
70: PetscDeviceMalloc(dctx, PETSC_MEMTYPE_HOST, n, &tmp_ptr);
71: PetscDeviceArrayCopy(dctx, tmp_ptr, ptr, n);
72: }
73: PetscDeviceContextSynchronize(dctx);
75: if (tmp_ptr == ptr) {
76: tmp_ptr = NULL;
77: } else {
78: PetscDeviceFree(dctx, tmp_ptr);
79: }
80: PetscDeviceFree(dctx, ptr);
82: // test that devicearrayzero produces cleared memory
83: IncrementSize(rand, &n);
84: PetscDeviceMalloc(dctx, mtype, n, &ptr);
85: PetscDeviceArrayZero(dctx, ptr, n);
86: PetscMalloc1(n, &tmp_ptr);
87: PetscDeviceRegisterMemory(tmp_ptr, PETSC_MEMTYPE_HOST, n * sizeof(*tmp_ptr));
88: for (PetscInt i = 0; i < n; ++i) tmp_ptr[i] = (PetscScalar)i;
89: PetscDeviceArrayCopy(dctx, tmp_ptr, ptr, n);
90: PetscDeviceContextSynchronize(dctx);
92: PetscDeviceFree(dctx, tmp_ptr);
93: PetscDeviceFree(dctx, ptr);
94: return 0;
95: }
97: static PetscErrorCode TestAsyncCoherence(PetscDeviceContext dctx, PetscRandom rand)
98: {
99: const PetscInt nsub = 2;
100: const PetscInt n = 1024;
101: PetscScalar *ptr, *tmp_ptr;
102: PetscDeviceType dtype;
103: PetscDeviceContext *sub;
105: PetscDeviceContextGetDeviceType(dctx, &dtype);
106: // ensure the streams are nonblocking
107: PetscDeviceContextForkWithStreamType(dctx, PETSC_STREAM_GLOBAL_NONBLOCKING, nsub, &sub);
108: // do a warmup to ensure each context acquires any necessary data structures
109: for (PetscInt i = 0; i < nsub; ++i) {
110: PetscDeviceMalloc(sub[i], PETSC_MEMTYPE_HOST, n, &ptr);
111: PetscDeviceFree(sub[i], ptr);
112: if (dtype != PETSC_DEVICE_HOST) {
113: PetscDeviceMalloc(sub[i], PETSC_MEMTYPE_DEVICE, n, &ptr);
114: PetscDeviceFree(sub[i], ptr);
115: }
116: }
118: // allocate on one
119: PetscDeviceMalloc(sub[0], PETSC_MEMTYPE_HOST, n, &ptr);
120: // free on the other
121: PetscDeviceFree(sub[1], ptr);
123: // allocate on one
124: PetscDeviceMalloc(sub[0], PETSC_MEMTYPE_HOST, n, &ptr);
125: // zero on the other
126: PetscDeviceArrayZero(sub[1], ptr, n);
127: PetscDeviceContextSynchronize(sub[1]);
128: for (PetscInt i = 0; i < n; ++i) {
130: }
131: PetscDeviceFree(sub[1], ptr);
133: // test the transfers are serialized
134: if (dtype != PETSC_DEVICE_HOST) {
135: PetscDeviceCalloc(dctx, PETSC_MEMTYPE_DEVICE, n, &ptr);
136: PetscDeviceMalloc(dctx, PETSC_MEMTYPE_HOST, n, &tmp_ptr);
137: PetscDeviceArrayCopy(sub[0], tmp_ptr, ptr, n);
138: PetscDeviceContextSynchronize(sub[0]);
139: for (PetscInt i = 0; i < n; ++i) {
141: }
142: PetscDeviceFree(sub[1], ptr);
143: }
145: PetscDeviceContextJoin(dctx, nsub, PETSC_DEVICE_CONTEXT_JOIN_DESTROY, &sub);
146: return 0;
147: }
149: int main(int argc, char *argv[])
150: {
151: PetscDeviceContext dctx;
152: PetscRandom rand;
155: PetscInitialize(&argc, &argv, NULL, help);
157: // A vile hack. The -info output is used to test correctness in this test which prints --
158: // among other things -- the PetscObjectId of the PetscDevicContext and the allocated memory.
159: //
160: // Due to device and host creating slightly different number of objects on startup there will
161: // be a mismatch in the ID's. So for the tests involving the host we sit here creating
162: // PetscContainers (and incrementing the global PetscObjectId counter) until it reaches some
163: // arbitrarily high number to ensure that our first PetscDeviceContext has the same ID across
164: // systems.
165: if (PETSC_DEVICE_DEFAULT() == PETSC_DEVICE_HOST) {
166: PetscObjectId id, prev_id = 0;
168: do {
169: PetscContainer c;
171: PetscContainerCreate(PETSC_COMM_WORLD, &c);
172: PetscObjectGetId((PetscObject)c, &id);
173: // sanity check, in case PetscContainer ever stops being a PetscObject
175: prev_id = id;
176: PetscContainerDestroy(&c);
177: } while (id < 10);
178: }
179: PetscDeviceContextGetCurrentContext(&dctx);
181: PetscRandomCreate(PETSC_COMM_WORLD, &rand);
182: // this seed just so happens to keep the allocation size increasing
183: PetscRandomSetSeed(rand, 123);
184: PetscRandomSeed(rand);
185: PetscRandomSetFromOptions(rand);
187: TestAllocate(dctx, rand, PETSC_MEMTYPE_HOST);
188: TestAllocate(dctx, rand, PETSC_MEMTYPE_DEVICE);
189: TestAsyncCoherence(dctx, rand);
191: PetscRandomDestroy(&rand);
192: PetscPrintf(PETSC_COMM_WORLD, "EXIT_SUCCESS\n");
193: PetscFinalize();
194: return 0;
195: }
197: /*TEST
199: build:
200: requires: defined(PETSC_HAVE_CXX)
202: testset:
203: requires: defined(PETSC_USE_INFO), defined(PETSC_USE_DEBUG)
204: args: -info :device
205: suffix: with_info
206: test:
207: requires: !device
208: suffix: host_no_device
209: test:
210: requires: device
211: args: -default_device_type host
212: filter: sed -e 's/host/IMPL/g' -e 's/cuda/IMPL/g' -e 's/hip/IMPL/g' -e 's/sycl/IMPL/g'
213: suffix: host_with_device
214: test:
215: requires: cuda
216: args: -default_device_type cuda
217: suffix: cuda
218: test:
219: requires: hip
220: args: -default_device_type hip
221: suffix: hip
222: test:
223: requires: sycl
224: args: -default_device_type sycl
225: suffix: sycl
227: testset:
228: output_file: ./output/ExitSuccess.out
229: requires: !defined(PETSC_USE_DEBUG)
230: filter: grep -v "\[DEBUG OUTPUT\]"
231: suffix: no_info
232: test:
233: requires: !device
234: suffix: host_no_device
235: test:
236: requires: device
237: args: -default_device_type host
238: suffix: host_with_device
239: test:
240: requires: cuda
241: args: -default_device_type cuda
242: suffix: cuda
243: test:
244: requires: hip
245: args: -default_device_type hip
246: suffix: hip
247: test:
248: requires: sycl
249: args: -default_device_type sycl
250: suffix: sycl
251: TEST*/