Actual source code: ex7.c

  1: static const char help[] = "Tests PetscDeviceAllocate().\n\n";

  3: #include "petscdevicetestcommon.h"

  5: #define DebugPrintf(comm, ...) PetscPrintf((comm), "[DEBUG OUTPUT] " __VA_ARGS__)

  7: static PetscErrorCode IncrementSize(PetscRandom rand, PetscInt *value)
  8: {
  9:   PetscReal rval;

 11:   // set the interval such that *value += rval never goes below 0 or above 500
 12:   PetscRandomSetInterval(rand, -(*value), 500 - (*value));
 13:   PetscRandomGetValueReal(rand, &rval);
 14:   *value += (PetscInt)rval;
 15:   DebugPrintf(PetscObjectComm((PetscObject)rand), "n: %" PetscInt_FMT "\n", *value);
 16:   return 0;
 17: }

 19: static PetscErrorCode TestAllocate(PetscDeviceContext dctx, PetscRandom rand, PetscMemType mtype)
 20: {
 21:   PetscScalar *ptr, *tmp_ptr;
 22:   PetscInt     n = 10;

 24:   if (PetscMemTypeDevice(mtype)) {
 25:     PetscDeviceType dtype;

 27:     PetscDeviceContextGetDeviceType(dctx, &dtype);
 28:     // host device context cannot handle this
 29:     if (dtype == PETSC_DEVICE_HOST) return 0;
 30:   }
 31:   // test basic allocation, deallocation
 32:   IncrementSize(rand, &n);
 33:   PetscDeviceMalloc(dctx, mtype, n, &ptr);
 35:   // this ensures the host pointer is at least valid
 36:   if (PetscMemTypeHost(mtype)) {
 37:     for (PetscInt i = 0; i < n; ++i) ptr[i] = (PetscScalar)i;
 38:   }
 39:   PetscDeviceFree(dctx, ptr);

 41:   // test alignment of various types
 42:   {
 43:     char     *char_ptr;
 44:     short    *short_ptr;
 45:     int      *int_ptr;
 46:     double   *double_ptr;
 47:     long int *long_int_ptr;

 49:     PetscDeviceMalloc(dctx, mtype, 1, &char_ptr);
 50:     PetscDeviceMalloc(dctx, mtype, 1, &short_ptr);
 51:     PetscDeviceMalloc(dctx, mtype, 1, &int_ptr);
 52:     PetscDeviceMalloc(dctx, mtype, 1, &double_ptr);
 53:     PetscDeviceMalloc(dctx, mtype, 1, &long_int_ptr);

 55:     // if an error occurs here, it means the alignment system is broken!
 56:     PetscDeviceFree(dctx, char_ptr);
 57:     PetscDeviceFree(dctx, short_ptr);
 58:     PetscDeviceFree(dctx, int_ptr);
 59:     PetscDeviceFree(dctx, double_ptr);
 60:     PetscDeviceFree(dctx, long_int_ptr);
 61:   }

 63:   // test that calloc() produces cleared memory
 64:   IncrementSize(rand, &n);
 65:   PetscDeviceCalloc(dctx, mtype, n, &ptr);
 67:   if (PetscMemTypeHost(mtype)) {
 68:     tmp_ptr = ptr;
 69:   } else {
 70:     PetscDeviceMalloc(dctx, PETSC_MEMTYPE_HOST, n, &tmp_ptr);
 71:     PetscDeviceArrayCopy(dctx, tmp_ptr, ptr, n);
 72:   }
 73:   PetscDeviceContextSynchronize(dctx);
 75:   if (tmp_ptr == ptr) {
 76:     tmp_ptr = NULL;
 77:   } else {
 78:     PetscDeviceFree(dctx, tmp_ptr);
 79:   }
 80:   PetscDeviceFree(dctx, ptr);

 82:   // test that devicearrayzero produces cleared memory
 83:   IncrementSize(rand, &n);
 84:   PetscDeviceMalloc(dctx, mtype, n, &ptr);
 85:   PetscDeviceArrayZero(dctx, ptr, n);
 86:   PetscMalloc1(n, &tmp_ptr);
 87:   PetscDeviceRegisterMemory(tmp_ptr, PETSC_MEMTYPE_HOST, n * sizeof(*tmp_ptr));
 88:   for (PetscInt i = 0; i < n; ++i) tmp_ptr[i] = (PetscScalar)i;
 89:   PetscDeviceArrayCopy(dctx, tmp_ptr, ptr, n);
 90:   PetscDeviceContextSynchronize(dctx);
 92:   PetscDeviceFree(dctx, tmp_ptr);
 93:   PetscDeviceFree(dctx, ptr);
 94:   return 0;
 95: }

 97: static PetscErrorCode TestAsyncCoherence(PetscDeviceContext dctx, PetscRandom rand)
 98: {
 99:   const PetscInt      nsub = 2;
100:   const PetscInt      n    = 1024;
101:   PetscScalar        *ptr, *tmp_ptr;
102:   PetscDeviceType     dtype;
103:   PetscDeviceContext *sub;

105:   PetscDeviceContextGetDeviceType(dctx, &dtype);
106:   // ensure the streams are nonblocking
107:   PetscDeviceContextForkWithStreamType(dctx, PETSC_STREAM_GLOBAL_NONBLOCKING, nsub, &sub);
108:   // do a warmup to ensure each context acquires any necessary data structures
109:   for (PetscInt i = 0; i < nsub; ++i) {
110:     PetscDeviceMalloc(sub[i], PETSC_MEMTYPE_HOST, n, &ptr);
111:     PetscDeviceFree(sub[i], ptr);
112:     if (dtype != PETSC_DEVICE_HOST) {
113:       PetscDeviceMalloc(sub[i], PETSC_MEMTYPE_DEVICE, n, &ptr);
114:       PetscDeviceFree(sub[i], ptr);
115:     }
116:   }

118:   // allocate on one
119:   PetscDeviceMalloc(sub[0], PETSC_MEMTYPE_HOST, n, &ptr);
120:   // free on the other
121:   PetscDeviceFree(sub[1], ptr);

123:   // allocate on one
124:   PetscDeviceMalloc(sub[0], PETSC_MEMTYPE_HOST, n, &ptr);
125:   // zero on the other
126:   PetscDeviceArrayZero(sub[1], ptr, n);
127:   PetscDeviceContextSynchronize(sub[1]);
128:   for (PetscInt i = 0; i < n; ++i) {
130:   }
131:   PetscDeviceFree(sub[1], ptr);

133:   // test the transfers are serialized
134:   if (dtype != PETSC_DEVICE_HOST) {
135:     PetscDeviceCalloc(dctx, PETSC_MEMTYPE_DEVICE, n, &ptr);
136:     PetscDeviceMalloc(dctx, PETSC_MEMTYPE_HOST, n, &tmp_ptr);
137:     PetscDeviceArrayCopy(sub[0], tmp_ptr, ptr, n);
138:     PetscDeviceContextSynchronize(sub[0]);
139:     for (PetscInt i = 0; i < n; ++i) {
141:     }
142:     PetscDeviceFree(sub[1], ptr);
143:   }

145:   PetscDeviceContextJoin(dctx, nsub, PETSC_DEVICE_CONTEXT_JOIN_DESTROY, &sub);
146:   return 0;
147: }

149: int main(int argc, char *argv[])
150: {
151:   PetscDeviceContext dctx;
152:   PetscRandom        rand;

155:   PetscInitialize(&argc, &argv, NULL, help);

157:   // A vile hack. The -info output is used to test correctness in this test which prints --
158:   // among other things -- the PetscObjectId of the PetscDevicContext and the allocated memory.
159:   //
160:   // Due to device and host creating slightly different number of objects on startup there will
161:   // be a mismatch in the ID's. So for the tests involving the host we sit here creating
162:   // PetscContainers (and incrementing the global PetscObjectId counter) until it reaches some
163:   // arbitrarily high number to ensure that our first PetscDeviceContext has the same ID across
164:   // systems.
165:   if (PETSC_DEVICE_DEFAULT() == PETSC_DEVICE_HOST) {
166:     PetscObjectId id, prev_id = 0;

168:     do {
169:       PetscContainer c;

171:       PetscContainerCreate(PETSC_COMM_WORLD, &c);
172:       PetscObjectGetId((PetscObject)c, &id);
173:       // sanity check, in case PetscContainer ever stops being a PetscObject
175:       prev_id = id;
176:       PetscContainerDestroy(&c);
177:     } while (id < 10);
178:   }
179:   PetscDeviceContextGetCurrentContext(&dctx);

181:   PetscRandomCreate(PETSC_COMM_WORLD, &rand);
182:   // this seed just so happens to keep the allocation size increasing
183:   PetscRandomSetSeed(rand, 123);
184:   PetscRandomSeed(rand);
185:   PetscRandomSetFromOptions(rand);

187:   TestAllocate(dctx, rand, PETSC_MEMTYPE_HOST);
188:   TestAllocate(dctx, rand, PETSC_MEMTYPE_DEVICE);
189:   TestAsyncCoherence(dctx, rand);

191:   PetscRandomDestroy(&rand);
192:   PetscPrintf(PETSC_COMM_WORLD, "EXIT_SUCCESS\n");
193:   PetscFinalize();
194:   return 0;
195: }

197: /*TEST

199:   build:
200:    requires: defined(PETSC_HAVE_CXX)

202:   testset:
203:    requires: defined(PETSC_USE_INFO), defined(PETSC_USE_DEBUG)
204:    args: -info :device
205:    suffix: with_info
206:    test:
207:      requires: !device
208:      suffix: host_no_device
209:    test:
210:      requires: device
211:      args: -default_device_type host
212:      filter: sed -e 's/host/IMPL/g' -e 's/cuda/IMPL/g' -e 's/hip/IMPL/g' -e 's/sycl/IMPL/g'
213:      suffix: host_with_device
214:    test:
215:      requires: cuda
216:      args: -default_device_type cuda
217:      suffix: cuda
218:    test:
219:      requires: hip
220:      args: -default_device_type hip
221:      suffix: hip
222:    test:
223:      requires: sycl
224:      args: -default_device_type sycl
225:      suffix: sycl

227:   testset:
228:    output_file: ./output/ExitSuccess.out
229:    requires: !defined(PETSC_USE_DEBUG)
230:    filter: grep -v "\[DEBUG OUTPUT\]"
231:    suffix: no_info
232:    test:
233:      requires: !device
234:      suffix: host_no_device
235:    test:
236:      requires: device
237:      args: -default_device_type host
238:      suffix: host_with_device
239:    test:
240:      requires: cuda
241:      args: -default_device_type cuda
242:      suffix: cuda
243:    test:
244:      requires: hip
245:      args: -default_device_type hip
246:      suffix: hip
247:    test:
248:      requires: sycl
249:      args: -default_device_type sycl
250:      suffix: sycl
251: TEST*/