Actual source code: mal.c

  1: /*
  2:     Code that allows a user to dictate what malloc() PETSc uses.
  3: */
  4: #define PETSC_DESIRE_FEATURE_TEST_MACROS /* for posix_memalign() */
  5: #include <petscsys.h>
  6: #include <stdarg.h>
  7: #if defined(PETSC_HAVE_MALLOC_H)
  8:   #include <malloc.h>
  9: #endif
 10: #if defined(PETSC_HAVE_MEMKIND)
 11:   #include <errno.h>
 12:   #include <memkind.h>
 13: typedef enum {
 14:   PETSC_MK_DEFAULT       = 0,
 15:   PETSC_MK_HBW_PREFERRED = 1
 16: } PetscMemkindType;
 17: PetscMemkindType currentmktype  = PETSC_MK_HBW_PREFERRED;
 18: PetscMemkindType previousmktype = PETSC_MK_HBW_PREFERRED;
 19: #endif
 20: /*
 21:         We want to make sure that all mallocs of double or complex numbers are complex aligned.
 22:     1) on systems with memalign() we call that routine to get an aligned memory location
 23:     2) on systems without memalign() we
 24:        - allocate one sizeof(PetscScalar) extra space
 25:        - we shift the pointer up slightly if needed to get PetscScalar aligned
 26:        - if shifted we store at ptr[-1] the amount of shift (plus a classid)
 27: */
 28: #define SHIFT_CLASSID 456123

 30: PETSC_EXTERN PetscErrorCode PetscMallocAlign(size_t mem, PetscBool clear, int line, const char func[], const char file[], void **result)
 31: {
 32:   if (!mem) {
 33:     *result = NULL;
 34:     return 0;
 35:   }
 36: #if PetscDefined(HAVE_MEMKIND)
 37:   {
 38:     int err;

 40:     err = memkind_posix_memalign(currentmktype ? MEMKIND_HBW_PREFERRED : MEMKIND_DEFAULT, result, PETSC_MEMALIGN, mem);
 42:     if (err == ENOMEM) PetscInfo(NULL, "Memkind: fail to request HBW memory %.0f, falling back to normal memory\n", (PetscLogDouble)mem);
 44:     if (clear) PetscMemzero(*result, mem);
 45:   }
 46: #else /* PetscDefined(HAVE_MEMKIND) */
 47:   #if PetscDefined(HAVE_DOUBLE_ALIGN_MALLOC) && (PETSC_MEMALIGN == 8)
 48:   if (clear) *result = calloc(1 + mem / sizeof(int), sizeof(int));
 49:   else *result = malloc(mem);

 52:   if (PetscLogMemory) PetscMemzero(*result, mem);
 53:   #elif PetscDefined(HAVE_POSIX_MEMALIGN)
 54:   int ret = posix_memalign(result, PETSC_MEMALIGN, mem);
 56:   if (clear || PetscLogMemory) PetscMemzero(*result, mem);
 57:   #else  /* PetscDefined(HAVE_DOUBLE_ALIGN_MALLOC) || PetscDefined(HAVE_POSIX_MEMALIGN) */
 58:   {
 59:     int *ptr, shift;
 60:     /*
 61:       malloc space for two extra chunks and shift ptr 1 + enough to get it PetscScalar aligned
 62:     */
 63:     if (clear) {
 64:       ptr = (int *)calloc(1 + (mem + 2 * PETSC_MEMALIGN) / sizeof(int), sizeof(int));
 65:     } else {
 66:       ptr = (int *)malloc(mem + 2 * PETSC_MEMALIGN);
 67:     }
 69:     shift          = (int)(((PETSC_UINTPTR_T)ptr) % PETSC_MEMALIGN);
 70:     shift          = (2 * PETSC_MEMALIGN - shift) / sizeof(int);
 71:     ptr[shift - 1] = shift + SHIFT_CLASSID;
 72:     ptr += shift;
 73:     *result = (void *)ptr;
 74:     if (PetscLogMemory) PetscMemzero(*result, mem);
 75:   }
 76:   #endif /* PetscDefined(HAVE_DOUBLE_ALIGN_MALLOC) || PetscDefined(HAVE_POSIX_MEMALIGN) */
 77: #endif   /* PetscDefined(HAVE_MEMKIND) */
 78:   return 0;
 79: }

 81: PETSC_EXTERN PetscErrorCode PetscFreeAlign(void *ptr, int line, const char func[], const char file[])
 82: {
 83:   if (!ptr) return 0;
 84: #if PetscDefined(HAVE_MEMKIND)
 85:   memkind_free(0, ptr); /* specify the kind to 0 so that memkind will look up for the right type */
 86: #else                   /* PetscDefined(HAVE_MEMKIND) */
 87:   #if (!(PetscDefined(HAVE_DOUBLE_ALIGN_MALLOC) && (PETSC_MEMALIGN == 8)) && !PetscDefined(HAVE_POSIX_MEMALIGN))
 88:   {
 89:     /*
 90:       Previous int tells us how many ints the pointer has been shifted from
 91:       the original address provided by the system malloc().
 92:     */
 93:     const int shift = *(((int *)ptr) - 1) - SHIFT_CLASSID;

 97:     ptr = (void *)(((int *)ptr) - shift);
 98:   }
 99:   #endif

101:   #if PetscDefined(HAVE_FREE_RETURN_INT)
102:   int err = free(ptr);
104:   #else
105:   free(ptr);
106:   #endif
107: #endif
108:   return 0;
109: }

111: PETSC_EXTERN PetscErrorCode PetscReallocAlign(size_t mem, int line, const char func[], const char file[], void **result)
112: {
113:   if (!mem) {
114:     PetscFreeAlign(*result, line, func, file);
115:     *result = NULL;
116:     return 0;
117:   }
118: #if PetscDefined(HAVE_MEMKIND)
119:   *result = memkind_realloc(currentmktype ? MEMKIND_HBW_PREFERRED : MEMKIND_DEFAULT, *result, mem);
120: #else
121:   #if (!(PetscDefined(HAVE_DOUBLE_ALIGN_MALLOC) && (PETSC_MEMALIGN == 8)) && !PetscDefined(HAVE_POSIX_MEMALIGN))
122:   {
123:     /*
124:       Previous int tells us how many ints the pointer has been shifted from
125:       the original address provided by the system malloc().
126:     */
127:     int shift = *(((int *)*result) - 1) - SHIFT_CLASSID;
130:     *result = (void *)(((int *)*result) - shift);
131:   }
132:   #endif

134:   #if (PetscDefined(HAVE_DOUBLE_ALIGN_MALLOC) && (PETSC_MEMALIGN == 8)) || PetscDefined(HAVE_POSIX_MEMALIGN)
135:   *result = realloc(*result, mem);
136:   #else
137:   {
138:     /*
139:       malloc space for two extra chunks and shift ptr 1 + enough to get it PetscScalar aligned
140:     */
141:     int *ptr = (int *)realloc(*result, mem + 2 * PETSC_MEMALIGN);
142:     if (ptr) {
143:       int shift      = (int)(((PETSC_UINTPTR_T)ptr) % PETSC_MEMALIGN);
144:       shift          = (2 * PETSC_MEMALIGN - shift) / sizeof(int);
145:       ptr[shift - 1] = shift + SHIFT_CLASSID;
146:       ptr += shift;
147:       *result = (void *)ptr;
148:     } else {
149:       *result = NULL;
150:     }
151:   }
152:   #endif
153: #endif
155: #if PetscDefined(HAVE_POSIX_MEMALIGN)
156:   /* There are no standard guarantees that realloc() maintains the alignment of memalign(), so I think we have to
157:    * realloc and, if the alignment is wrong, malloc/copy/free. */
158:   if (((size_t)(*result)) % PETSC_MEMALIGN) {
159:     void *newResult;
160:   #if PetscDefined(HAVE_MEMKIND)
161:     {
162:       int err;
163:       err = memkind_posix_memalign(currentmktype ? MEMKIND_HBW_PREFERRED : MEMKIND_DEFAULT, &newResult, PETSC_MEMALIGN, mem);
165:       if (err == ENOMEM) PetscInfo(NULL, "Memkind: fail to request HBW memory %.0f, falling back to normal memory\n", (PetscLogDouble)mem);
166:     }
168:   #else
170:   #endif
171:     PetscMemcpy(newResult, *result, mem);
172:   #if PetscDefined(HAVE_FREE_RETURN_INT)
173:     {
174:       int err = free(*result);
176:     }
177:   #else
178:     #if defined(PETSC_HAVE_MEMKIND)
179:     memkind_free(0, *result);
180:     #else
181:     free(*result);
182:     #endif
183:   #endif
184:     *result = newResult;
185:   }
186: #endif
187:   return 0;
188: }

190: PetscErrorCode (*PetscTrMalloc)(size_t, PetscBool, int, const char[], const char[], void **) = PetscMallocAlign;
191: PetscErrorCode (*PetscTrFree)(void *, int, const char[], const char[])                       = PetscFreeAlign;
192: PetscErrorCode (*PetscTrRealloc)(size_t, int, const char[], const char[], void **)           = PetscReallocAlign;

194: PETSC_INTERN PetscBool petscsetmallocvisited;
195: PetscBool              petscsetmallocvisited = PETSC_FALSE;

197: /*@C
198:    PetscMallocSet - Sets the routines used to do mallocs and frees.
199:    This routine MUST be called before `PetscInitialize()` and may be
200:    called only once.

202:    Not Collective

204:    Input Parameters:
205: +  imalloc - the routine that provides the malloc (also provides calloc(), which is used depends on the second argument)
206: .  ifree - the routine that provides the free
207: -  iralloc - the routine that provides the realloc

209:    Level: developer

211: .seealso: `PetscMallocClear()`
212: @*/
213: PetscErrorCode PetscMallocSet(PetscErrorCode (*imalloc)(size_t, PetscBool, int, const char[], const char[], void **), PetscErrorCode (*ifree)(void *, int, const char[], const char[]), PetscErrorCode (*iralloc)(size_t, int, const char[], const char[], void **))
214: {
216:   PetscTrMalloc         = imalloc;
217:   PetscTrFree           = ifree;
218:   PetscTrRealloc        = iralloc;
219:   petscsetmallocvisited = PETSC_TRUE;
220:   return 0;
221: }

223: /*@C
224:    PetscMallocClear - Resets the routines used to do mallocs and frees to the defaults.

226:    Not Collective

228:    Level: developer

230:    Note:
231:     In general one should never run a PETSc program with different malloc() and
232:     free() settings for different parts; this is because one NEVER wants to
233:     free() an address that was malloced by a different memory management system

235:     Called in `PetscFinalize()` so that if `PetscInitialize()` is called again it starts with a fresh slate of allocation information

237: .seealso: `PetscMallocSet`
238: @*/
239: PetscErrorCode PetscMallocClear(void)
240: {
241:   PetscTrMalloc         = PetscMallocAlign;
242:   PetscTrFree           = PetscFreeAlign;
243:   PetscTrRealloc        = PetscReallocAlign;
244:   petscsetmallocvisited = PETSC_FALSE;
245:   return 0;
246: }

248: PetscErrorCode PetscMemoryTrace(const char label[])
249: {
250:   PetscLogDouble        mem, mal;
251:   static PetscLogDouble oldmem = 0, oldmal = 0;

253:   PetscMemoryGetCurrentUsage(&mem);
254:   PetscMallocGetCurrentUsage(&mal);

256:   PetscPrintf(PETSC_COMM_WORLD, "%s High water  %8.3f MB increase %8.3f MB Current %8.3f MB increase %8.3f MB\n", label, mem * 1e-6, (mem - oldmem) * 1e-6, mal * 1e-6, (mal - oldmal) * 1e-6);
257:   oldmem = mem;
258:   oldmal = mal;
259:   return 0;
260: }

262: static PetscErrorCode (*PetscTrMallocOld)(size_t, PetscBool, int, const char[], const char[], void **) = PetscMallocAlign;
263: static PetscErrorCode (*PetscTrReallocOld)(size_t, int, const char[], const char[], void **)           = PetscReallocAlign;
264: static PetscErrorCode (*PetscTrFreeOld)(void *, int, const char[], const char[])                       = PetscFreeAlign;

266: /*@C
267:    PetscMallocSetDRAM - Set `PetscMalloc()` to use DRAM.
268:      If memkind is available, change the memkind type. Otherwise, switch the
269:      current malloc and free routines to the `PetscMallocAlign()` and
270:      `PetscFreeAlign()` (PETSc default).

272:    Not Collective

274:    Level: developer

276:    Note:
277:      This provides a way to do the allocation on DRAM temporarily. One
278:      can switch back to the previous choice by calling `PetscMallocReset()`.

280: .seealso: `PetscMallocReset()`
281: @*/
282: PetscErrorCode PetscMallocSetDRAM(void)
283: {
284:   if (PetscTrMalloc == PetscMallocAlign) {
285: #if defined(PETSC_HAVE_MEMKIND)
286:     previousmktype = currentmktype;
287:     currentmktype  = PETSC_MK_DEFAULT;
288: #endif
289:   } else {
290:     /* Save the previous choice */
291:     PetscTrMallocOld  = PetscTrMalloc;
292:     PetscTrReallocOld = PetscTrRealloc;
293:     PetscTrFreeOld    = PetscTrFree;
294:     PetscTrMalloc     = PetscMallocAlign;
295:     PetscTrFree       = PetscFreeAlign;
296:     PetscTrRealloc    = PetscReallocAlign;
297:   }
298:   return 0;
299: }

301: /*@C
302:    PetscMallocResetDRAM - Reset the changes made by `PetscMallocSetDRAM()`

304:    Not Collective

306:    Level: developer

308: .seealso: `PetscMallocSetDRAM()`
309: @*/
310: PetscErrorCode PetscMallocResetDRAM(void)
311: {
312:   if (PetscTrMalloc == PetscMallocAlign) {
313: #if defined(PETSC_HAVE_MEMKIND)
314:     currentmktype = previousmktype;
315: #endif
316:   } else {
317:     /* Reset to the previous choice */
318:     PetscTrMalloc  = PetscTrMallocOld;
319:     PetscTrRealloc = PetscTrReallocOld;
320:     PetscTrFree    = PetscTrFreeOld;
321:   }
322:   return 0;
323: }

325: static PetscBool petscmalloccoalesce =
326: #if defined(PETSC_USE_MALLOC_COALESCED)
327:   PETSC_TRUE;
328: #else
329:   PETSC_FALSE;
330: #endif

332: /*@C
333:    PetscMallocSetCoalesce - Use coalesced malloc when allocating groups of objects

335:    Not Collective

337:    Input Parameters:
338: .  coalesce - `PETSC_TRUE` to use coalesced malloc for multi-object allocation.

340:    Options Database Keys:
341: .  -malloc_coalesce - turn coalesced malloc on or off

343:    Notes:
344:    PETSc uses coalesced malloc by default for optimized builds and not for debugging builds.

346:    This default can be changed via the command-line option -malloc_coalesce or by calling this function.

348:    This function can only be called immediately after `PetscInitialize()`

350:    Level: developer

352: .seealso: `PetscMallocA()`
353: @*/
354: PetscErrorCode PetscMallocSetCoalesce(PetscBool coalesce)
355: {
356:   petscmalloccoalesce = coalesce;
357:   return 0;
358: }

360: /*@C
361:    PetscMallocA - Allocate and optionally clear one or more objects, possibly using coalesced malloc

363:    Not Collective

365:    Input Parameters:
366: +  n - number of objects to allocate (at least 1)
367: .  clear - use calloc() to allocate space initialized to zero
368: .  lineno - line number to attribute allocation (typically __LINE__)
369: .  function - function to attribute allocation (typically PETSC_FUNCTION_NAME)
370: .  filename - file name to attribute allocation (typically __FILE__)
371: -  bytes0 - first of n object sizes

373:    Output Parameters:
374: .  ptr0 - first of n pointers to allocate

376:    Notes
377:    This function is not normally called directly by users, but rather via the macros `PetscMalloc1()`, `PetscMalloc2()`, or `PetscCalloc1()`, etc.

379:    Level: developer

381: .seealso: `PetscMallocAlign()`, `PetscMallocSet()`, `PetscMalloc1()`, `PetscMalloc2()`, `PetscMalloc3()`, `PetscMalloc4()`, `PetscMalloc5()`, `PetscMalloc6()`, `PetscMalloc7()`, `PetscCalloc1()`, `PetscCalloc2()`, `PetscCalloc3()`, `PetscCalloc4()`, `PetscCalloc5()`, `PetscCalloc6()`, `PetscCalloc7()`, `PetscFreeA()`
382: @*/
383: PetscErrorCode PetscMallocA(int n, PetscBool clear, int lineno, const char *function, const char *filename, size_t bytes0, void *ptr0, ...)
384: {
385:   va_list Argp;
386:   size_t  bytes[8], sumbytes;
387:   void  **ptr[8];
388:   int     i;

391:   bytes[0] = bytes0;
392:   ptr[0]   = (void **)ptr0;
393:   sumbytes = (bytes0 + PETSC_MEMALIGN - 1) & ~(PETSC_MEMALIGN - 1);
394:   va_start(Argp, ptr0);
395:   for (i = 1; i < n; i++) {
396:     bytes[i] = va_arg(Argp, size_t);
397:     ptr[i]   = va_arg(Argp, void **);
398:     sumbytes += (bytes[i] + PETSC_MEMALIGN - 1) & ~(PETSC_MEMALIGN - 1);
399:   }
400:   va_end(Argp);
401:   if (petscmalloccoalesce) {
402:     char *p;
403:     (*PetscTrMalloc)(sumbytes, clear, lineno, function, filename, (void **)&p);
404:     if (p == NULL) {
405:       for (i = 0; i < n; i++) *ptr[i] = NULL;
406:     } else {
407:       for (i = 0; i < n; i++) {
408:         *ptr[i] = bytes[i] ? p : NULL;
409:         p       = (char *)PetscAddrAlign(p + bytes[i]);
410:       }
411:     }
412:   } else {
413:     for (i = 0; i < n; i++) (*PetscTrMalloc)(bytes[i], clear, lineno, function, filename, (void **)ptr[i]);
414:   }
415:   return 0;
416: }

418: /*@C
419:    PetscFreeA - Free one or more objects, possibly allocated using coalesced malloc

421:    Not Collective

423:    Input Parameters:
424: +  n - number of objects to free (at least 1)
425: .  lineno - line number to attribute deallocation (typically __LINE__)
426: .  function - function to attribute deallocation (typically PETSC_FUNCTION_NAME)
427: .  filename - file name to attribute deallocation (typically __FILE__)
428: -  ptr0 ... - first of n pointers to free

430:    Notes:
431:    This function is not normally called directly by users, but rather via the macros `PetscFree()`, `PetscFree2()`, etc.

433:    The pointers are zeroed to prevent users from accidentally reusing space that has been freed.

435:    Level: developer

437: .seealso: `PetscMallocAlign()`, `PetscMallocSet()`, `PetscMallocA()`, `PetscFree1()`, `PetscFree2()`, `PetscFree3()`, `PetscFree4()`, `PetscFree5()`, `PetscFree6()`, `PetscFree7()`
438: @*/
439: PetscErrorCode PetscFreeA(int n, int lineno, const char *function, const char *filename, void *ptr0, ...)
440: {
441:   va_list Argp;
442:   void  **ptr[8];
443:   int     i;

446:   ptr[0] = (void **)ptr0;
447:   va_start(Argp, ptr0);
448:   for (i = 1; i < n; i++) ptr[i] = va_arg(Argp, void **);
449:   va_end(Argp);
450:   if (petscmalloccoalesce) {
451:     for (i = 0; i < n; i++) { /* Find first nonempty allocation */
452:       if (*ptr[i]) break;
453:     }
454:     while (--n > i) *ptr[n] = NULL;
455:     (*PetscTrFree)(*ptr[n], lineno, function, filename);
456:     *ptr[n] = NULL;
457:   } else {
458:     while (--n >= 0) {
459:       (*PetscTrFree)(*ptr[n], lineno, function, filename);
460:       *ptr[n] = NULL;
461:     }
462:   }
463:   return 0;
464: }