Actual source code: plog.c
2: /*
3: PETSc code to log object creation and destruction and PETSc events.
5: This provides the public API used by the rest of PETSc and by users.
7: These routines use a private API that is not used elsewhere in PETSc and is not
8: accessible to users. The private API is defined in logimpl.h and the utils directory.
10: */
11: #include <petsc/private/logimpl.h>
12: #include <petsctime.h>
13: #include <petscviewer.h>
14: #include <petscdevice.h>
15: #include <petsc/private/deviceimpl.h>
17: PetscLogEvent PETSC_LARGEST_EVENT = PETSC_EVENT;
19: #if defined(PETSC_USE_LOG)
20: #include <petscmachineinfo.h>
21: #include <petscconfiginfo.h>
23: /* used in the MPI_XXX() count macros in petsclog.h */
25: /* Action and object logging variables */
26: Action *petsc_actions = NULL;
27: Object *petsc_objects = NULL;
28: PetscBool petsc_logActions = PETSC_FALSE;
29: PetscBool petsc_logObjects = PETSC_FALSE;
30: int petsc_numActions = 0, petsc_maxActions = 100;
31: int petsc_numObjects = 0, petsc_maxObjects = 100;
32: int petsc_numObjectsDestroyed = 0;
34: /* Global counters */
35: PetscLogDouble petsc_BaseTime = 0.0;
36: PetscLogDouble petsc_TotalFlops = 0.0; /* The number of flops */
37: PetscLogDouble petsc_tmp_flops = 0.0; /* The incremental number of flops */
38: PetscLogDouble petsc_send_ct = 0.0; /* The number of sends */
39: PetscLogDouble petsc_recv_ct = 0.0; /* The number of receives */
40: PetscLogDouble petsc_send_len = 0.0; /* The total length of all sent messages */
41: PetscLogDouble petsc_recv_len = 0.0; /* The total length of all received messages */
42: PetscLogDouble petsc_isend_ct = 0.0; /* The number of immediate sends */
43: PetscLogDouble petsc_irecv_ct = 0.0; /* The number of immediate receives */
44: PetscLogDouble petsc_isend_len = 0.0; /* The total length of all immediate send messages */
45: PetscLogDouble petsc_irecv_len = 0.0; /* The total length of all immediate receive messages */
46: PetscLogDouble petsc_wait_ct = 0.0; /* The number of waits */
47: PetscLogDouble petsc_wait_any_ct = 0.0; /* The number of anywaits */
48: PetscLogDouble petsc_wait_all_ct = 0.0; /* The number of waitalls */
49: PetscLogDouble petsc_sum_of_waits_ct = 0.0; /* The total number of waits */
50: PetscLogDouble petsc_allreduce_ct = 0.0; /* The number of reductions */
51: PetscLogDouble petsc_gather_ct = 0.0; /* The number of gathers and gathervs */
52: PetscLogDouble petsc_scatter_ct = 0.0; /* The number of scatters and scattervs */
53: #if defined(PETSC_HAVE_DEVICE)
54: PetscLogDouble petsc_ctog_ct = 0.0; /* The total number of CPU to GPU copies */
55: PetscLogDouble petsc_gtoc_ct = 0.0; /* The total number of GPU to CPU copies */
56: PetscLogDouble petsc_ctog_sz = 0.0; /* The total size of CPU to GPU copies */
57: PetscLogDouble petsc_gtoc_sz = 0.0; /* The total size of GPU to CPU copies */
58: PetscLogDouble petsc_ctog_ct_scalar = 0.0; /* The total number of CPU to GPU copies */
59: PetscLogDouble petsc_gtoc_ct_scalar = 0.0; /* The total number of GPU to CPU copies */
60: PetscLogDouble petsc_ctog_sz_scalar = 0.0; /* The total size of CPU to GPU copies */
61: PetscLogDouble petsc_gtoc_sz_scalar = 0.0; /* The total size of GPU to CPU copies */
62: PetscLogDouble petsc_gflops = 0.0; /* The flops done on a GPU */
63: PetscLogDouble petsc_gtime = 0.0; /* The time spent on a GPU */
64: #endif
66: /* Logging functions */
67: PetscErrorCode (*PetscLogPHC)(PetscObject) = NULL;
68: PetscErrorCode (*PetscLogPHD)(PetscObject) = NULL;
69: PetscErrorCode (*PetscLogPLB)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject) = NULL;
70: PetscErrorCode (*PetscLogPLE)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject) = NULL;
72: /* Tracing event logging variables */
73: FILE *petsc_tracefile = NULL;
74: int petsc_tracelevel = 0;
75: const char *petsc_traceblanks = " ";
76: char petsc_tracespace[128] = " ";
77: PetscLogDouble petsc_tracetime = 0.0;
78: static PetscBool PetscLogInitializeCalled = PETSC_FALSE;
80: static PetscIntStack current_log_event_stack = NULL;
82: PETSC_INTERN PetscErrorCode PetscLogInitialize(void)
83: {
84: int stage;
85: PetscBool opt;
87: if (PetscLogInitializeCalled) return 0;
88: PetscLogInitializeCalled = PETSC_TRUE;
90: PetscIntStackCreate(¤t_log_event_stack);
91: PetscOptionsHasName(NULL, NULL, "-log_exclude_actions", &opt);
92: if (opt) petsc_logActions = PETSC_FALSE;
93: PetscOptionsHasName(NULL, NULL, "-log_exclude_objects", &opt);
94: if (opt) petsc_logObjects = PETSC_FALSE;
95: if (petsc_logActions) PetscMalloc1(petsc_maxActions, &petsc_actions);
96: if (petsc_logObjects) PetscMalloc1(petsc_maxObjects, &petsc_objects);
97: PetscLogPHC = PetscLogObjCreateDefault;
98: PetscLogPHD = PetscLogObjDestroyDefault;
99: /* Setup default logging structures */
100: PetscStageLogCreate(&petsc_stageLog);
101: PetscStageLogRegister(petsc_stageLog, "Main Stage", &stage);
103: /* All processors sync here for more consistent logging */
104: MPI_Barrier(PETSC_COMM_WORLD);
105: PetscTime(&petsc_BaseTime);
106: PetscLogStagePush(stage);
107: return 0;
108: }
110: PETSC_INTERN PetscErrorCode PetscLogFinalize(void)
111: {
112: PetscStageLog stageLog;
114: PetscFree(petsc_actions);
115: PetscFree(petsc_objects);
116: PetscLogNestedEnd();
117: PetscLogSet(NULL, NULL);
119: /* Resetting phase */
120: PetscLogGetStageLog(&stageLog);
121: PetscStageLogDestroy(stageLog);
122: PetscIntStackDestroy(current_log_event_stack);
123: current_log_event_stack = NULL;
125: petsc_TotalFlops = 0.0;
126: petsc_numActions = 0;
127: petsc_numObjects = 0;
128: petsc_numObjectsDestroyed = 0;
129: petsc_maxActions = 100;
130: petsc_maxObjects = 100;
131: petsc_actions = NULL;
132: petsc_objects = NULL;
133: petsc_logActions = PETSC_FALSE;
134: petsc_logObjects = PETSC_FALSE;
135: petsc_BaseTime = 0.0;
136: petsc_TotalFlops = 0.0;
137: petsc_tmp_flops = 0.0;
138: petsc_send_ct = 0.0;
139: petsc_recv_ct = 0.0;
140: petsc_send_len = 0.0;
141: petsc_recv_len = 0.0;
142: petsc_isend_ct = 0.0;
143: petsc_irecv_ct = 0.0;
144: petsc_isend_len = 0.0;
145: petsc_irecv_len = 0.0;
146: petsc_wait_ct = 0.0;
147: petsc_wait_any_ct = 0.0;
148: petsc_wait_all_ct = 0.0;
149: petsc_sum_of_waits_ct = 0.0;
150: petsc_allreduce_ct = 0.0;
151: petsc_gather_ct = 0.0;
152: petsc_scatter_ct = 0.0;
153: #if defined(PETSC_HAVE_DEVICE)
154: petsc_ctog_ct = 0.0;
155: petsc_gtoc_ct = 0.0;
156: petsc_ctog_sz = 0.0;
157: petsc_gtoc_sz = 0.0;
158: petsc_gflops = 0.0;
159: petsc_gtime = 0.0;
160: #endif
161: PETSC_LARGEST_EVENT = PETSC_EVENT;
162: PetscLogPHC = NULL;
163: PetscLogPHD = NULL;
164: petsc_tracefile = NULL;
165: petsc_tracelevel = 0;
166: petsc_traceblanks = " ";
167: petsc_tracespace[0] = ' ';
168: petsc_tracespace[1] = 0;
169: petsc_tracetime = 0.0;
170: PETSC_LARGEST_CLASSID = PETSC_SMALLEST_CLASSID;
171: PETSC_OBJECT_CLASSID = 0;
172: petsc_stageLog = NULL;
173: PetscLogInitializeCalled = PETSC_FALSE;
174: return 0;
175: }
177: /*@C
178: PetscLogSet - Sets the logging functions called at the beginning and ending of every event.
180: Not Collective
182: Input Parameters:
183: + b - The function called at beginning of event
184: - e - The function called at end of event
186: Level: developer
188: Developer Note:
189: The default loggers are `PetscLogEventBeginDefault()` and `PetscLogEventEndDefault()`.
191: .seealso: [](ch_profiling), `PetscLogDump()`, `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogTraceBegin()`, `PetscLogEventBeginDefault()`, `PetscLogEventEndDefault()`
192: @*/
193: PetscErrorCode PetscLogSet(PetscErrorCode (*b)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject), PetscErrorCode (*e)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject))
194: {
195: PetscLogPLB = b;
196: PetscLogPLE = e;
197: return 0;
198: }
200: /*@C
201: PetscLogIsActive - Check if logging is currently in progress.
203: Not Collective
205: Output Parameter:
206: . isActive - `PETSC_TRUE` if logging is in progress, `PETSC_FALSE` otherwise
208: Level: beginner
210: .seealso: [](ch_profiling), `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogSet()`
211: @*/
212: PetscErrorCode PetscLogIsActive(PetscBool *isActive)
213: {
214: *isActive = (PetscLogPLB && PetscLogPLE) ? PETSC_TRUE : PETSC_FALSE;
215: return 0;
216: }
218: /*@C
219: PetscLogDefaultBegin - Turns on logging of objects and events using the default logging functions `PetscLogEventBeginDefault()` and `PetscLogEventEndDefault()`. This logs flop
220: rates and object creation and should not slow programs down too much.
221: This routine may be called more than once.
223: Logically Collective over `PETSC_COMM_WORLD`
225: Options Database Key:
226: . -log_view [viewertype:filename:viewerformat] - Prints summary of flop and timing information to the
227: screen (for code configured with --with-log=1 (which is the default))
229: Usage:
230: .vb
231: PetscInitialize(...);
232: PetscLogDefaultBegin();
233: ... code ...
234: PetscLogView(viewer); or PetscLogDump();
235: PetscFinalize();
236: .ve
238: Level: advanced
240: Note:
241: `PetscLogView()` or `PetscLogDump()` actually cause the printing of
242: the logging information.
244: .seealso: [](ch_profiling), `PetscLogDump()`, `PetscLogAllBegin()`, `PetscLogView()`, `PetscLogTraceBegin()`
245: @*/
246: PetscErrorCode PetscLogDefaultBegin(void)
247: {
248: PetscLogSet(PetscLogEventBeginDefault, PetscLogEventEndDefault);
249: return 0;
250: }
252: /*@C
253: PetscLogAllBegin - Turns on extensive logging of objects and events. Logs
254: all events. This creates large log files and slows the program down.
256: Logically Collective on `PETSC_COMM_WORLD`
258: Options Database Key:
259: . -log_all - Prints extensive log information
261: Usage:
262: .vb
263: PetscInitialize(...);
264: PetscLogAllBegin();
265: ... code ...
266: PetscLogDump(filename);
267: PetscFinalize();
268: .ve
270: Level: advanced
272: Note:
273: A related routine is `PetscLogDefaultBegin()` (with the options key -log_view), which is
274: intended for production runs since it logs only flop rates and object
275: creation (and shouldn't significantly slow the programs).
277: .seealso: [](ch_profiling), `PetscLogDump()`, `PetscLogDefaultBegin()`, `PetscLogTraceBegin()`
278: @*/
279: PetscErrorCode PetscLogAllBegin(void)
280: {
281: PetscLogSet(PetscLogEventBeginComplete, PetscLogEventEndComplete);
282: return 0;
283: }
285: /*@C
286: PetscLogTraceBegin - Activates trace logging. Every time a PETSc event
287: begins or ends, the event name is printed.
289: Logically Collective on `PETSC_COMM_WORLD`
291: Input Parameter:
292: . file - The file to print trace in (e.g. stdout)
294: Options Database Key:
295: . -log_trace [filename] - Activates `PetscLogTraceBegin()`
297: Level: intermediate
299: Notes:
300: `PetscLogTraceBegin()` prints the processor number, the execution time (sec),
301: then "Event begin:" or "Event end:" followed by the event name.
303: `PetscLogTraceBegin()` allows tracing of all PETSc calls, which is useful
304: to determine where a program is hanging without running in the
305: debugger. Can be used in conjunction with the -info option.
307: .seealso: [](ch_profiling), `PetscLogDump()`, `PetscLogAllBegin()`, `PetscLogView()`, `PetscLogDefaultBegin()`
308: @*/
309: PetscErrorCode PetscLogTraceBegin(FILE *file)
310: {
311: petsc_tracefile = file;
313: PetscLogSet(PetscLogEventBeginTrace, PetscLogEventEndTrace);
314: return 0;
315: }
317: /*@
318: PetscLogActions - Determines whether actions are logged for the graphical viewer.
320: Not Collective
322: Input Parameter:
323: . flag - `PETSC_TRUE` if actions are to be logged
325: Options Database Key:
326: . -log_exclude_actions - Turns off actions logging
328: Level: intermediate
330: Note:
331: Logging of actions continues to consume more memory as the program
332: runs. Long running programs should consider turning this feature off.
333: .seealso: [](ch_profiling), `PetscLogStagePush()`, `PetscLogStagePop()`
334: @*/
335: PetscErrorCode PetscLogActions(PetscBool flag)
336: {
337: petsc_logActions = flag;
338: return 0;
339: }
341: /*@
342: PetscLogObjects - Determines whether objects are logged for the graphical viewer.
344: Not Collective
346: Input Parameter:
347: . flag - `PETSC_TRUE` if objects are to be logged
349: Options Database Key:
350: . -log_exclude_objects - Turns off objects logging
352: Level: intermediate
354: Note:
355: Logging of objects continues to consume more memory as the program
356: runs. Long running programs should consider turning this feature off.
358: .seealso: [](ch_profiling), `PetscLogStagePush()`, `PetscLogStagePop()`
359: @*/
360: PetscErrorCode PetscLogObjects(PetscBool flag)
361: {
362: petsc_logObjects = flag;
363: return 0;
364: }
366: /*------------------------------------------------ Stage Functions --------------------------------------------------*/
367: /*@C
368: PetscLogStageRegister - Attaches a character string name to a logging stage.
370: Not Collective
372: Input Parameter:
373: . sname - The name to associate with that stage
375: Output Parameter:
376: . stage - The stage number
378: Level: intermediate
380: .seealso: [](ch_profiling), `PetscLogStagePush()`, `PetscLogStagePop()`
381: @*/
382: PetscErrorCode PetscLogStageRegister(const char sname[], PetscLogStage *stage)
383: {
384: PetscStageLog stageLog;
385: PetscLogEvent event;
387: PetscLogGetStageLog(&stageLog);
388: PetscStageLogRegister(stageLog, sname, stage);
389: /* Copy events already changed in the main stage, this sucks */
390: PetscEventPerfLogEnsureSize(stageLog->stageInfo[*stage].eventLog, stageLog->eventLog->numEvents);
391: for (event = 0; event < stageLog->eventLog->numEvents; event++) PetscEventPerfInfoCopy(&stageLog->stageInfo[0].eventLog->eventInfo[event], &stageLog->stageInfo[*stage].eventLog->eventInfo[event]);
392: PetscClassPerfLogEnsureSize(stageLog->stageInfo[*stage].classLog, stageLog->classLog->numClasses);
393: return 0;
394: }
396: /*@C
397: PetscLogStagePush - This function pushes a stage on the logging stack. Events started and stopped until `PetscLogStagePop()` will be associated with the stage
399: Not Collective
401: Input Parameter:
402: . stage - The stage on which to log
404: Usage:
405: If the option -log_view is used to run the program containing the
406: following code, then 2 sets of summary data will be printed during
407: PetscFinalize().
408: .vb
409: PetscInitialize(int *argc,char ***args,0,0);
410: [stage 0 of code]
411: PetscLogStagePush(1);
412: [stage 1 of code]
413: PetscLogStagePop();
414: PetscBarrier(...);
415: [more stage 0 of code]
416: PetscFinalize();
417: .ve
419: Level: intermediate
421: Note:
422: Use `PetscLogStageRegister()` to register a stage.
424: .seealso: [](ch_profiling), `PetscLogStagePop()`, `PetscLogStageRegister()`, `PetscBarrier()`
425: @*/
426: PetscErrorCode PetscLogStagePush(PetscLogStage stage)
427: {
428: PetscStageLog stageLog;
430: PetscLogGetStageLog(&stageLog);
431: PetscStageLogPush(stageLog, stage);
432: return 0;
433: }
435: /*@C
436: PetscLogStagePop - This function pops a stage from the logging stack that was pushed with `PetscLogStagePush()`
438: Not Collective
440: Usage:
441: If the option -log_view is used to run the program containing the
442: following code, then 2 sets of summary data will be printed during
443: PetscFinalize().
444: .vb
445: PetscInitialize(int *argc,char ***args,0,0);
446: [stage 0 of code]
447: PetscLogStagePush(1);
448: [stage 1 of code]
449: PetscLogStagePop();
450: PetscBarrier(...);
451: [more stage 0 of code]
452: PetscFinalize();
453: .ve
455: Level: intermediate
457: .seealso: [](ch_profiling), `PetscLogStagePush()`, `PetscLogStageRegister()`, `PetscBarrier()`
458: @*/
459: PetscErrorCode PetscLogStagePop(void)
460: {
461: PetscStageLog stageLog;
463: PetscLogGetStageLog(&stageLog);
464: PetscStageLogPop(stageLog);
465: return 0;
466: }
468: /*@
469: PetscLogStageSetActive - Sets if a stage is used for `PetscLogEventBegin()` and `PetscLogEventEnd()`.
471: Not Collective
473: Input Parameters:
474: + stage - The stage
475: - isActive - The activity flag, `PETSC_TRUE` for logging, else `PETSC_FALSE` (defaults to `PETSC_TRUE`)
477: Level: intermediate
479: Note:
480: If this is set to `PETSC_FALSE` the logging acts as if the stage did not exist
482: .seealso: [](ch_profiling), `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()`
483: @*/
484: PetscErrorCode PetscLogStageSetActive(PetscLogStage stage, PetscBool isActive)
485: {
486: PetscStageLog stageLog;
488: PetscLogGetStageLog(&stageLog);
489: PetscStageLogSetActive(stageLog, stage, isActive);
490: return 0;
491: }
493: /*@
494: PetscLogStageGetActive - Checks if a stage is used for `PetscLogEventBegin()` and `PetscLogEventEnd()`.
496: Not Collective
498: Input Parameter:
499: . stage - The stage
501: Output Parameter:
502: . isActive - The activity flag, `PETSC_TRUE` for logging, else `PETSC_FALSE` (defaults to `PETSC_TRUE`)
504: Level: intermediate
506: .seealso: [](ch_profiling), `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()`
507: @*/
508: PetscErrorCode PetscLogStageGetActive(PetscLogStage stage, PetscBool *isActive)
509: {
510: PetscStageLog stageLog;
512: PetscLogGetStageLog(&stageLog);
513: PetscStageLogGetActive(stageLog, stage, isActive);
514: return 0;
515: }
517: /*@
518: PetscLogStageSetVisible - Determines stage visibility in `PetscLogView()`
520: Not Collective
522: Input Parameters:
523: + stage - The stage
524: - isVisible - The visibility flag, `PETSC_TRUE` to print, else `PETSC_FALSE` (defaults to `PETSC_TRUE`)
526: Level: intermediate
528: Developer Note:
529: What does visible mean, needs to be documented.
531: .seealso: [](ch_profiling), `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogView()`
532: @*/
533: PetscErrorCode PetscLogStageSetVisible(PetscLogStage stage, PetscBool isVisible)
534: {
535: PetscStageLog stageLog;
537: PetscLogGetStageLog(&stageLog);
538: PetscStageLogSetVisible(stageLog, stage, isVisible);
539: return 0;
540: }
542: /*@
543: PetscLogStageGetVisible - Returns stage visibility in `PetscLogView()`
545: Not Collective
547: Input Parameter:
548: . stage - The stage
550: Output Parameter:
551: . isVisible - The visibility flag, `PETSC_TRUE` to print, else `PETSC_FALSE` (defaults to `PETSC_TRUE`)
553: Level: intermediate
555: .seealso: [](ch_profiling), `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogView()`
556: @*/
557: PetscErrorCode PetscLogStageGetVisible(PetscLogStage stage, PetscBool *isVisible)
558: {
559: PetscStageLog stageLog;
561: PetscLogGetStageLog(&stageLog);
562: PetscStageLogGetVisible(stageLog, stage, isVisible);
563: return 0;
564: }
566: /*@C
567: PetscLogStageGetId - Returns the stage id when given the stage name.
569: Not Collective
571: Input Parameter:
572: . name - The stage name
574: Output Parameter:
575: . stage - The stage, , or -1 if no stage with that name exists
577: Level: intermediate
579: .seealso: [](ch_profiling), `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()`
580: @*/
581: PetscErrorCode PetscLogStageGetId(const char name[], PetscLogStage *stage)
582: {
583: PetscStageLog stageLog;
585: PetscLogGetStageLog(&stageLog);
586: PetscStageLogGetStage(stageLog, name, stage);
587: return 0;
588: }
590: /*------------------------------------------------ Event Functions --------------------------------------------------*/
592: /*@C
593: PetscLogEventRegister - Registers an event name for logging operations
595: Not Collective
597: Input Parameters:
598: + name - The name associated with the event
599: - classid - The classid associated to the class for this event, obtain either with
600: `PetscClassIdRegister()` or use a predefined one such as `KSP_CLASSID`, `SNES_CLASSID`, the predefined ones
601: are only available in C code
603: Output Parameter:
604: . event - The event id for use with `PetscLogEventBegin()` and `PetscLogEventEnd()`.
606: Example of Usage:
607: .vb
608: PetscLogEvent USER_EVENT;
609: PetscClassId classid;
610: PetscLogDouble user_event_flops;
611: PetscClassIdRegister("class name",&classid);
612: PetscLogEventRegister("User event name",classid,&USER_EVENT);
613: PetscLogEventBegin(USER_EVENT,0,0,0,0);
614: [code segment to monitor]
615: PetscLogFlops(user_event_flops);
616: PetscLogEventEnd(USER_EVENT,0,0,0,0);
617: .ve
619: Level: intermediate
621: Notes:
622: PETSc automatically logs library events if the code has been
623: configured with --with-log (which is the default) and
624: -log_view or -log_all is specified. `PetscLogEventRegister()` is
625: intended for logging user events to supplement this PETSc
626: information.
628: PETSc can gather data for use with the utilities Jumpshot
629: (part of the MPICH distribution). If PETSc has been compiled
630: with flag -DPETSC_HAVE_MPE (MPE is an additional utility within
631: MPICH), the user can employ another command line option, -log_mpe,
632: to create a logfile, "mpe.log", which can be visualized
633: Jumpshot.
635: The classid is associated with each event so that classes of events
636: can be disabled simultaneously, such as all matrix events. The user
637: can either use an existing classid, such as `MAT_CLASSID`, or create
638: their own as shown in the example.
640: If an existing event with the same name exists, its event handle is
641: returned instead of creating a new event.
643: .seealso: [](ch_profiling), `PetscLogStageRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscLogFlops()`,
644: `PetscLogEventActivate()`, `PetscLogEventDeactivate()`, `PetscClassIdRegister()`
645: @*/
646: PetscErrorCode PetscLogEventRegister(const char name[], PetscClassId classid, PetscLogEvent *event)
647: {
648: PetscStageLog stageLog;
649: int stage;
651: *event = PETSC_DECIDE;
652: PetscLogGetStageLog(&stageLog);
653: PetscEventRegLogGetEvent(stageLog->eventLog, name, event);
654: if (*event > 0) return 0;
655: PetscEventRegLogRegister(stageLog->eventLog, name, classid, event);
656: for (stage = 0; stage < stageLog->numStages; stage++) {
657: PetscEventPerfLogEnsureSize(stageLog->stageInfo[stage].eventLog, stageLog->eventLog->numEvents);
658: PetscClassPerfLogEnsureSize(stageLog->stageInfo[stage].classLog, stageLog->classLog->numClasses);
659: }
660: return 0;
661: }
663: /*@
664: PetscLogEventSetCollective - Indicates that a particular event is collective.
666: Not Collective
668: Input Parameters:
669: + event - The event id
670: - collective - Boolean flag indicating whether a particular event is collective
672: Level: developer
674: Notes:
675: New events returned from `PetscLogEventRegister()` are collective by default.
677: Collective events are handled specially if the -log_sync is used. In that case the logging saves information about
678: two parts of the event; the time for all the MPI ranks to synchronize and then the time for the actual computation/communication
679: to be performed. This option is useful to debug imbalance within the computations or communications
681: .seealso: [](ch_profiling), `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscLogEventRegister()`
682: @*/
683: PetscErrorCode PetscLogEventSetCollective(PetscLogEvent event, PetscBool collective)
684: {
685: PetscStageLog stageLog;
686: PetscEventRegLog eventRegLog;
688: PetscLogGetStageLog(&stageLog);
689: PetscStageLogGetEventRegLog(stageLog, &eventRegLog);
691: eventRegLog->eventInfo[event].collective = collective;
692: return 0;
693: }
695: /*@
696: PetscLogEventIncludeClass - Activates event logging for a PETSc object class in every stage.
698: Not Collective
700: Input Parameter:
701: . classid - The object class, for example `MAT_CLASSID`, `SNES_CLASSID`, etc.
703: Level: developer
705: .seealso: [](ch_profiling), `PetscLogEventActivateClass()`, `PetscLogEventDeactivateClass()`, `PetscLogEventActivate()`, `PetscLogEventDeactivate()`
706: @*/
707: PetscErrorCode PetscLogEventIncludeClass(PetscClassId classid)
708: {
709: PetscStageLog stageLog;
710: int stage;
712: PetscLogGetStageLog(&stageLog);
713: for (stage = 0; stage < stageLog->numStages; stage++) PetscEventPerfLogActivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid);
714: return 0;
715: }
717: /*@
718: PetscLogEventExcludeClass - Deactivates event logging for a PETSc object class in every stage.
720: Not Collective
722: Input Parameter:
723: . classid - The object class, for example `MAT_CLASSID`, `SNES_CLASSID`, etc.
725: Level: developer
727: Note:
728: If a class is excluded then events associated with that class are not logged.
730: .seealso: [](ch_profiling), `PetscLogEventDeactivateClass()`, `PetscLogEventActivateClass()`, `PetscLogEventDeactivate()`, `PetscLogEventActivate()`
731: @*/
732: PetscErrorCode PetscLogEventExcludeClass(PetscClassId classid)
733: {
734: PetscStageLog stageLog;
735: int stage;
737: PetscLogGetStageLog(&stageLog);
738: for (stage = 0; stage < stageLog->numStages; stage++) PetscEventPerfLogDeactivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid);
739: return 0;
740: }
742: /*@
743: PetscLogEventActivate - Indicates that a particular event should be logged.
745: Not Collective
747: Input Parameter:
748: . event - The event id
750: Usage:
751: .vb
752: PetscLogEventDeactivate(VEC_SetValues);
753: [code where you do not want to log VecSetValues()]
754: PetscLogEventActivate(VEC_SetValues);
755: [code where you do want to log VecSetValues()]
756: .ve
758: Level: advanced
760: Note:
761: The event may be either a pre-defined PETSc event (found in include/petsclog.h)
762: or an event number obtained with `PetscLogEventRegister()`.
764: .seealso: [](ch_profiling), `PlogEventDeactivate()`, `PlogEventDeactivatePush()`, `PetscLogEventDeactivatePop()`
765: @*/
766: PetscErrorCode PetscLogEventActivate(PetscLogEvent event)
767: {
768: PetscStageLog stageLog;
769: int stage;
771: PetscLogGetStageLog(&stageLog);
772: PetscStageLogGetCurrent(stageLog, &stage);
773: PetscEventPerfLogActivate(stageLog->stageInfo[stage].eventLog, event);
774: return 0;
775: }
777: /*@
778: PetscLogEventDeactivate - Indicates that a particular event should not be logged.
780: Not Collective
782: Input Parameter:
783: . event - The event id
785: Usage:
786: .vb
787: PetscLogEventDeactivate(VEC_SetValues);
788: [code where you do not want to log VecSetValues()]
789: PetscLogEventActivate(VEC_SetValues);
790: [code where you do want to log VecSetValues()]
791: .ve
793: Level: advanced
795: Note:
796: The event may be either a pre-defined PETSc event (found in
797: include/petsclog.h) or an event number obtained with `PetscLogEventRegister()`).
799: .seealso: [](ch_profiling), `PetscLogEventActivate()`, `PetscLogEventDeactivatePush()`, `PetscLogEventDeactivatePop()`
800: @*/
801: PetscErrorCode PetscLogEventDeactivate(PetscLogEvent event)
802: {
803: PetscStageLog stageLog;
804: int stage;
806: PetscLogGetStageLog(&stageLog);
807: PetscStageLogGetCurrent(stageLog, &stage);
808: PetscEventPerfLogDeactivate(stageLog->stageInfo[stage].eventLog, event);
809: return 0;
810: }
812: /*@
813: PetscLogEventDeactivatePush - Indicates that a particular event should not be logged until `PetscLogEventDeactivatePop()` is called
815: Not Collective
817: Input Parameter:
818: . event - The event id
820: Usage:
821: .vb
822: PetscLogEventDeactivatePush(VEC_SetValues);
823: [code where you do not want to log VecSetValues()]
824: PetscLogEventDeactivatePop(VEC_SetValues);
825: [code where you do want to log VecSetValues()]
826: .ve
828: Level: advanced
830: Note:
831: The event may be either a pre-defined PETSc event (found in
832: include/petsclog.h) or an event number obtained with `PetscLogEventRegister()`).
834: .seealso: [](ch_profiling), `PetscLogEventActivate()`, `PetscLogEventDeactivatePop()`, `PetscLogEventDeactivate()`
835: @*/
836: PetscErrorCode PetscLogEventDeactivatePush(PetscLogEvent event)
837: {
838: PetscStageLog stageLog;
839: int stage;
841: PetscLogGetStageLog(&stageLog);
842: PetscStageLogGetCurrent(stageLog, &stage);
843: PetscEventPerfLogDeactivatePush(stageLog->stageInfo[stage].eventLog, event);
844: return 0;
845: }
847: /*@
848: PetscLogEventDeactivatePop - Indicates that a particular event should again be logged after the logging was turned off with `PetscLogEventDeactivatePush()`
850: Not Collective
852: Input Parameter:
853: . event - The event id
855: Usage:
856: .vb
857: PetscLogEventDeactivatePush(VEC_SetValues);
858: [code where you do not want to log VecSetValues()]
859: PetscLogEventDeactivatePop(VEC_SetValues);
860: [code where you do want to log VecSetValues()]
861: .ve
863: Level: advanced
865: Note:
866: The event may be either a pre-defined PETSc event (found in
867: include/petsclog.h) or an event number obtained with `PetscLogEventRegister()`).
869: .seealso: [](ch_profiling), `PetscLogEventActivate()`, `PetscLogEventDeactivatePush()`
870: @*/
871: PetscErrorCode PetscLogEventDeactivatePop(PetscLogEvent event)
872: {
873: PetscStageLog stageLog;
874: int stage;
876: PetscLogGetStageLog(&stageLog);
877: PetscStageLogGetCurrent(stageLog, &stage);
878: PetscEventPerfLogDeactivatePop(stageLog->stageInfo[stage].eventLog, event);
879: return 0;
880: }
882: /*@
883: PetscLogEventSetActiveAll - Turns on logging of all events
885: Not Collective
887: Input Parameters:
888: + event - The event id
889: - isActive - The activity flag determining whether the event is logged
891: Level: advanced
893: .seealso: [](ch_profiling), `PlogEventActivate()`, `PlogEventDeactivate()`
894: @*/
895: PetscErrorCode PetscLogEventSetActiveAll(PetscLogEvent event, PetscBool isActive)
896: {
897: PetscStageLog stageLog;
898: int stage;
900: PetscLogGetStageLog(&stageLog);
901: for (stage = 0; stage < stageLog->numStages; stage++) {
902: if (isActive) {
903: PetscEventPerfLogActivate(stageLog->stageInfo[stage].eventLog, event);
904: } else {
905: PetscEventPerfLogDeactivate(stageLog->stageInfo[stage].eventLog, event);
906: }
907: }
908: return 0;
909: }
911: /*@
912: PetscLogEventActivateClass - Activates event logging for a PETSc object class for the current stage
914: Not Collective
916: Input Parameter:
917: . classid - The event class, for example `MAT_CLASSID`, `SNES_CLASSID`, etc.
919: Level: developer
921: .seealso: [](ch_profiling), `PetscLogEventIncludeClass()`, `PetscLogEventExcludeClass()`, `PetscLogEventDeactivateClass()`, `PetscLogEventActivate()`, `PetscLogEventDeactivate()`
922: @*/
923: PetscErrorCode PetscLogEventActivateClass(PetscClassId classid)
924: {
925: PetscStageLog stageLog;
926: int stage;
928: PetscLogGetStageLog(&stageLog);
929: PetscStageLogGetCurrent(stageLog, &stage);
930: PetscEventPerfLogActivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid);
931: return 0;
932: }
934: /*@
935: PetscLogEventDeactivateClass - Deactivates event logging for a PETSc object class for the current stage
937: Not Collective
939: Input Parameter:
940: . classid - The event class, for example `MAT_CLASSID`, `SNES_CLASSID`, etc.
942: Level: developer
944: .seealso: [](ch_profiling), `PetscLogEventIncludeClass()`, `PetscLogEventExcludeClass()`, `PetscLogEventActivateClass()`, `PetscLogEventActivate()`, `PetscLogEventDeactivate()`
945: @*/
946: PetscErrorCode PetscLogEventDeactivateClass(PetscClassId classid)
947: {
948: PetscStageLog stageLog;
949: int stage;
951: PetscLogGetStageLog(&stageLog);
952: PetscStageLogGetCurrent(stageLog, &stage);
953: PetscEventPerfLogDeactivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid);
954: return 0;
955: }
957: /*MC
958: PetscLogEventSync - Synchronizes the beginning of a user event.
960: Synopsis:
961: #include <petsclog.h>
962: PetscErrorCode PetscLogEventSync(int e,MPI_Comm comm)
964: Collective
966: Input Parameters:
967: + e - integer associated with the event obtained from PetscLogEventRegister()
968: - comm - an MPI communicator
970: Usage:
971: .vb
972: PetscLogEvent USER_EVENT;
973: PetscLogEventRegister("User event",0,&USER_EVENT);
974: PetscLogEventSync(USER_EVENT,PETSC_COMM_WORLD);
975: PetscLogEventBegin(USER_EVENT,0,0,0,0);
976: [code segment to monitor]
977: PetscLogEventEnd(USER_EVENT,0,0,0,0);
978: .ve
980: Level: developer
982: Note:
983: This routine should be called only if there is not a
984: `PetscObject` available to pass to `PetscLogEventBegin()`.
986: .seealso: [](ch_profiling), `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`
987: M*/
989: /*MC
990: PetscLogEventBegin - Logs the beginning of a user event.
992: Synopsis:
993: #include <petsclog.h>
994: PetscErrorCode PetscLogEventBegin(int e,PetscObject o1,PetscObject o2,PetscObject o3,PetscObject o4)
996: Not Collective
998: Input Parameters:
999: + e - integer associated with the event obtained from PetscLogEventRegister()
1000: - o1,o2,o3,o4 - objects associated with the event, or 0
1002: Fortran Synopsis:
1003: void PetscLogEventBegin(int e,PetscErrorCode ierr)
1005: Usage:
1006: .vb
1007: PetscLogEvent USER_EVENT;
1008: PetscLogDouble user_event_flops;
1009: PetscLogEventRegister("User event",0,&USER_EVENT);
1010: PetscLogEventBegin(USER_EVENT,0,0,0,0);
1011: [code segment to monitor]
1012: PetscLogFlops(user_event_flops);
1013: PetscLogEventEnd(USER_EVENT,0,0,0,0);
1014: .ve
1016: Level: intermediate
1018: Developer Note:
1019: `PetscLogEventBegin()` and `PetscLogEventBegin()` return error codes instead of explicitly handling the
1020: errors that occur in the macro directly because other packages that use this macros have used them in their
1021: own functions or methods that do not return error codes and it would be disruptive to change the current
1022: behavior.
1024: .seealso: [](ch_profiling), `PetscLogEventRegister()`, `PetscLogEventEnd()`, `PetscLogFlops()`
1025: M*/
1027: /*MC
1028: PetscLogEventEnd - Log the end of a user event.
1030: Synopsis:
1031: #include <petsclog.h>
1032: PetscErrorCode PetscLogEventEnd(int e,PetscObject o1,PetscObject o2,PetscObject o3,PetscObject o4)
1034: Not Collective
1036: Input Parameters:
1037: + e - integer associated with the event obtained with PetscLogEventRegister()
1038: - o1,o2,o3,o4 - objects associated with the event, or 0
1040: Fortran Synopsis:
1041: void PetscLogEventEnd(int e,PetscErrorCode ierr)
1043: Usage:
1044: .vb
1045: PetscLogEvent USER_EVENT;
1046: PetscLogDouble user_event_flops;
1047: PetscLogEventRegister("User event",0,&USER_EVENT,);
1048: PetscLogEventBegin(USER_EVENT,0,0,0,0);
1049: [code segment to monitor]
1050: PetscLogFlops(user_event_flops);
1051: PetscLogEventEnd(USER_EVENT,0,0,0,0);
1052: .ve
1054: Level: intermediate
1056: .seealso: [](ch_profiling), `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogFlops()`
1057: M*/
1059: /*@C
1060: PetscLogEventGetId - Returns the event id when given the event name.
1062: Not Collective
1064: Input Parameter:
1065: . name - The event name
1067: Output Parameter:
1068: . event - The event, or -1 if no event with that name exists
1070: Level: intermediate
1072: .seealso: [](ch_profiling), `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscLogStageGetId()`
1073: @*/
1074: PetscErrorCode PetscLogEventGetId(const char name[], PetscLogEvent *event)
1075: {
1076: PetscStageLog stageLog;
1078: PetscLogGetStageLog(&stageLog);
1079: PetscEventRegLogGetEvent(stageLog->eventLog, name, event);
1080: return 0;
1081: }
1083: PetscErrorCode PetscLogPushCurrentEvent_Internal(PetscLogEvent event)
1084: {
1085: PetscIntStackPush(current_log_event_stack, event);
1086: return 0;
1087: }
1089: PetscErrorCode PetscLogPopCurrentEvent_Internal(void)
1090: {
1091: PetscIntStackPop(current_log_event_stack, NULL);
1092: return 0;
1093: }
1095: PetscErrorCode PetscLogGetCurrentEvent_Internal(PetscLogEvent *event)
1096: {
1097: PetscBool empty;
1100: *event = PETSC_DECIDE;
1101: PetscIntStackEmpty(current_log_event_stack, &empty);
1102: if (!empty) PetscIntStackTop(current_log_event_stack, event);
1103: return 0;
1104: }
1106: PetscErrorCode PetscLogEventPause_Internal(PetscLogEvent event)
1107: {
1108: if (event != PETSC_DECIDE) PetscLogEventEnd(event, NULL, NULL, NULL, NULL);
1109: return 0;
1110: }
1112: PetscErrorCode PetscLogEventResume_Internal(PetscLogEvent event)
1113: {
1114: PetscStageLog stageLog;
1115: PetscEventPerfLog eventLog;
1116: int stage;
1118: if (event == PETSC_DECIDE) return 0;
1119: PetscLogEventBegin(event, NULL, NULL, NULL, NULL);
1120: PetscLogGetStageLog(&stageLog);
1121: PetscStageLogGetCurrent(stageLog, &stage);
1122: PetscStageLogGetEventPerfLog(stageLog, stage, &eventLog);
1123: eventLog->eventInfo[event].count--;
1124: return 0;
1125: }
1127: /*------------------------------------------------ Output Functions -------------------------------------------------*/
1128: /*@C
1129: PetscLogDump - Dumps logs of objects to a file. This file is intended to
1130: be read by bin/petscview. This program no longer exists.
1132: Collective on `PETSC_COMM_WORLD`
1134: Input Parameter:
1135: . name - an optional file name
1137: Usage:
1138: .vb
1139: PetscInitialize(...);
1140: PetscLogDefaultBegin(); or PetscLogAllBegin();
1141: ... code ...
1142: PetscLogDump(filename);
1143: PetscFinalize();
1144: .ve
1146: Level: advanced
1148: Note:
1149: The default file name is
1150: $ Log.<rank>
1151: where <rank> is the processor number. If no name is specified,
1152: this file will be used.
1154: .seealso: [](ch_profiling), `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogView()`
1155: @*/
1156: PetscErrorCode PetscLogDump(const char sname[])
1157: {
1158: PetscStageLog stageLog;
1159: PetscEventPerfInfo *eventInfo;
1160: FILE *fd;
1161: char file[PETSC_MAX_PATH_LEN], fname[PETSC_MAX_PATH_LEN];
1162: PetscLogDouble flops, _TotalTime;
1163: PetscMPIInt rank;
1164: int action, object, curStage;
1165: PetscLogEvent event;
1167: /* Calculate the total elapsed time */
1168: PetscTime(&_TotalTime);
1169: _TotalTime -= petsc_BaseTime;
1170: /* Open log file */
1171: MPI_Comm_rank(PETSC_COMM_WORLD, &rank);
1172: if (sname && sname[0]) sprintf(file, "%s.%d", sname, rank);
1173: else sprintf(file, "Log.%d", rank);
1174: PetscFixFilename(file, fname);
1175: PetscFOpen(PETSC_COMM_WORLD, fname, "w", &fd);
1177: /* Output totals */
1178: PetscFPrintf(PETSC_COMM_WORLD, fd, "Total Flop %14e %16.8e\n", petsc_TotalFlops, _TotalTime);
1179: PetscFPrintf(PETSC_COMM_WORLD, fd, "Clock Resolution %g\n", 0.0);
1180: /* Output actions */
1181: if (petsc_logActions) {
1182: PetscFPrintf(PETSC_COMM_WORLD, fd, "Actions accomplished %d\n", petsc_numActions);
1183: for (action = 0; action < petsc_numActions; action++) {
1184: PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "%g %d %d %d %d %d %d %g %g %g\n", petsc_actions[action].time, petsc_actions[action].action, (int)petsc_actions[action].event, (int)petsc_actions[action].classid, petsc_actions[action].id1,
1185: petsc_actions[action].id2, petsc_actions[action].id3, petsc_actions[action].flops, petsc_actions[action].mem, petsc_actions[action].maxmem));
1186: }
1187: }
1188: /* Output objects */
1189: if (petsc_logObjects) {
1190: PetscFPrintf(PETSC_COMM_WORLD, fd, "Objects created %d destroyed %d\n", petsc_numObjects, petsc_numObjectsDestroyed);
1191: for (object = 0; object < petsc_numObjects; object++) {
1192: PetscFPrintf(PETSC_COMM_WORLD, fd, "Parent ID: %d Memory: %d\n", petsc_objects[object].parent, (int)petsc_objects[object].mem);
1193: if (!petsc_objects[object].name[0]) {
1194: PetscFPrintf(PETSC_COMM_WORLD, fd, "No Name\n");
1195: } else {
1196: PetscFPrintf(PETSC_COMM_WORLD, fd, "Name: %s\n", petsc_objects[object].name);
1197: }
1198: if (petsc_objects[object].info[0] != 0) {
1199: PetscFPrintf(PETSC_COMM_WORLD, fd, "No Info\n");
1200: } else {
1201: PetscFPrintf(PETSC_COMM_WORLD, fd, "Info: %s\n", petsc_objects[object].info);
1202: }
1203: }
1204: }
1205: /* Output events */
1206: PetscFPrintf(PETSC_COMM_WORLD, fd, "Event log:\n");
1207: PetscLogGetStageLog(&stageLog);
1208: PetscIntStackTop(stageLog->stack, &curStage);
1209: eventInfo = stageLog->stageInfo[curStage].eventLog->eventInfo;
1210: for (event = 0; event < stageLog->stageInfo[curStage].eventLog->numEvents; event++) {
1211: if (eventInfo[event].time != 0.0) flops = eventInfo[event].flops / eventInfo[event].time;
1212: else flops = 0.0;
1213: PetscFPrintf(PETSC_COMM_WORLD, fd, "%d %16d %16g %16g %16g\n", event, eventInfo[event].count, eventInfo[event].flops, eventInfo[event].time, flops);
1214: }
1215: PetscFClose(PETSC_COMM_WORLD, fd);
1216: return 0;
1217: }
1219: /*
1220: PetscLogView_Detailed - Each process prints the times for its own events
1222: */
1223: PetscErrorCode PetscLogView_Detailed(PetscViewer viewer)
1224: {
1225: PetscStageLog stageLog;
1226: PetscEventPerfInfo *eventInfo = NULL, *stageInfo = NULL;
1227: PetscLogDouble locTotalTime, numRed, maxMem;
1228: int numStages, numEvents, stage, event;
1229: MPI_Comm comm = PetscObjectComm((PetscObject)viewer);
1230: PetscMPIInt rank, size;
1232: MPI_Comm_size(comm, &size);
1233: MPI_Comm_rank(comm, &rank);
1234: /* Must preserve reduction count before we go on */
1235: numRed = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct;
1236: /* Get the total elapsed time */
1237: PetscTime(&locTotalTime);
1238: locTotalTime -= petsc_BaseTime;
1239: PetscViewerASCIIPrintf(viewer, "size = %d\n", size);
1240: PetscViewerASCIIPrintf(viewer, "LocalTimes = {}\n");
1241: PetscViewerASCIIPrintf(viewer, "LocalMessages = {}\n");
1242: PetscViewerASCIIPrintf(viewer, "LocalMessageLens = {}\n");
1243: PetscViewerASCIIPrintf(viewer, "LocalReductions = {}\n");
1244: PetscViewerASCIIPrintf(viewer, "LocalFlop = {}\n");
1245: PetscViewerASCIIPrintf(viewer, "LocalObjects = {}\n");
1246: PetscViewerASCIIPrintf(viewer, "LocalMemory = {}\n");
1247: PetscLogGetStageLog(&stageLog);
1248: MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm);
1249: PetscViewerASCIIPrintf(viewer, "Stages = {}\n");
1250: for (stage = 0; stage < numStages; stage++) {
1251: PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"] = {}\n", stageLog->stageInfo[stage].name);
1252: PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"][\"summary\"] = {}\n", stageLog->stageInfo[stage].name);
1253: MPI_Allreduce(&stageLog->stageInfo[stage].eventLog->numEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm);
1254: for (event = 0; event < numEvents; event++) PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"][\"%s\"] = {}\n", stageLog->stageInfo[stage].name, stageLog->eventLog->eventInfo[event].name);
1255: }
1256: PetscMallocGetMaximumUsage(&maxMem);
1257: PetscViewerASCIIPushSynchronized(viewer);
1258: PetscViewerASCIISynchronizedPrintf(viewer, "LocalTimes[%d] = %g\n", rank, locTotalTime);
1259: PetscViewerASCIISynchronizedPrintf(viewer, "LocalMessages[%d] = %g\n", rank, (petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct));
1260: PetscViewerASCIISynchronizedPrintf(viewer, "LocalMessageLens[%d] = %g\n", rank, (petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len));
1261: PetscViewerASCIISynchronizedPrintf(viewer, "LocalReductions[%d] = %g\n", rank, numRed);
1262: PetscViewerASCIISynchronizedPrintf(viewer, "LocalFlop[%d] = %g\n", rank, petsc_TotalFlops);
1263: PetscViewerASCIISynchronizedPrintf(viewer, "LocalObjects[%d] = %d\n", rank, petsc_numObjects);
1264: PetscViewerASCIISynchronizedPrintf(viewer, "LocalMemory[%d] = %g\n", rank, maxMem);
1265: PetscViewerFlush(viewer);
1266: for (stage = 0; stage < numStages; stage++) {
1267: stageInfo = &stageLog->stageInfo[stage].perfInfo;
1268: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "Stages[\"%s\"][\"summary\"][%d] = {\"time\" : %g, \"numMessages\" : %g, \"messageLength\" : %g, \"numReductions\" : %g, \"flop\" : %g}\n", stageLog->stageInfo[stage].name, rank, stageInfo->time,
1269: stageInfo->numMessages, stageInfo->messageLength, stageInfo->numReductions, stageInfo->flops));
1270: MPI_Allreduce(&stageLog->stageInfo[stage].eventLog->numEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm);
1271: for (event = 0; event < numEvents; event++) {
1272: eventInfo = &stageLog->stageInfo[stage].eventLog->eventInfo[event];
1273: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "Stages[\"%s\"][\"%s\"][%d] = {\"count\" : %d, \"time\" : %g, \"syncTime\" : %g, \"numMessages\" : %g, \"messageLength\" : %g, \"numReductions\" : %g, \"flop\" : %g",
1274: stageLog->stageInfo[stage].name, stageLog->eventLog->eventInfo[event].name, rank, eventInfo->count, eventInfo->time, eventInfo->syncTime, eventInfo->numMessages, eventInfo->messageLength, eventInfo->numReductions,
1275: eventInfo->flops));
1276: if (eventInfo->dof[0] >= 0.) {
1277: PetscInt d, e;
1279: PetscViewerASCIISynchronizedPrintf(viewer, ", \"dof\" : [");
1280: for (d = 0; d < 8; ++d) {
1281: if (d > 0) PetscViewerASCIISynchronizedPrintf(viewer, ", ");
1282: PetscViewerASCIISynchronizedPrintf(viewer, "%g", eventInfo->dof[d]);
1283: }
1284: PetscViewerASCIISynchronizedPrintf(viewer, "]");
1285: PetscViewerASCIISynchronizedPrintf(viewer, ", \"error\" : [");
1286: for (e = 0; e < 8; ++e) {
1287: if (e > 0) PetscViewerASCIISynchronizedPrintf(viewer, ", ");
1288: PetscViewerASCIISynchronizedPrintf(viewer, "%g", eventInfo->errors[e]);
1289: }
1290: PetscViewerASCIISynchronizedPrintf(viewer, "]");
1291: }
1292: PetscViewerASCIISynchronizedPrintf(viewer, "}\n");
1293: }
1294: }
1295: PetscViewerFlush(viewer);
1296: PetscViewerASCIIPopSynchronized(viewer);
1297: return 0;
1298: }
1300: /*
1301: PetscLogView_CSV - Each process prints the times for its own events in Comma-Separated Value Format
1302: */
1303: PetscErrorCode PetscLogView_CSV(PetscViewer viewer)
1304: {
1305: PetscStageLog stageLog;
1306: PetscEventPerfInfo *eventInfo = NULL;
1307: PetscLogDouble locTotalTime, maxMem;
1308: int numStages, numEvents, stage, event;
1309: MPI_Comm comm = PetscObjectComm((PetscObject)viewer);
1310: PetscMPIInt rank, size;
1312: MPI_Comm_size(comm, &size);
1313: MPI_Comm_rank(comm, &rank);
1314: /* Must preserve reduction count before we go on */
1315: /* Get the total elapsed time */
1316: PetscTime(&locTotalTime);
1317: locTotalTime -= petsc_BaseTime;
1318: PetscLogGetStageLog(&stageLog);
1319: MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm);
1320: PetscMallocGetMaximumUsage(&maxMem);
1321: PetscViewerASCIIPushSynchronized(viewer);
1322: PetscViewerASCIIPrintf(viewer, "Stage Name,Event Name,Rank,Count,Time,Num Messages,Message Length,Num Reductions,FLOP,dof0,dof1,dof2,dof3,dof4,dof5,dof6,dof7,e0,e1,e2,e3,e4,e5,e6,e7,%d\n", size);
1323: PetscViewerFlush(viewer);
1324: for (stage = 0; stage < numStages; stage++) {
1325: PetscEventPerfInfo *stageInfo = &stageLog->stageInfo[stage].perfInfo;
1327: PetscViewerASCIISynchronizedPrintf(viewer, "%s,summary,%d,1,%g,%g,%g,%g,%g\n", stageLog->stageInfo[stage].name, rank, stageInfo->time, stageInfo->numMessages, stageInfo->messageLength, stageInfo->numReductions, stageInfo->flops);
1328: MPI_Allreduce(&stageLog->stageInfo[stage].eventLog->numEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm);
1329: for (event = 0; event < numEvents; event++) {
1330: eventInfo = &stageLog->stageInfo[stage].eventLog->eventInfo[event];
1331: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%s,%s,%d,%d,%g,%g,%g,%g,%g", stageLog->stageInfo[stage].name, stageLog->eventLog->eventInfo[event].name, rank, eventInfo->count, eventInfo->time, eventInfo->numMessages, eventInfo->messageLength,
1332: eventInfo->numReductions, eventInfo->flops));
1333: if (eventInfo->dof[0] >= 0.) {
1334: PetscInt d, e;
1336: for (d = 0; d < 8; ++d) PetscViewerASCIISynchronizedPrintf(viewer, ",%g", eventInfo->dof[d]);
1337: for (e = 0; e < 8; ++e) PetscViewerASCIISynchronizedPrintf(viewer, ",%g", eventInfo->errors[e]);
1338: }
1339: PetscViewerASCIISynchronizedPrintf(viewer, "\n");
1340: }
1341: }
1342: PetscViewerFlush(viewer);
1343: PetscViewerASCIIPopSynchronized(viewer);
1344: return 0;
1345: }
1347: static PetscErrorCode PetscLogViewWarnSync(MPI_Comm comm, FILE *fd)
1348: {
1349: if (!PetscLogSyncOn) return 0;
1350: PetscFPrintf(comm, fd, "\n\n");
1351: PetscFPrintf(comm, fd, " ##########################################################\n");
1352: PetscFPrintf(comm, fd, " # #\n");
1353: PetscFPrintf(comm, fd, " # WARNING!!! #\n");
1354: PetscFPrintf(comm, fd, " # #\n");
1355: PetscFPrintf(comm, fd, " # This program was run with logging synchronization. #\n");
1356: PetscFPrintf(comm, fd, " # This option provides more meaningful imbalance #\n");
1357: PetscFPrintf(comm, fd, " # figures at the expense of slowing things down and #\n");
1358: PetscFPrintf(comm, fd, " # providing a distorted view of the overall runtime. #\n");
1359: PetscFPrintf(comm, fd, " # #\n");
1360: PetscFPrintf(comm, fd, " ##########################################################\n\n\n");
1361: return 0;
1362: }
1364: static PetscErrorCode PetscLogViewWarnDebugging(MPI_Comm comm, FILE *fd)
1365: {
1366: if (PetscDefined(USE_DEBUG)) {
1367: PetscFPrintf(comm, fd, "\n\n");
1368: PetscFPrintf(comm, fd, " ##########################################################\n");
1369: PetscFPrintf(comm, fd, " # #\n");
1370: PetscFPrintf(comm, fd, " # WARNING!!! #\n");
1371: PetscFPrintf(comm, fd, " # #\n");
1372: PetscFPrintf(comm, fd, " # This code was compiled with a debugging option. #\n");
1373: PetscFPrintf(comm, fd, " # To get timing results run ./configure #\n");
1374: PetscFPrintf(comm, fd, " # using --with-debugging=no, the performance will #\n");
1375: PetscFPrintf(comm, fd, " # be generally two or three times faster. #\n");
1376: PetscFPrintf(comm, fd, " # #\n");
1377: PetscFPrintf(comm, fd, " ##########################################################\n\n\n");
1378: }
1379: return 0;
1380: }
1382: static PetscErrorCode PetscLogViewWarnNoGpuAwareMpi(MPI_Comm comm, FILE *fd)
1383: {
1384: #if defined(PETSC_HAVE_DEVICE)
1385: PetscMPIInt size;
1386: PetscBool deviceInitialized = PETSC_FALSE;
1388: MPI_Comm_size(comm, &size);
1389: for (int i = PETSC_DEVICE_HOST + 1; i < PETSC_DEVICE_MAX; ++i) {
1390: const PetscDeviceType dtype = PetscDeviceTypeCast(i);
1391: if (PetscDeviceInitialized(dtype)) { /* a non-host device was initialized */
1392: deviceInitialized = PETSC_TRUE;
1393: break;
1394: }
1395: }
1396: /* the last condition says petsc is configured with device but it is a pure CPU run, so don't print misleading warnings */
1397: if (use_gpu_aware_mpi || size == 1 || !deviceInitialized) return 0;
1398: PetscFPrintf(comm, fd, "\n\n");
1399: PetscFPrintf(comm, fd, " ##########################################################\n");
1400: PetscFPrintf(comm, fd, " # #\n");
1401: PetscFPrintf(comm, fd, " # WARNING!!! #\n");
1402: PetscFPrintf(comm, fd, " # #\n");
1403: PetscFPrintf(comm, fd, " # This code was compiled with GPU support and you've #\n");
1404: PetscFPrintf(comm, fd, " # created PETSc/GPU objects, but you intentionally #\n");
1405: PetscFPrintf(comm, fd, " # used -use_gpu_aware_mpi 0, requiring PETSc to copy #\n");
1406: PetscFPrintf(comm, fd, " # additional data between the GPU and CPU. To obtain #\n");
1407: PetscFPrintf(comm, fd, " # meaningful timing results on multi-rank runs, use #\n");
1408: PetscFPrintf(comm, fd, " # GPU-aware MPI instead. #\n");
1409: PetscFPrintf(comm, fd, " # #\n");
1410: PetscFPrintf(comm, fd, " ##########################################################\n\n\n");
1411: return 0;
1412: #else
1413: return 0;
1414: #endif
1415: }
1417: static PetscErrorCode PetscLogViewWarnGpuTime(MPI_Comm comm, FILE *fd)
1418: {
1419: #if defined(PETSC_HAVE_DEVICE)
1421: if (!PetscLogGpuTimeFlag || petsc_gflops == 0) return 0;
1422: PetscFPrintf(comm, fd, "\n\n");
1423: PetscFPrintf(comm, fd, " ##########################################################\n");
1424: PetscFPrintf(comm, fd, " # #\n");
1425: PetscFPrintf(comm, fd, " # WARNING!!! #\n");
1426: PetscFPrintf(comm, fd, " # #\n");
1427: PetscFPrintf(comm, fd, " # This code was run with -log_view_gpu_time #\n");
1428: PetscFPrintf(comm, fd, " # This provides accurate timing within the GPU kernels #\n");
1429: PetscFPrintf(comm, fd, " # but can slow down the entire computation by a #\n");
1430: PetscFPrintf(comm, fd, " # measurable amount. For fastest runs we recommend #\n");
1431: PetscFPrintf(comm, fd, " # not using this option. #\n");
1432: PetscFPrintf(comm, fd, " # #\n");
1433: PetscFPrintf(comm, fd, " ##########################################################\n\n\n");
1434: return 0;
1435: #else
1436: return 0;
1437: #endif
1438: }
1440: PetscErrorCode PetscLogView_Default(PetscViewer viewer)
1441: {
1442: FILE *fd;
1443: PetscLogDouble zero = 0.0;
1444: PetscStageLog stageLog;
1445: PetscStageInfo *stageInfo = NULL;
1446: PetscEventPerfInfo *eventInfo = NULL;
1447: PetscClassPerfInfo *classInfo;
1448: char arch[128], hostname[128], username[128], pname[PETSC_MAX_PATH_LEN], date[128];
1449: const char *name;
1450: PetscLogDouble locTotalTime, TotalTime, TotalFlops;
1451: PetscLogDouble numMessages, messageLength, avgMessLen, numReductions;
1452: PetscLogDouble stageTime, flops, flopr, mem, mess, messLen, red;
1453: PetscLogDouble fracTime, fracFlops, fracMessages, fracLength, fracReductions, fracMess, fracMessLen, fracRed;
1454: PetscLogDouble fracStageTime, fracStageFlops, fracStageMess, fracStageMessLen, fracStageRed;
1455: PetscLogDouble min, max, tot, ratio, avg, x, y;
1456: PetscLogDouble minf, maxf, totf, ratf, mint, maxt, tott, ratt, ratC, totm, totml, totr, mal, malmax, emalmax;
1457: #if defined(PETSC_HAVE_DEVICE)
1458: PetscLogEvent KSP_Solve, SNES_Solve, TS_Step, TAO_Solve; /* These need to be fixed to be some events registered with certain objects */
1459: PetscLogDouble cct, gct, csz, gsz, gmaxt, gflops, gflopr, fracgflops;
1460: #endif
1461: PetscMPIInt minC, maxC;
1462: PetscMPIInt size, rank;
1463: PetscBool *localStageUsed, *stageUsed;
1464: PetscBool *localStageVisible, *stageVisible;
1465: int numStages, localNumEvents, numEvents;
1466: int stage, oclass;
1467: PetscLogEvent event;
1468: PetscErrorCode 0;
1469: char version[256];
1470: MPI_Comm comm;
1471: #if defined(PETSC_HAVE_DEVICE)
1472: PetscLogEvent eventid;
1473: PetscInt64 nas = 0x7FF0000000000002;
1474: #endif
1476: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
1477: PetscObjectGetComm((PetscObject)viewer, &comm);
1478: PetscViewerASCIIGetPointer(viewer, &fd);
1479: MPI_Comm_size(comm, &size);
1480: MPI_Comm_rank(comm, &rank);
1481: /* Get the total elapsed time */
1482: PetscTime(&locTotalTime);
1483: locTotalTime -= petsc_BaseTime;
1485: PetscFPrintf(comm, fd, "****************************************************************************************************************************************************************\n");
1486: PetscFPrintf(comm, fd, "*** WIDEN YOUR WINDOW TO 160 CHARACTERS. Use 'enscript -r -fCourier9' to print this document ***\n");
1487: PetscFPrintf(comm, fd, "****************************************************************************************************************************************************************\n");
1488: PetscFPrintf(comm, fd, "\n------------------------------------------------------------------ PETSc Performance Summary: ------------------------------------------------------------------\n\n");
1489: PetscLogViewWarnSync(comm, fd);
1490: PetscLogViewWarnDebugging(comm, fd);
1491: PetscLogViewWarnNoGpuAwareMpi(comm, fd);
1492: PetscLogViewWarnGpuTime(comm, fd);
1493: PetscGetArchType(arch, sizeof(arch));
1494: PetscGetHostName(hostname, sizeof(hostname));
1495: PetscGetUserName(username, sizeof(username));
1496: PetscGetProgramName(pname, sizeof(pname));
1497: PetscGetDate(date, sizeof(date));
1498: PetscGetVersion(version, sizeof(version));
1499: if (size == 1) {
1500: PetscFPrintf(comm, fd, "%s on a %s named %s with %d processor, by %s %s\n", pname, arch, hostname, size, username, date);
1501: } else {
1502: PetscFPrintf(comm, fd, "%s on a %s named %s with %d processors, by %s %s\n", pname, arch, hostname, size, username, date);
1503: }
1504: #if defined(PETSC_HAVE_OPENMP)
1505: PetscFPrintf(comm, fd, "Using %" PetscInt_FMT " OpenMP threads\n", PetscNumOMPThreads);
1506: #endif
1507: PetscFPrintf(comm, fd, "Using %s\n", version);
1509: /* Must preserve reduction count before we go on */
1510: red = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct;
1512: /* Calculate summary information */
1513: PetscFPrintf(comm, fd, "\n Max Max/Min Avg Total\n");
1514: /* Time */
1515: MPI_Allreduce(&locTotalTime, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1516: MPI_Allreduce(&locTotalTime, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1517: MPI_Allreduce(&locTotalTime, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1518: avg = tot / ((PetscLogDouble)size);
1519: if (min != 0.0) ratio = max / min;
1520: else ratio = 0.0;
1521: PetscFPrintf(comm, fd, "Time (sec): %5.3e %7.3f %5.3e\n", max, ratio, avg);
1522: TotalTime = tot;
1523: /* Objects */
1524: avg = (PetscLogDouble)petsc_numObjects;
1525: MPI_Allreduce(&avg, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1526: MPI_Allreduce(&avg, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1527: MPI_Allreduce(&avg, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1528: avg = tot / ((PetscLogDouble)size);
1529: if (min != 0.0) ratio = max / min;
1530: else ratio = 0.0;
1531: PetscFPrintf(comm, fd, "Objects: %5.3e %7.3f %5.3e\n", max, ratio, avg);
1532: /* Flops */
1533: MPI_Allreduce(&petsc_TotalFlops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1534: MPI_Allreduce(&petsc_TotalFlops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1535: MPI_Allreduce(&petsc_TotalFlops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1536: avg = tot / ((PetscLogDouble)size);
1537: if (min != 0.0) ratio = max / min;
1538: else ratio = 0.0;
1539: PetscFPrintf(comm, fd, "Flops: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot);
1540: TotalFlops = tot;
1541: /* Flops/sec -- Must talk to Barry here */
1542: if (locTotalTime != 0.0) flops = petsc_TotalFlops / locTotalTime;
1543: else flops = 0.0;
1544: MPI_Allreduce(&flops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1545: MPI_Allreduce(&flops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1546: MPI_Allreduce(&flops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1547: avg = tot / ((PetscLogDouble)size);
1548: if (min != 0.0) ratio = max / min;
1549: else ratio = 0.0;
1550: PetscFPrintf(comm, fd, "Flops/sec: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot);
1551: /* Memory */
1552: PetscMallocGetMaximumUsage(&mem);
1553: if (mem > 0.0) {
1554: MPI_Allreduce(&mem, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1555: MPI_Allreduce(&mem, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1556: MPI_Allreduce(&mem, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1557: avg = tot / ((PetscLogDouble)size);
1558: if (min != 0.0) ratio = max / min;
1559: else ratio = 0.0;
1560: PetscFPrintf(comm, fd, "Memory (bytes): %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot);
1561: }
1562: /* Messages */
1563: mess = 0.5 * (petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct);
1564: MPI_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1565: MPI_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1566: MPI_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1567: avg = tot / ((PetscLogDouble)size);
1568: if (min != 0.0) ratio = max / min;
1569: else ratio = 0.0;
1570: PetscFPrintf(comm, fd, "MPI Msg Count: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot);
1571: numMessages = tot;
1572: /* Message Lengths */
1573: mess = 0.5 * (petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len);
1574: MPI_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1575: MPI_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1576: MPI_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1577: if (numMessages != 0) avg = tot / numMessages;
1578: else avg = 0.0;
1579: if (min != 0.0) ratio = max / min;
1580: else ratio = 0.0;
1581: PetscFPrintf(comm, fd, "MPI Msg Len (bytes): %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot);
1582: messageLength = tot;
1583: /* Reductions */
1584: MPI_Allreduce(&red, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1585: MPI_Allreduce(&red, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1586: MPI_Allreduce(&red, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1587: if (min != 0.0) ratio = max / min;
1588: else ratio = 0.0;
1589: PetscFPrintf(comm, fd, "MPI Reductions: %5.3e %7.3f\n", max, ratio);
1590: numReductions = red; /* wrong because uses count from process zero */
1591: PetscFPrintf(comm, fd, "\nFlop counting convention: 1 flop = 1 real number operation of type (multiply/divide/add/subtract)\n");
1592: PetscFPrintf(comm, fd, " e.g., VecAXPY() for real vectors of length N --> 2N flops\n");
1593: PetscFPrintf(comm, fd, " and VecAXPY() for complex vectors of length N --> 8N flops\n");
1595: /* Get total number of stages --
1596: Currently, a single processor can register more stages than another, but stages must all be registered in order.
1597: We can removed this requirement if necessary by having a global stage numbering and indirection on the stage ID.
1598: This seems best accomplished by assoicating a communicator with each stage.
1599: */
1600: PetscLogGetStageLog(&stageLog);
1601: MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm);
1602: PetscMalloc1(numStages, &localStageUsed);
1603: PetscMalloc1(numStages, &stageUsed);
1604: PetscMalloc1(numStages, &localStageVisible);
1605: PetscMalloc1(numStages, &stageVisible);
1606: if (numStages > 0) {
1607: stageInfo = stageLog->stageInfo;
1608: for (stage = 0; stage < numStages; stage++) {
1609: if (stage < stageLog->numStages) {
1610: localStageUsed[stage] = stageInfo[stage].used;
1611: localStageVisible[stage] = stageInfo[stage].perfInfo.visible;
1612: } else {
1613: localStageUsed[stage] = PETSC_FALSE;
1614: localStageVisible[stage] = PETSC_TRUE;
1615: }
1616: }
1617: MPI_Allreduce(localStageUsed, stageUsed, numStages, MPIU_BOOL, MPI_LOR, comm);
1618: MPI_Allreduce(localStageVisible, stageVisible, numStages, MPIU_BOOL, MPI_LAND, comm);
1619: for (stage = 0; stage < numStages; stage++) {
1620: if (stageUsed[stage]) {
1621: PetscFPrintf(comm, fd, "\nSummary of Stages: ----- Time ------ ----- Flop ------ --- Messages --- -- Message Lengths -- -- Reductions --\n");
1622: PetscFPrintf(comm, fd, " Avg %%Total Avg %%Total Count %%Total Avg %%Total Count %%Total\n");
1623: break;
1624: }
1625: }
1626: for (stage = 0; stage < numStages; stage++) {
1627: if (!stageUsed[stage]) continue;
1628: /* CANNOT use MPI_Allreduce() since it might fail the line number check */
1629: if (localStageUsed[stage]) {
1630: MPI_Allreduce(&stageInfo[stage].perfInfo.time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1631: MPI_Allreduce(&stageInfo[stage].perfInfo.flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1632: MPI_Allreduce(&stageInfo[stage].perfInfo.numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1633: MPI_Allreduce(&stageInfo[stage].perfInfo.messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1634: MPI_Allreduce(&stageInfo[stage].perfInfo.numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1635: name = stageInfo[stage].name;
1636: } else {
1637: MPI_Allreduce(&zero, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1638: MPI_Allreduce(&zero, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1639: MPI_Allreduce(&zero, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1640: MPI_Allreduce(&zero, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1641: MPI_Allreduce(&zero, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1642: name = "";
1643: }
1644: mess *= 0.5;
1645: messLen *= 0.5;
1646: red /= size;
1647: if (TotalTime != 0.0) fracTime = stageTime / TotalTime;
1648: else fracTime = 0.0;
1649: if (TotalFlops != 0.0) fracFlops = flops / TotalFlops;
1650: else fracFlops = 0.0;
1651: /* Talk to Barry if (stageTime != 0.0) flops = (size*flops)/stageTime; else flops = 0.0; */
1652: if (numMessages != 0.0) fracMessages = mess / numMessages;
1653: else fracMessages = 0.0;
1654: if (mess != 0.0) avgMessLen = messLen / mess;
1655: else avgMessLen = 0.0;
1656: if (messageLength != 0.0) fracLength = messLen / messageLength;
1657: else fracLength = 0.0;
1658: if (numReductions != 0.0) fracReductions = red / numReductions;
1659: else fracReductions = 0.0;
1660: PetscFPrintf(comm, fd, "%2d: %15s: %6.4e %5.1f%% %6.4e %5.1f%% %5.3e %5.1f%% %5.3e %5.1f%% %5.3e %5.1f%%\n", stage, name, stageTime / size, 100.0 * fracTime, flops, 100.0 * fracFlops, mess, 100.0 * fracMessages, avgMessLen, 100.0 * fracLength, red, 100.0 * fracReductions);
1661: }
1662: }
1664: PetscFPrintf(comm, fd, "\n------------------------------------------------------------------------------------------------------------------------\n");
1665: PetscFPrintf(comm, fd, "See the 'Profiling' chapter of the users' manual for details on interpreting output.\n");
1666: PetscFPrintf(comm, fd, "Phase summary info:\n");
1667: PetscFPrintf(comm, fd, " Count: number of times phase was executed\n");
1668: PetscFPrintf(comm, fd, " Time and Flop: Max - maximum over all processors\n");
1669: PetscFPrintf(comm, fd, " Ratio - ratio of maximum to minimum over all processors\n");
1670: PetscFPrintf(comm, fd, " Mess: number of messages sent\n");
1671: PetscFPrintf(comm, fd, " AvgLen: average message length (bytes)\n");
1672: PetscFPrintf(comm, fd, " Reduct: number of global reductions\n");
1673: PetscFPrintf(comm, fd, " Global: entire computation\n");
1674: PetscFPrintf(comm, fd, " Stage: stages of a computation. Set stages with PetscLogStagePush() and PetscLogStagePop().\n");
1675: PetscFPrintf(comm, fd, " %%T - percent time in this phase %%F - percent flop in this phase\n");
1676: PetscFPrintf(comm, fd, " %%M - percent messages in this phase %%L - percent message lengths in this phase\n");
1677: PetscFPrintf(comm, fd, " %%R - percent reductions in this phase\n");
1678: PetscFPrintf(comm, fd, " Total Mflop/s: 10e-6 * (sum of flop over all processors)/(max time over all processors)\n");
1679: if (PetscLogMemory) {
1680: PetscFPrintf(comm, fd, " Malloc Mbytes: Memory allocated and kept during event (sum over all calls to event). May be negative\n");
1681: PetscFPrintf(comm, fd, " EMalloc Mbytes: extra memory allocated during event and then freed (maximum over all calls to events). Never negative\n");
1682: PetscFPrintf(comm, fd, " MMalloc Mbytes: Increase in high water mark of allocated memory (sum over all calls to event). Never negative\n");
1683: PetscFPrintf(comm, fd, " RMI Mbytes: Increase in resident memory (sum over all calls to event)\n");
1684: }
1685: #if defined(PETSC_HAVE_DEVICE)
1686: PetscFPrintf(comm, fd, " GPU Mflop/s: 10e-6 * (sum of flop on GPU over all processors)/(max GPU time over all processors)\n");
1687: PetscFPrintf(comm, fd, " CpuToGpu Count: total number of CPU to GPU copies per processor\n");
1688: PetscFPrintf(comm, fd, " CpuToGpu Size (Mbytes): 10e-6 * (total size of CPU to GPU copies per processor)\n");
1689: PetscFPrintf(comm, fd, " GpuToCpu Count: total number of GPU to CPU copies per processor\n");
1690: PetscFPrintf(comm, fd, " GpuToCpu Size (Mbytes): 10e-6 * (total size of GPU to CPU copies per processor)\n");
1691: PetscFPrintf(comm, fd, " GPU %%F: percent flops on GPU in this event\n");
1692: #endif
1693: PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------\n");
1695: PetscLogViewWarnDebugging(comm, fd);
1697: /* Report events */
1698: PetscFPrintf(comm, fd, "Event Count Time (sec) Flop --- Global --- --- Stage ---- Total");
1699: if (PetscLogMemory) PetscFPrintf(comm, fd, " Malloc EMalloc MMalloc RMI");
1700: #if defined(PETSC_HAVE_DEVICE)
1701: PetscFPrintf(comm, fd, " GPU - CpuToGpu - - GpuToCpu - GPU");
1702: #endif
1703: PetscFPrintf(comm, fd, "\n");
1704: PetscFPrintf(comm, fd, " Max Ratio Max Ratio Max Ratio Mess AvgLen Reduct %%T %%F %%M %%L %%R %%T %%F %%M %%L %%R Mflop/s");
1705: if (PetscLogMemory) PetscFPrintf(comm, fd, " Mbytes Mbytes Mbytes Mbytes");
1706: #if defined(PETSC_HAVE_DEVICE)
1707: PetscFPrintf(comm, fd, " Mflop/s Count Size Count Size %%F");
1708: #endif
1709: PetscFPrintf(comm, fd, "\n");
1710: PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------");
1711: if (PetscLogMemory) PetscFPrintf(comm, fd, "-----------------------------");
1712: #if defined(PETSC_HAVE_DEVICE)
1713: PetscFPrintf(comm, fd, "---------------------------------------");
1714: #endif
1715: PetscFPrintf(comm, fd, "\n");
1717: #if defined(PETSC_HAVE_DEVICE)
1718: /* this indirect way of accessing these values is needed when PETSc is build with multiple libraries since the symbols are not in libpetscsys */
1719: PetscEventRegLogGetEvent(stageLog->eventLog, "TAOSolve", &TAO_Solve);
1720: PetscEventRegLogGetEvent(stageLog->eventLog, "TSStep", &TS_Step);
1721: PetscEventRegLogGetEvent(stageLog->eventLog, "SNESSolve", &SNES_Solve);
1722: PetscEventRegLogGetEvent(stageLog->eventLog, "KSPSolve", &KSP_Solve);
1723: #endif
1725: /* Problem: The stage name will not show up unless the stage executed on proc 1 */
1726: for (stage = 0; stage < numStages; stage++) {
1727: if (!stageVisible[stage]) continue;
1728: /* CANNOT use MPI_Allreduce() since it might fail the line number check */
1729: if (localStageUsed[stage]) {
1730: PetscFPrintf(comm, fd, "\n--- Event Stage %d: %s\n\n", stage, stageInfo[stage].name);
1731: MPI_Allreduce(&stageInfo[stage].perfInfo.time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1732: MPI_Allreduce(&stageInfo[stage].perfInfo.flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1733: MPI_Allreduce(&stageInfo[stage].perfInfo.numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1734: MPI_Allreduce(&stageInfo[stage].perfInfo.messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1735: MPI_Allreduce(&stageInfo[stage].perfInfo.numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1736: } else {
1737: PetscFPrintf(comm, fd, "\n--- Event Stage %d: Unknown\n\n", stage);
1738: MPI_Allreduce(&zero, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1739: MPI_Allreduce(&zero, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1740: MPI_Allreduce(&zero, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1741: MPI_Allreduce(&zero, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1742: MPI_Allreduce(&zero, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1743: }
1744: mess *= 0.5;
1745: messLen *= 0.5;
1746: red /= size;
1748: /* Get total number of events in this stage --
1749: Currently, a single processor can register more events than another, but events must all be registered in order,
1750: just like stages. We can removed this requirement if necessary by having a global event numbering and indirection
1751: on the event ID. This seems best accomplished by associating a communicator with each stage.
1753: Problem: If the event did not happen on proc 1, its name will not be available.
1754: Problem: Event visibility is not implemented
1755: */
1756: if (localStageUsed[stage]) {
1757: eventInfo = stageLog->stageInfo[stage].eventLog->eventInfo;
1758: localNumEvents = stageLog->stageInfo[stage].eventLog->numEvents;
1759: } else localNumEvents = 0;
1760: MPI_Allreduce(&localNumEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm);
1761: for (event = 0; event < numEvents; event++) {
1762: /* CANNOT use MPI_Allreduce() since it might fail the line number check */
1763: if (localStageUsed[stage] && (event < stageLog->stageInfo[stage].eventLog->numEvents) && (eventInfo[event].depth == 0)) {
1764: if ((eventInfo[event].count > 0) && (eventInfo[event].time > 0.0)) flopr = eventInfo[event].flops;
1765: else flopr = 0.0;
1766: MPI_Allreduce(&flopr, &minf, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1767: MPI_Allreduce(&flopr, &maxf, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1768: MPI_Allreduce(&eventInfo[event].flops, &totf, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1769: MPI_Allreduce(&eventInfo[event].time, &mint, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1770: MPI_Allreduce(&eventInfo[event].time, &maxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1771: MPI_Allreduce(&eventInfo[event].time, &tott, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1772: MPI_Allreduce(&eventInfo[event].numMessages, &totm, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1773: MPI_Allreduce(&eventInfo[event].messageLength, &totml, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1774: MPI_Allreduce(&eventInfo[event].numReductions, &totr, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1775: MPI_Allreduce(&eventInfo[event].count, &minC, 1, MPI_INT, MPI_MIN, comm);
1776: MPI_Allreduce(&eventInfo[event].count, &maxC, 1, MPI_INT, MPI_MAX, comm);
1777: if (PetscLogMemory) {
1778: MPI_Allreduce(&eventInfo[event].memIncrease, &mem, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1779: MPI_Allreduce(&eventInfo[event].mallocSpace, &mal, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1780: MPI_Allreduce(&eventInfo[event].mallocIncrease, &malmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1781: MPI_Allreduce(&eventInfo[event].mallocIncreaseEvent, &emalmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1782: }
1783: #if defined(PETSC_HAVE_DEVICE)
1784: MPI_Allreduce(&eventInfo[event].CpuToGpuCount, &cct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1785: MPI_Allreduce(&eventInfo[event].GpuToCpuCount, &gct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1786: MPI_Allreduce(&eventInfo[event].CpuToGpuSize, &csz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1787: MPI_Allreduce(&eventInfo[event].GpuToCpuSize, &gsz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1788: MPI_Allreduce(&eventInfo[event].GpuFlops, &gflops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1789: MPI_Allreduce(&eventInfo[event].GpuTime, &gmaxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1790: #endif
1791: name = stageLog->eventLog->eventInfo[event].name;
1792: } else {
1793: flopr = 0.0;
1794: MPI_Allreduce(&flopr, &minf, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1795: MPI_Allreduce(&flopr, &maxf, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1796: MPI_Allreduce(&zero, &totf, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1797: MPI_Allreduce(&zero, &mint, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1798: MPI_Allreduce(&zero, &maxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1799: MPI_Allreduce(&zero, &tott, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1800: MPI_Allreduce(&zero, &totm, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1801: MPI_Allreduce(&zero, &totml, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1802: MPI_Allreduce(&zero, &totr, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1803: MPI_Allreduce(&ierr, &minC, 1, MPI_INT, MPI_MIN, comm);
1804: MPI_Allreduce(&ierr, &maxC, 1, MPI_INT, MPI_MAX, comm);
1805: if (PetscLogMemory) {
1806: MPI_Allreduce(&zero, &mem, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1807: MPI_Allreduce(&zero, &mal, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1808: MPI_Allreduce(&zero, &malmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1809: MPI_Allreduce(&zero, &emalmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1810: }
1811: #if defined(PETSC_HAVE_DEVICE)
1812: MPI_Allreduce(&zero, &cct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1813: MPI_Allreduce(&zero, &gct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1814: MPI_Allreduce(&zero, &csz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1815: MPI_Allreduce(&zero, &gsz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1816: MPI_Allreduce(&zero, &gflops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1817: MPI_Allreduce(&zero, &gmaxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1818: #endif
1819: name = "";
1820: }
1821: if (mint < 0.0) {
1822: PetscFPrintf(comm, fd, "WARNING!!! Minimum time %g over all processors for %s is negative! This happens\n on some machines whose times cannot handle too rapid calls.!\n artificially changing minimum to zero.\n", mint, name);
1823: mint = 0;
1824: }
1826: /* Put NaN into the time for all events that may not be time accurately since they may happen asynchronously on the GPU */
1827: #if defined(PETSC_HAVE_DEVICE)
1828: if (!PetscLogGpuTimeFlag && petsc_gflops > 0) {
1829: memcpy(&gmaxt, &nas, sizeof(PetscLogDouble));
1830: PetscEventRegLogGetEvent(stageLog->eventLog, name, &eventid);
1831: if (eventid != SNES_Solve && eventid != KSP_Solve && eventid != TS_Step && eventid != TAO_Solve) {
1832: memcpy(&mint, &nas, sizeof(PetscLogDouble));
1833: memcpy(&maxt, &nas, sizeof(PetscLogDouble));
1834: }
1835: }
1836: #endif
1837: totm *= 0.5;
1838: totml *= 0.5;
1839: totr /= size;
1841: if (maxC != 0) {
1842: if (minC != 0) ratC = ((PetscLogDouble)maxC) / minC;
1843: else ratC = 0.0;
1844: if (mint != 0.0) ratt = maxt / mint;
1845: else ratt = 0.0;
1846: if (minf != 0.0) ratf = maxf / minf;
1847: else ratf = 0.0;
1848: if (TotalTime != 0.0) fracTime = tott / TotalTime;
1849: else fracTime = 0.0;
1850: if (TotalFlops != 0.0) fracFlops = totf / TotalFlops;
1851: else fracFlops = 0.0;
1852: if (stageTime != 0.0) fracStageTime = tott / stageTime;
1853: else fracStageTime = 0.0;
1854: if (flops != 0.0) fracStageFlops = totf / flops;
1855: else fracStageFlops = 0.0;
1856: if (numMessages != 0.0) fracMess = totm / numMessages;
1857: else fracMess = 0.0;
1858: if (messageLength != 0.0) fracMessLen = totml / messageLength;
1859: else fracMessLen = 0.0;
1860: if (numReductions != 0.0) fracRed = totr / numReductions;
1861: else fracRed = 0.0;
1862: if (mess != 0.0) fracStageMess = totm / mess;
1863: else fracStageMess = 0.0;
1864: if (messLen != 0.0) fracStageMessLen = totml / messLen;
1865: else fracStageMessLen = 0.0;
1866: if (red != 0.0) fracStageRed = totr / red;
1867: else fracStageRed = 0.0;
1868: if (totm != 0.0) totml /= totm;
1869: else totml = 0.0;
1870: if (maxt != 0.0) flopr = totf / maxt;
1871: else flopr = 0.0;
1872: if (fracStageTime > 1.0 || fracStageFlops > 1.0 || fracStageMess > 1.0 || fracStageMessLen > 1.0 || fracStageRed > 1.0)
1873: PetscFPrintf(comm, fd, "%-16s %7d %3.1f %5.4e %3.1f %3.2e %3.1f %2.1e %2.1e %2.1e %2.0f %2.0f %2.0f %2.0f %2.0f Multiple stages %5.0f", name, maxC, ratC, maxt, ratt, maxf, ratf, totm, totml, totr, 100.0 * fracTime, 100.0 * fracFlops, 100.0 * fracMess, 100.0 * fracMessLen, 100.0 * fracRed, PetscAbs(flopr) / 1.0e6);
1874: else
1875: PetscFPrintf(comm, fd, "%-16s %7d %3.1f %5.4e %3.1f %3.2e %3.1f %2.1e %2.1e %2.1e %2.0f %2.0f %2.0f %2.0f %2.0f %3.0f %2.0f %2.0f %2.0f %2.0f %5.0f", name, maxC, ratC, maxt, ratt, maxf, ratf, totm, totml, totr, 100.0 * fracTime, 100.0 * fracFlops, 100.0 * fracMess, 100.0 * fracMessLen, 100.0 * fracRed, 100.0 * fracStageTime, 100.0 * fracStageFlops, 100.0 * fracStageMess, 100.0 * fracStageMessLen, 100.0 * fracStageRed, PetscAbs(flopr) / 1.0e6);
1876: if (PetscLogMemory) PetscFPrintf(comm, fd, " %5.0f %5.0f %5.0f %5.0f", mal / 1.0e6, emalmax / 1.0e6, malmax / 1.0e6, mem / 1.0e6);
1877: #if defined(PETSC_HAVE_DEVICE)
1878: if (totf != 0.0) fracgflops = gflops / totf;
1879: else fracgflops = 0.0;
1880: if (gmaxt != 0.0) gflopr = gflops / gmaxt;
1881: else gflopr = 0.0;
1882: PetscFPrintf(comm, fd, " %5.0f %4.0f %3.2e %4.0f %3.2e % 2.0f", PetscAbs(gflopr) / 1.0e6, cct / size, csz / (1.0e6 * size), gct / size, gsz / (1.0e6 * size), 100.0 * fracgflops);
1883: #endif
1884: PetscFPrintf(comm, fd, "\n");
1885: }
1886: }
1887: }
1889: /* Memory usage and object creation */
1890: PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------");
1891: if (PetscLogMemory) PetscFPrintf(comm, fd, "-----------------------------");
1892: #if defined(PETSC_HAVE_DEVICE)
1893: PetscFPrintf(comm, fd, "---------------------------------------");
1894: #endif
1895: PetscFPrintf(comm, fd, "\n");
1896: PetscFPrintf(comm, fd, "\n");
1898: /* Right now, only stages on the first processor are reported here, meaning only objects associated with
1899: the global communicator, or MPI_COMM_SELF for proc 1. We really should report global stats and then
1900: stats for stages local to processor sets.
1901: */
1902: /* We should figure out the longest object name here (now 20 characters) */
1903: PetscFPrintf(comm, fd, "Object Type Creations Destructions. Reports information only for process 0.\n");
1904: for (stage = 0; stage < numStages; stage++) {
1905: if (localStageUsed[stage]) {
1906: classInfo = stageLog->stageInfo[stage].classLog->classInfo;
1907: PetscFPrintf(comm, fd, "\n--- Event Stage %d: %s\n\n", stage, stageInfo[stage].name);
1908: for (oclass = 0; oclass < stageLog->stageInfo[stage].classLog->numClasses; oclass++) {
1909: if ((classInfo[oclass].creations > 0) || (classInfo[oclass].destructions > 0)) {
1910: PetscFPrintf(comm, fd, "%20s %5d %5d\n", stageLog->classLog->classInfo[oclass].name, classInfo[oclass].creations, classInfo[oclass].destructions);
1911: }
1912: }
1913: } else {
1914: if (!localStageVisible[stage]) continue;
1915: PetscFPrintf(comm, fd, "\n--- Event Stage %d: Unknown\n\n", stage);
1916: }
1917: }
1919: PetscFree(localStageUsed);
1920: PetscFree(stageUsed);
1921: PetscFree(localStageVisible);
1922: PetscFree(stageVisible);
1924: /* Information unrelated to this particular run */
1925: PetscFPrintf(comm, fd, "========================================================================================================================\n");
1926: PetscTime(&y);
1927: PetscTime(&x);
1928: PetscTime(&y);
1929: PetscTime(&y);
1930: PetscTime(&y);
1931: PetscTime(&y);
1932: PetscTime(&y);
1933: PetscTime(&y);
1934: PetscTime(&y);
1935: PetscTime(&y);
1936: PetscTime(&y);
1937: PetscTime(&y);
1938: PetscFPrintf(comm, fd, "Average time to get PetscTime(): %g\n", (y - x) / 10.0);
1939: /* MPI information */
1940: if (size > 1) {
1941: MPI_Status status;
1942: PetscMPIInt tag;
1943: MPI_Comm newcomm;
1945: MPI_Barrier(comm);
1946: PetscTime(&x);
1947: MPI_Barrier(comm);
1948: MPI_Barrier(comm);
1949: MPI_Barrier(comm);
1950: MPI_Barrier(comm);
1951: MPI_Barrier(comm);
1952: PetscTime(&y);
1953: PetscFPrintf(comm, fd, "Average time for MPI_Barrier(): %g\n", (y - x) / 5.0);
1954: PetscCommDuplicate(comm, &newcomm, &tag);
1955: MPI_Barrier(comm);
1956: if (rank) {
1957: MPI_Recv(NULL, 0, MPI_INT, rank - 1, tag, newcomm, &status);
1958: MPI_Send(NULL, 0, MPI_INT, (rank + 1) % size, tag, newcomm);
1959: } else {
1960: PetscTime(&x);
1961: MPI_Send(NULL, 0, MPI_INT, 1, tag, newcomm);
1962: MPI_Recv(NULL, 0, MPI_INT, size - 1, tag, newcomm, &status);
1963: PetscTime(&y);
1964: PetscFPrintf(comm, fd, "Average time for zero size MPI_Send(): %g\n", (y - x) / size);
1965: }
1966: PetscCommDestroy(&newcomm);
1967: }
1968: PetscOptionsView(NULL, viewer);
1970: /* Machine and compile information */
1971: #if defined(PETSC_USE_FORTRAN_KERNELS)
1972: PetscFPrintf(comm, fd, "Compiled with FORTRAN kernels\n");
1973: #else
1974: PetscFPrintf(comm, fd, "Compiled without FORTRAN kernels\n");
1975: #endif
1976: #if defined(PETSC_USE_64BIT_INDICES)
1977: PetscFPrintf(comm, fd, "Compiled with 64 bit PetscInt\n");
1978: #elif defined(PETSC_USE___FLOAT128)
1979: PetscFPrintf(comm, fd, "Compiled with 32 bit PetscInt\n");
1980: #endif
1981: #if defined(PETSC_USE_REAL_SINGLE)
1982: PetscFPrintf(comm, fd, "Compiled with single precision PetscScalar and PetscReal\n");
1983: #elif defined(PETSC_USE___FLOAT128)
1984: PetscFPrintf(comm, fd, "Compiled with 128 bit precision PetscScalar and PetscReal\n");
1985: #endif
1986: #if defined(PETSC_USE_REAL_MAT_SINGLE)
1987: PetscFPrintf(comm, fd, "Compiled with single precision matrices\n");
1988: #else
1989: PetscFPrintf(comm, fd, "Compiled with full precision matrices (default)\n");
1990: #endif
1991: PetscFPrintf(comm, fd, "sizeof(short) %d sizeof(int) %d sizeof(long) %d sizeof(void*) %d sizeof(PetscScalar) %d sizeof(PetscInt) %d\n", (int)sizeof(short), (int)sizeof(int), (int)sizeof(long), (int)sizeof(void *), (int)sizeof(PetscScalar), (int)sizeof(PetscInt));
1993: PetscFPrintf(comm, fd, "Configure options: %s", petscconfigureoptions);
1994: PetscFPrintf(comm, fd, "%s", petscmachineinfo);
1995: PetscFPrintf(comm, fd, "%s", petsccompilerinfo);
1996: PetscFPrintf(comm, fd, "%s", petsccompilerflagsinfo);
1997: PetscFPrintf(comm, fd, "%s", petsclinkerinfo);
1999: /* Cleanup */
2000: PetscFPrintf(comm, fd, "\n");
2001: PetscLogViewWarnNoGpuAwareMpi(comm, fd);
2002: PetscLogViewWarnDebugging(comm, fd);
2003: PetscFPTrapPop();
2004: return 0;
2005: }
2007: /*@C
2008: PetscLogView - Prints a summary of the logging.
2010: Collective over MPI_Comm
2012: Input Parameter:
2013: . viewer - an ASCII viewer
2015: Options Database Keys:
2016: + -log_view [:filename] - Prints summary of log information
2017: . -log_view :filename.py:ascii_info_detail - Saves logging information from each process as a Python file
2018: . -log_view :filename.xml:ascii_xml - Saves a summary of the logging information in a nested format (see below for how to view it)
2019: . -log_view :filename.txt:ascii_flamegraph - Saves logging information in a format suitable for visualising as a Flame Graph (see below for how to view it)
2020: . -log_view_memory - Also display memory usage in each event
2021: . -log_view_gpu_time - Also display time in each event for GPU kernels (Note this may slow the computation)
2022: . -log_all - Saves a file Log.rank for each MPI rank with details of each step of the computation
2023: - -log_trace [filename] - Displays a trace of what each process is doing
2025: Level: beginner
2027: Notes:
2028: It is possible to control the logging programmatically but we recommend using the options database approach whenever possible
2029: By default the summary is printed to stdout.
2031: Before calling this routine you must have called either PetscLogDefaultBegin() or PetscLogNestedBegin()
2033: If PETSc is configured with --with-logging=0 then this functionality is not available
2035: To view the nested XML format filename.xml first copy ${PETSC_DIR}/share/petsc/xml/performance_xml2html.xsl to the current
2036: directory then open filename.xml with your browser. Specific notes for certain browsers
2037: $ Firefox and Internet explorer - simply open the file
2038: $ Google Chrome - you must start up Chrome with the option --allow-file-access-from-files
2039: $ Safari - see https://ccm.net/faq/36342-safari-how-to-enable-local-file-access
2040: or one can use the package http://xmlsoft.org/XSLT/xsltproc2.html to translate the xml file to html and then open it with
2041: your browser.
2042: Alternatively, use the script ${PETSC_DIR}/lib/petsc/bin/petsc-performance-view to automatically open a new browser
2043: window and render the XML log file contents.
2045: The nested XML format was kindly donated by Koos Huijssen and Christiaan M. Klaij MARITIME RESEARCH INSTITUTE NETHERLANDS
2047: The Flame Graph output can be visualised using either the original Flame Graph script (https://github.com/brendangregg/FlameGraph)
2048: or using speedscope (https://www.speedscope.app).
2049: Old XML profiles may be converted into this format using the script ${PETSC_DIR}/lib/petsc/bin/xml2flamegraph.py.
2051: .seealso: [](ch_profiling), `PetscLogDefaultBegin()`, `PetscLogDump()`
2052: @*/
2053: PetscErrorCode PetscLogView(PetscViewer viewer)
2054: {
2055: PetscBool isascii;
2056: PetscViewerFormat format;
2057: int stage, lastStage;
2058: PetscStageLog stageLog;
2061: /* Pop off any stages the user forgot to remove */
2062: lastStage = 0;
2063: PetscLogGetStageLog(&stageLog);
2064: PetscStageLogGetCurrent(stageLog, &stage);
2065: while (stage >= 0) {
2066: lastStage = stage;
2067: PetscStageLogPop(stageLog);
2068: PetscStageLogGetCurrent(stageLog, &stage);
2069: }
2070: PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii);
2072: PetscViewerGetFormat(viewer, &format);
2073: if (format == PETSC_VIEWER_DEFAULT || format == PETSC_VIEWER_ASCII_INFO) {
2074: PetscLogView_Default(viewer);
2075: } else if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
2076: PetscLogView_Detailed(viewer);
2077: } else if (format == PETSC_VIEWER_ASCII_CSV) {
2078: PetscLogView_CSV(viewer);
2079: } else if (format == PETSC_VIEWER_ASCII_XML) {
2080: PetscLogView_Nested(viewer);
2081: } else if (format == PETSC_VIEWER_ASCII_FLAMEGRAPH) {
2082: PetscLogView_Flamegraph(viewer);
2083: }
2084: PetscStageLogPush(stageLog, lastStage);
2085: return 0;
2086: }
2088: /*@C
2089: PetscLogViewFromOptions - Processes command line options to determine if/how a `PetscLog` is to be viewed.
2091: Collective on `PETSC_COMM_WORLD`
2093: Level: developer
2095: .seealso: [](ch_profiling), `PetscLogView()`
2096: @*/
2097: PetscErrorCode PetscLogViewFromOptions(void)
2098: {
2099: PetscViewer viewer;
2100: PetscBool flg;
2101: PetscViewerFormat format;
2103: PetscOptionsGetViewer(PETSC_COMM_WORLD, NULL, NULL, "-log_view", &viewer, &format, &flg);
2104: if (flg) {
2105: PetscViewerPushFormat(viewer, format);
2106: PetscLogView(viewer);
2107: PetscViewerPopFormat(viewer);
2108: PetscViewerDestroy(&viewer);
2109: }
2110: return 0;
2111: }
2113: /*----------------------------------------------- Counter Functions -------------------------------------------------*/
2114: /*@C
2115: PetscGetFlops - Returns the number of flops used on this processor
2116: since the program began.
2118: Not Collective
2120: Output Parameter:
2121: flops - number of floating point operations
2123: Level: intermediate
2125: Notes:
2126: A global counter logs all PETSc flop counts. The user can use
2127: `PetscLogFlops()` to increment this counter to include flops for the
2128: application code.
2130: A separate counter `PetscLogGPUFlops()` logs the flops that occur on any GPU associated with this MPI rank
2132: .seealso: [](ch_profiling), `PetscLogGPUFlops()`, `PetscTime()`, `PetscLogFlops()`
2133: @*/
2134: PetscErrorCode PetscGetFlops(PetscLogDouble *flops)
2135: {
2136: *flops = petsc_TotalFlops;
2137: return 0;
2138: }
2140: PetscErrorCode PetscLogObjectState(PetscObject obj, const char format[], ...)
2141: {
2142: size_t fullLength;
2143: va_list Argp;
2145: if (!petsc_logObjects) return 0;
2146: va_start(Argp, format);
2147: PetscVSNPrintf(petsc_objects[obj->id].info, 64, format, &fullLength, Argp);
2148: va_end(Argp);
2149: return 0;
2150: }
2152: /*MC
2153: PetscLogFlops - Adds floating point operations to the global counter.
2155: Synopsis:
2156: #include <petsclog.h>
2157: PetscErrorCode PetscLogFlops(PetscLogDouble f)
2159: Not Collective
2161: Input Parameter:
2162: . f - flop counter
2164: Usage:
2165: .vb
2166: PetscLogEvent USER_EVENT;
2167: PetscLogEventRegister("User event",0,&USER_EVENT);
2168: PetscLogEventBegin(USER_EVENT,0,0,0,0);
2169: [code segment to monitor]
2170: PetscLogFlops(user_flops)
2171: PetscLogEventEnd(USER_EVENT,0,0,0,0);
2172: .ve
2174: Level: intermediate
2176: Note:
2177: A global counter logs all PETSc flop counts. The user can use
2178: PetscLogFlops() to increment this counter to include flops for the
2179: application code.
2181: .seealso: [](ch_profiling), `PetscLogGPUFlops()`, `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscGetFlops()`
2182: M*/
2184: /*MC
2185: PetscPreLoadBegin - Begin a segment of code that may be preloaded (run twice)
2186: to get accurate timings
2188: Synopsis:
2189: #include <petsclog.h>
2190: void PetscPreLoadBegin(PetscBool flag,char *name);
2192: Not Collective
2194: Input Parameters:
2195: + flag - PETSC_TRUE to run twice, `PETSC_FALSE` to run once, may be overridden
2196: with command line option -preload true or -preload false
2197: - name - name of first stage (lines of code timed separately with -log_view) to
2198: be preloaded
2200: Usage:
2201: .vb
2202: PetscPreLoadBegin(PETSC_TRUE,"first stage);
2203: lines of code
2204: PetscPreLoadStage("second stage");
2205: lines of code
2206: PetscPreLoadEnd();
2207: .ve
2209: Level: intermediate
2211: Note:
2212: Only works in C/C++, not Fortran
2214: Flags available within the macro.
2215: + PetscPreLoadingUsed - true if we are or have done preloading
2216: . PetscPreLoadingOn - true if it is CURRENTLY doing preload
2217: . PetscPreLoadIt - 0 for the first computation (with preloading turned off it is only 0) 1 for the second
2218: - PetscPreLoadMax - number of times it will do the computation, only one when preloading is turned on
2219: The first two variables are available throughout the program, the second two only between the PetscPreLoadBegin()
2220: and PetscPreLoadEnd()
2222: .seealso: [](ch_profiling), `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()`
2223: M*/
2225: /*MC
2226: PetscPreLoadEnd - End a segment of code that may be preloaded (run twice)
2227: to get accurate timings
2229: Synopsis:
2230: #include <petsclog.h>
2231: void PetscPreLoadEnd(void);
2233: Not Collective
2235: Usage:
2236: .vb
2237: PetscPreLoadBegin(PETSC_TRUE,"first stage);
2238: lines of code
2239: PetscPreLoadStage("second stage");
2240: lines of code
2241: PetscPreLoadEnd();
2242: .ve
2244: Level: intermediate
2246: Note:
2247: Only works in C/C++ not fortran
2249: .seealso: [](ch_profiling), `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadStage()`
2250: M*/
2252: /*MC
2253: PetscPreLoadStage - Start a new segment of code to be timed separately.
2254: to get accurate timings
2256: Synopsis:
2257: #include <petsclog.h>
2258: void PetscPreLoadStage(char *name);
2260: Not Collective
2262: Usage:
2263: .vb
2264: PetscPreLoadBegin(PETSC_TRUE,"first stage);
2265: lines of code
2266: PetscPreLoadStage("second stage");
2267: lines of code
2268: PetscPreLoadEnd();
2269: .ve
2271: Level: intermediate
2273: Note:
2274: Only works in C/C++ not fortran
2276: .seealso: [](ch_profiling), `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()`
2277: M*/
2279: #if PetscDefined(HAVE_DEVICE)
2280: #include <petsc/private/deviceimpl.h>
2282: PetscBool PetscLogGpuTimeFlag = PETSC_FALSE;
2284: /*
2285: This cannot be called by users between PetscInitialize() and PetscFinalize() at any random location in the code
2286: because it will result in timing results that cannot be interpreted.
2287: */
2288: static PetscErrorCode PetscLogGpuTime_Off(void)
2289: {
2290: PetscLogGpuTimeFlag = PETSC_FALSE;
2291: return 0;
2292: }
2294: /*@C
2295: PetscLogGpuTime - turn on the logging of GPU time for GPU kernels
2297: Options Database Key:
2298: . -log_view_gpu_time - provide the GPU times in the -log_view output
2300: Level: advanced
2302: Notes:
2303: Turning on the timing of the
2304: GPU kernels can slow down the entire computation and should only be used when studying the performance
2305: of operations on GPU such as vector operations and matrix-vector operations.
2307: This routine should only be called once near the beginning of the program. Once it is started it cannot be turned off.
2309: .seealso: [](ch_profiling), `PetscLogView()`, `PetscLogGpuFlops()`, `PetscLogGpuTimeEnd()`, `PetscLogGpuTimeBegin()`
2310: @*/
2311: PetscErrorCode PetscLogGpuTime(void)
2312: {
2313: if (!PetscLogGpuTimeFlag) PetscRegisterFinalize(PetscLogGpuTime_Off);
2314: PetscLogGpuTimeFlag = PETSC_TRUE;
2315: return 0;
2316: }
2318: /*@C
2319: PetscLogGpuTimeBegin - Start timer for device
2321: Level: intermediate
2323: Notes:
2324: When CUDA or HIP is enabled, the timer is run on the GPU, it is a separate logging of time devoted to GPU computations (excluding kernel launch times).
2326: When CUDA or HIP is not available, the timer is run on the CPU, it is a separate logging of time devoted to GPU computations (including kernel launch times).
2328: There is no need to call WaitForCUDA() or WaitForHIP() between `PetscLogGpuTimeBegin()` and `PetscLogGpuTimeEnd()`
2330: This timer should NOT include times for data transfers between the GPU and CPU, nor setup actions such as allocating space.
2332: The regular logging captures the time for data transfers and any CPU activities during the event
2334: It is used to compute the flop rate on the GPU as it is actively engaged in running a kernel.
2336: Developer Notes:
2337: The GPU event timer captures the execution time of all the kernels launched in the default stream by the CPU between `PetscLogGpuTimeBegin()` and `PetsLogGpuTimeEnd()`.
2339: `PetscLogGpuTimeBegin()` and `PetsLogGpuTimeEnd()` insert the begin and end events into the default stream (stream 0). The device will record a time stamp for the
2340: event when it reaches that event in the stream. The function xxxEventSynchronize() is called in `PetsLogGpuTimeEnd()` to block CPU execution,
2341: but not continued GPU execution, until the timer event is recorded.
2343: .seealso: [](ch_profiling), `PetscLogView()`, `PetscLogGpuFlops()`, `PetscLogGpuTimeEnd()`, `PetscLogGpuTime()`
2344: @*/
2345: PetscErrorCode PetscLogGpuTimeBegin(void)
2346: {
2347: if (!PetscLogPLB || !PetscLogGpuTimeFlag) return 0;
2348: if (PetscDefined(HAVE_CUDA) || PetscDefined(HAVE_HIP)) {
2349: PetscDeviceContext dctx;
2351: PetscDeviceContextGetCurrentContext(&dctx);
2352: PetscDeviceContextBeginTimer_Internal(dctx);
2353: } else {
2354: PetscTimeSubtract(&petsc_gtime);
2355: }
2356: return 0;
2357: }
2359: /*@C
2360: PetscLogGpuTimeEnd - Stop timer for device
2362: Level: intermediate
2364: .seealso: [](ch_profiling), `PetscLogView()`, `PetscLogGpuFlops()`, `PetscLogGpuTimeBegin()`
2365: @*/
2366: PetscErrorCode PetscLogGpuTimeEnd(void)
2367: {
2368: if (!PetscLogPLE || !PetscLogGpuTimeFlag) return 0;
2369: if (PetscDefined(HAVE_CUDA) || PetscDefined(HAVE_HIP)) {
2370: PetscDeviceContext dctx;
2371: PetscLogDouble elapsed;
2373: PetscDeviceContextGetCurrentContext(&dctx);
2374: PetscDeviceContextEndTimer_Internal(dctx, &elapsed);
2375: petsc_gtime += (elapsed / 1000.0);
2376: } else {
2377: PetscTimeAdd(&petsc_gtime);
2378: }
2379: return 0;
2380: }
2381: #endif /* end of PETSC_HAVE_DEVICE */
2383: #else /* end of -DPETSC_USE_LOG section */
2385: PetscErrorCode PetscLogObjectState(PetscObject obj, const char format[], ...)
2386: {
2387: return 0;
2388: }
2390: #endif /* PETSC_USE_LOG*/
2392: PetscClassId PETSC_LARGEST_CLASSID = PETSC_SMALLEST_CLASSID;
2393: PetscClassId PETSC_OBJECT_CLASSID = 0;
2395: /*@C
2396: PetscClassIdRegister - Registers a new class name for objects and logging operations in an application code.
2398: Not Collective
2400: Input Parameter:
2401: . name - The class name
2403: Output Parameter:
2404: . oclass - The class id or classid
2406: Level: developer
2408: .seealso: [](ch_profiling), `PetscLogEventRegister()`
2409: @*/
2410: PetscErrorCode PetscClassIdRegister(const char name[], PetscClassId *oclass)
2411: {
2412: #if defined(PETSC_USE_LOG)
2413: PetscStageLog stageLog;
2414: PetscInt stage;
2415: #endif
2417: *oclass = ++PETSC_LARGEST_CLASSID;
2418: #if defined(PETSC_USE_LOG)
2419: PetscLogGetStageLog(&stageLog);
2420: PetscClassRegLogRegister(stageLog->classLog, name, *oclass);
2421: for (stage = 0; stage < stageLog->numStages; stage++) PetscClassPerfLogEnsureSize(stageLog->stageInfo[stage].classLog, stageLog->classLog->numClasses);
2422: #endif
2423: return 0;
2424: }
2426: #if defined(PETSC_USE_LOG) && defined(PETSC_HAVE_MPE)
2427: #include <mpe.h>
2429: PetscBool PetscBeganMPE = PETSC_FALSE;
2431: PETSC_INTERN PetscErrorCode PetscLogEventBeginMPE(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject);
2432: PETSC_INTERN PetscErrorCode PetscLogEventEndMPE(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject);
2434: /*@C
2435: PetscLogMPEBegin - Turns on MPE logging of events. This creates large log files
2436: and slows the program down.
2438: Collective over `PETSC_COMM_WORLD`
2440: Options Database Key:
2441: . -log_mpe - Prints extensive log information
2443: Level: advanced
2445: Note:
2446: A related routine is `PetscLogDefaultBegin()` (with the options key -log_view), which is
2447: intended for production runs since it logs only flop rates and object
2448: creation (and should not significantly slow the programs).
2450: .seealso: [](ch_profiling), `PetscLogDump()`, `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogEventActivate()`,
2451: `PetscLogEventDeactivate()`
2452: @*/
2453: PetscErrorCode PetscLogMPEBegin(void)
2454: {
2455: /* Do MPE initialization */
2456: if (!MPE_Initialized_logging()) { /* This function exists in mpich 1.1.2 and higher */
2457: PetscInfo(0, "Initializing MPE.\n");
2458: MPE_Init_log();
2460: PetscBeganMPE = PETSC_TRUE;
2461: } else {
2462: PetscInfo(0, "MPE already initialized. Not attempting to reinitialize.\n");
2463: }
2464: PetscLogSet(PetscLogEventBeginMPE, PetscLogEventEndMPE);
2465: return 0;
2466: }
2468: /*@C
2469: PetscLogMPEDump - Dumps the MPE logging info to file for later use with Jumpshot.
2471: Collective over `PETSC_COMM_WORLD`
2473: Level: advanced
2475: .seealso: [](ch_profiling), `PetscLogDump()`, `PetscLogAllBegin()`, `PetscLogMPEBegin()`
2476: @*/
2477: PetscErrorCode PetscLogMPEDump(const char sname[])
2478: {
2479: char name[PETSC_MAX_PATH_LEN];
2481: if (PetscBeganMPE) {
2482: PetscInfo(0, "Finalizing MPE.\n");
2483: if (sname) {
2484: PetscStrcpy(name, sname);
2485: } else {
2486: PetscGetProgramName(name, sizeof(name));
2487: }
2488: MPE_Finish_log(name);
2489: } else {
2490: PetscInfo(0, "Not finalizing MPE (not started by PETSc).\n");
2491: }
2492: return 0;
2493: }
2495: #define PETSC_RGB_COLORS_MAX 39
2496: static const char *PetscLogMPERGBColors[PETSC_RGB_COLORS_MAX] = {"OliveDrab: ", "BlueViolet: ", "CadetBlue: ", "CornflowerBlue: ", "DarkGoldenrod: ", "DarkGreen: ", "DarkKhaki: ", "DarkOliveGreen: ",
2497: "DarkOrange: ", "DarkOrchid: ", "DarkSeaGreen: ", "DarkSlateGray: ", "DarkTurquoise: ", "DeepPink: ", "DarkKhaki: ", "DimGray: ",
2498: "DodgerBlue: ", "GreenYellow: ", "HotPink: ", "IndianRed: ", "LavenderBlush: ", "LawnGreen: ", "LemonChiffon: ", "LightCoral: ",
2499: "LightCyan: ", "LightPink: ", "LightSalmon: ", "LightSlateGray: ", "LightYellow: ", "LimeGreen: ", "MediumPurple: ", "MediumSeaGreen: ",
2500: "MediumSlateBlue:", "MidnightBlue: ", "MintCream: ", "MistyRose: ", "NavajoWhite: ", "NavyBlue: ", "OliveDrab: "};
2502: /*@C
2503: PetscLogMPEGetRGBColor - This routine returns a rgb color useable with `PetscLogEventRegister()`
2505: Not collective. Maybe it should be?
2507: Output Parameter:
2508: . str - character string representing the color
2510: Level: developer
2512: .seealso: [](ch_profiling), `PetscLogEventRegister()`
2513: @*/
2514: PetscErrorCode PetscLogMPEGetRGBColor(const char *str[])
2515: {
2516: static int idx = 0;
2518: *str = PetscLogMPERGBColors[idx];
2519: idx = (idx + 1) % PETSC_RGB_COLORS_MAX;
2520: return 0;
2521: }
2523: #endif /* PETSC_USE_LOG && PETSC_HAVE_MPE */