Actual source code: sseenabled.c
2: #include <petscsys.h>
4: #if defined(PETSC_HAVE_SSE)
6: #include PETSC_HAVE_SSE
7: #define SSE_FEATURE_FLAG 0x2000000 /* Mask for bit 25 (from bit 0) */
9: PetscErrorCode PetscSSEHardwareTest(PetscBool *flag)
10: {
11: char vendor[13];
12: char Intel[13] = "GenuineIntel";
13: char AMD[13] = "AuthenticAMD";
14: char Hygon[13] = "HygonGenuine";
15: PetscBool flg;
17: PetscStrncpy(vendor, "************", sizeof(vendor));
18: CPUID_GET_VENDOR(vendor);
19: PetscStrcmp(vendor, Intel, &flg);
20: if (!flg) PetscStrcmp(vendor, AMD, &flg);
21: if (!flg) {
22: PetscStrcmp(vendor, Hygon, &flg);
23: if (flg) {
24: /* Intel, AMD, and Hygon use bit 25 of CPUID_FEATURES */
25: /* to denote availability of SSE Support */
26: unsigned long myeax, myebx, myecx, myedx;
27: CPUID(CPUID_FEATURES, &myeax, &myebx, &myecx, &myedx);
28: if (myedx & SSE_FEATURE_FLAG) *flag = PETSC_TRUE;
29: else *flag = PETSC_FALSE;
30: }
31: return 0;
32: }
33: }
35: #if defined(PETSC_HAVE_FORK)
#include <signal.h>
#include <stdlib.h>
#include <sys/wait.h>
#include <unistd.h>
37: /*
38: Early versions of the Linux kernel disable SSE hardware because
39: they do not know how to preserve the SSE state at a context switch.
40: To detect this feature, try an SSE instruction in another process.
41: If it works, great! If not, an illegal instruction signal will be thrown,
42: so catch it and return an error code.
43: */
44: #define PetscSSEOSEnabledTest(arg) PetscSSEOSEnabledTest_Linux(arg)
/*
  PetscSSEDisabledHandler - SIGILL handler used while probing for OS-level SSE support.

  Ignores any further SIGILL and terminates the current process with exit code -1, which the
  waiting process observes as a nonzero status.
*/
static void PetscSSEDisabledHandler(int sig)
{
  (void)sig; /* only ever installed for SIGILL */
  signal(SIGILL, SIG_IGN);
  /*
     _Exit() rather than exit(): exit() is not async-signal-safe, and it would run atexit()
     handlers and flush stdio buffers from inside a signal handler.
  */
  _Exit(-1);
}
52: PetscErrorCode PetscSSEOSEnabledTest_Linux(PetscBool *flag)
53: {
54: int status, pid = 0;
56: signal(SIGILL, PetscSSEDisabledHandler);
57: pid = fork();
58: if (pid == 0) {
59: SSE_SCOPE_BEGIN;
60: XOR_PS(XMM0, XMM0);
61: SSE_SCOPE_END;
62: exit(0);
63: } else wait(&status);
64: if (!status) *flag = PETSC_TRUE;
65: else *flag = PETSC_FALSE;
66: return 0;
67: }
69: #else
70: /*
71: Windows 95/98/NT4 should have a Windows Update/Service Patch which enables this hardware.
72: Windows ME/2000 doesn't disable SSE Hardware
73: */
74: #define PetscSSEOSEnabledTest(arg) PetscSSEOSEnabledTest_TRUE(arg)
75: #endif
77: PetscErrorCode PetscSSEOSEnabledTest_TRUE(PetscBool *flag)
78: {
79: if (flag) *flag = PETSC_TRUE;
80: return 0;
81: }
83: #else /* Not defined PETSC_HAVE_SSE */
85: #define PetscSSEHardwareTest(arg) PetscSSEEnabledTest_FALSE(arg)
86: #define PetscSSEOSEnabledTest(arg) PetscSSEEnabledTest_FALSE(arg)
88: PetscErrorCode PetscSSEEnabledTest_FALSE(PetscBool *flag)
89: {
90: if (flag) *flag = PETSC_FALSE;
91: return 0;
92: }
94: #endif /* defined PETSC_HAVE_SSE */
96: /*@C
97: PetscSSEIsEnabled - Determines if Intel Streaming SIMD Extensions (SSE) to the x86 instruction
98: set can be used. Some operating systems do not allow the use of these instructions despite
99: hardware availability.
101: Collective
103: Input Parameter:
104: . comm - the MPI Communicator
106: Output Parameters:
107: + lflag - Local Flag: `PETSC_TRUE` if enabled in this process
108: - gflag - Global Flag: `PETSC_TRUE` if enabled for all processes in comm
110: Note:
111: NULL can be specified for lflag or gflag if either of these values is not desired.
113: Options Database Key:
114: . -disable_sse - Disable use of hand tuned Intel SSE implementations
116: Level: developer
117: @*/
118: static PetscBool petsc_sse_local_is_untested = PETSC_TRUE;
119: static PetscBool petsc_sse_enabled_local = PETSC_FALSE;
120: static PetscBool petsc_sse_global_is_untested = PETSC_TRUE;
121: static PetscBool petsc_sse_enabled_global = PETSC_FALSE;
122: PetscErrorCode PetscSSEIsEnabled(MPI_Comm comm, PetscBool *lflag, PetscBool *gflag)
123: {
124: PetscBool disabled_option;
126: if (petsc_sse_local_is_untested && petsc_sse_global_is_untested) {
127: disabled_option = PETSC_FALSE;
129: PetscOptionsGetBool(NULL, NULL, "-disable_sse", &disabled_option, NULL);
130: if (disabled_option) {
131: petsc_sse_local_is_untested = PETSC_FALSE;
132: petsc_sse_enabled_local = PETSC_FALSE;
133: petsc_sse_global_is_untested = PETSC_FALSE;
134: petsc_sse_enabled_global = PETSC_FALSE;
135: }
137: if (petsc_sse_local_is_untested) {
138: PetscSSEHardwareTest(&petsc_sse_enabled_local);
139: if (petsc_sse_enabled_local) { PetscSSEOSEnabledTest(&petsc_sse_enabled_local); }
140: petsc_sse_local_is_untested = PETSC_FALSE;
141: }
143: if (gflag && petsc_sse_global_is_untested) {
144: MPIU_Allreduce(&petsc_sse_enabled_local, &petsc_sse_enabled_global, 1, MPIU_BOOL, MPI_LAND, comm);
146: petsc_sse_global_is_untested = PETSC_FALSE;
147: }
148: }
150: if (lflag) *lflag = petsc_sse_enabled_local;
151: if (gflag) *gflag = petsc_sse_enabled_global;
152: return 0;
153: }