Actual source code: sseenabled.c


  2: #include <petscsys.h>

  4: #if defined(PETSC_HAVE_SSE)

  6:   #include PETSC_HAVE_SSE
  7:   #define SSE_FEATURE_FLAG 0x2000000 /* Mask for bit 25 (from bit 0) */

  9: PetscErrorCode PetscSSEHardwareTest(PetscBool *flag)
 10: {
 11:   char      vendor[13];
 12:   char      Intel[13] = "GenuineIntel";
 13:   char      AMD[13]   = "AuthenticAMD";
 14:   char      Hygon[13] = "HygonGenuine";
 15:   PetscBool flg;

 17:   PetscStrncpy(vendor, "************", sizeof(vendor));
 18:   CPUID_GET_VENDOR(vendor);
 19:   PetscStrcmp(vendor, Intel, &flg);
 20:   if (!flg) PetscStrcmp(vendor, AMD, &flg);
 21:   if (!flg) {
 22:     PetscStrcmp(vendor, Hygon, &flg);
 23:     if (flg) {
 24:       /* Intel, AMD, and Hygon use bit 25 of CPUID_FEATURES */
 25:       /* to denote availability of SSE Support */
 26:       unsigned long myeax, myebx, myecx, myedx;
 27:       CPUID(CPUID_FEATURES, &myeax, &myebx, &myecx, &myedx);
 28:       if (myedx & SSE_FEATURE_FLAG) *flag = PETSC_TRUE;
 29:       else *flag = PETSC_FALSE;
 30:     }
 31:     return 0;
 32:   }
 33: }

 35:   #if defined(PETSC_HAVE_FORK)
 36:     #include <signal.h>
    /*
   Early versions of the Linux kernel disable SSE hardware because
   they do not know how to preserve the SSE state at a context switch.
   To detect this, try an SSE instruction in another process.
   If it works, great!  If not, an illegal instruction signal will be raised,
   so catch it and return an error code.
*/
 44:     #define PetscSSEOSEnabledTest(arg) PetscSSEOSEnabledTest_Linux(arg)

 46: static void PetscSSEDisabledHandler(int sig)
 47: {
 48:   signal(SIGILL, SIG_IGN);
 49:   exit(-1);
 50: }

 52: PetscErrorCode PetscSSEOSEnabledTest_Linux(PetscBool *flag)
 53: {
 54:   int status, pid = 0;

 56:   signal(SIGILL, PetscSSEDisabledHandler);
 57:   pid = fork();
 58:   if (pid == 0) {
 59:     SSE_SCOPE_BEGIN;
 60:     XOR_PS(XMM0, XMM0);
 61:     SSE_SCOPE_END;
 62:     exit(0);
 63:   } else wait(&status);
 64:   if (!status) *flag = PETSC_TRUE;
 65:   else *flag = PETSC_FALSE;
 66:   return 0;
 67: }

 69:   #else
 70:     /*
 71:    Windows 95/98/NT4 should have a Windows Update/Service Patch which enables this hardware.
 72:    Windows ME/2000 doesn't disable SSE Hardware
 73: */
 74:     #define PetscSSEOSEnabledTest(arg) PetscSSEOSEnabledTest_TRUE(arg)
 75:   #endif

 77: PetscErrorCode PetscSSEOSEnabledTest_TRUE(PetscBool *flag)
 78: {
 79:   if (flag) *flag = PETSC_TRUE;
 80:   return 0;
 81: }

 83: #else /* Not defined PETSC_HAVE_SSE */

 85:   #define PetscSSEHardwareTest(arg)  PetscSSEEnabledTest_FALSE(arg)
 86:   #define PetscSSEOSEnabledTest(arg) PetscSSEEnabledTest_FALSE(arg)

 88: PetscErrorCode PetscSSEEnabledTest_FALSE(PetscBool *flag)
 89: {
 90:   if (flag) *flag = PETSC_FALSE;
 91:   return 0;
 92: }

 94: #endif /* defined PETSC_HAVE_SSE */

 96: /*@C
 97:      PetscSSEIsEnabled - Determines if Intel Streaming SIMD Extensions (SSE) to the x86 instruction
 98:      set can be used.  Some operating systems do not allow the use of these instructions despite
 99:      hardware availability.

101:      Collective

103:      Input Parameter:
104: .    comm - the MPI Communicator

106:      Output Parameters:
107: +    lflag - Local Flag:  `PETSC_TRUE` if enabled in this process
108: -    gflag - Global Flag: `PETSC_TRUE` if enabled for all processes in comm

110:      Note:
111:      NULL can be specified for lflag or gflag if either of these values are not desired.

113:      Options Database Keys:
114: .    -disable_sse - Disable use of hand tuned Intel SSE implementations

116:      Level: developer
117: @*/
118: static PetscBool petsc_sse_local_is_untested  = PETSC_TRUE;
119: static PetscBool petsc_sse_enabled_local      = PETSC_FALSE;
120: static PetscBool petsc_sse_global_is_untested = PETSC_TRUE;
121: static PetscBool petsc_sse_enabled_global     = PETSC_FALSE;
122: PetscErrorCode   PetscSSEIsEnabled(MPI_Comm comm, PetscBool *lflag, PetscBool *gflag)
123: {
124:   PetscBool disabled_option;

126:   if (petsc_sse_local_is_untested && petsc_sse_global_is_untested) {
127:     disabled_option = PETSC_FALSE;

129:     PetscOptionsGetBool(NULL, NULL, "-disable_sse", &disabled_option, NULL);
130:     if (disabled_option) {
131:       petsc_sse_local_is_untested  = PETSC_FALSE;
132:       petsc_sse_enabled_local      = PETSC_FALSE;
133:       petsc_sse_global_is_untested = PETSC_FALSE;
134:       petsc_sse_enabled_global     = PETSC_FALSE;
135:     }

137:     if (petsc_sse_local_is_untested) {
138:       PetscSSEHardwareTest(&petsc_sse_enabled_local);
139:       if (petsc_sse_enabled_local) { PetscSSEOSEnabledTest(&petsc_sse_enabled_local); }
140:       petsc_sse_local_is_untested = PETSC_FALSE;
141:     }

143:     if (gflag && petsc_sse_global_is_untested) {
144:       MPIU_Allreduce(&petsc_sse_enabled_local, &petsc_sse_enabled_global, 1, MPIU_BOOL, MPI_LAND, comm);

146:       petsc_sse_global_is_untested = PETSC_FALSE;
147:     }
148:   }

150:   if (lflag) *lflag = petsc_sse_enabled_local;
151:   if (gflag) *gflag = petsc_sse_enabled_global;
152:   return 0;
153: }