Detect SSE4.1 support for Subzero.
Bug swiftshader:20
Change-Id: I20c2ab7cb4c00c365520ff8b8500f7594127498b
Reviewed-on: https://swiftshader-review.googlesource.com/8468
Tested-by: Nicolas Capens <capn@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <capn@google.com>
diff --git a/src/Common/Math.cpp b/src/Common/Math.cpp
index fcf7eca..290d4ab 100644
--- a/src/Common/Math.cpp
+++ b/src/Common/Math.cpp
@@ -14,8 +14,6 @@
#include "Math.hpp"
-#include "CPUID.hpp"
-
namespace sw
{
inline uint64_t FNV_1a(uint64_t hash, unsigned char data)
@@ -26,7 +24,7 @@
uint64_t FNV_1a(const unsigned char *data, int size)
{
int64_t hash = 0xCBF29CE484222325;
-
+
for(int i = 0; i < size; i++)
{
hash = FNV_1a(hash, data[i]);
diff --git a/src/Main/FrameBuffer.cpp b/src/Main/FrameBuffer.cpp
index db83ce4..0b8c010 100644
--- a/src/Main/FrameBuffer.cpp
+++ b/src/Main/FrameBuffer.cpp
@@ -15,7 +15,6 @@
#include "FrameBuffer.hpp"
#include "Timer.hpp"
-#include "CPUID.hpp"
#include "Renderer/Surface.hpp"
#include "Reactor/Reactor.hpp"
#include "Common/Debug.hpp"
diff --git a/src/Reactor/SubzeroReactor.cpp b/src/Reactor/SubzeroReactor.cpp
index 0a78f6e..bd23e35 100644
--- a/src/Reactor/SubzeroReactor.cpp
+++ b/src/Reactor/SubzeroReactor.cpp
@@ -57,6 +57,34 @@
Ice::Fdstream *out = nullptr;
}
+namespace  // unnamed namespace: CPUID is private to this translation unit
+{
+ class CPUID  // minimal x86 feature probe; only reports the SSE4.1 flag
+ {
+ public:
+ const static bool SSE4_1;  // result of detectSSE4_1(), set once by the out-of-class definition below
+
+ private:
+ static void cpuid(int registers[4], int info)  // runs CPUID for leaf 'info'; registers[0..3] receive EAX, EBX, ECX, EDX
+ {
+ #if defined(_WIN32)  // NOTE(review): __cpuid is presumably the MSVC intrinsic from <intrin.h> - confirm that header is included
+ __cpuid(registers, info);
+ #else  // GCC-style extended asm: 'info' goes in via EAX ("a"); outputs "=a","=b","=c","=d" land in registers[0..3]
+ __asm volatile("cpuid": "=a" (registers[0]), "=b" (registers[1]), "=c" (registers[2]), "=d" (registers[3]): "a" (info));
+ #endif
+ }
+
+ static bool detectSSE4_1()  // returns true when CPUID leaf 1 reports the SSE4.1 feature bit
+ {
+ int registers[4];
+ cpuid(registers, 1);  // leaf 1 is the feature-information leaf
+ return (registers[2] & 0x00080000) != 0;  // registers[2] is ECX; 0x00080000 is bit 19, the SSE4.1 flag
+ }
+ };
+
+ const bool CPUID::SSE4_1 = CPUID::detectSSE4_1();  // dynamic initializer: the probe runs once when this file's statics are initialized
+}
+
namespace sw
{
enum EmulatedType
@@ -402,7 +430,7 @@
Flags.setOutFileType(Ice::FT_Elf);
Flags.setOptLevel(Ice::Opt_2);
Flags.setApplicationBinaryInterface(Ice::ABI_Platform);
- Flags.setTargetInstructionSet(Ice::X86InstructionSet_SSE4_1);
+ Flags.setTargetInstructionSet(CPUID::SSE4_1 ? Ice::X86InstructionSet_SSE4_1 : Ice::X86InstructionSet_SSE2);
Flags.setVerbose(false ? Ice::IceV_All : Ice::IceV_None);
static llvm::raw_os_ostream cout(std::cout);
diff --git a/src/Renderer/Context.cpp b/src/Renderer/Context.cpp
index 7e44441..caa4592 100644
--- a/src/Renderer/Context.cpp
+++ b/src/Renderer/Context.cpp
@@ -19,7 +19,6 @@
#include "Primitive.hpp"
#include "Surface.hpp"
#include "Memory.hpp"
-#include "CPUID.hpp"
#include "Debug.hpp"
#include <string.h>
diff --git a/src/Renderer/Sampler.cpp b/src/Renderer/Sampler.cpp
index e83f29f..cd4c614 100644
--- a/src/Renderer/Sampler.cpp
+++ b/src/Renderer/Sampler.cpp
@@ -16,7 +16,6 @@
#include "Context.hpp"
#include "Surface.hpp"
-#include "CPUID.hpp"
#include "PixelRoutine.hpp"
#include "Debug.hpp"
diff --git a/src/Renderer/VertexProcessor.cpp b/src/Renderer/VertexProcessor.cpp
index a64c010..91c4d34 100644
--- a/src/Renderer/VertexProcessor.cpp
+++ b/src/Renderer/VertexProcessor.cpp
@@ -1068,7 +1068,7 @@
if(!routine) // Create one
{
- VertexRoutine *generator = 0;
+ VertexRoutine *generator = nullptr;
if(state.fixedFunction)
{
diff --git a/src/Shader/PixelRoutine.cpp b/src/Shader/PixelRoutine.cpp
index 7e6c943..d0650b0 100644
--- a/src/Shader/PixelRoutine.cpp
+++ b/src/Shader/PixelRoutine.cpp
@@ -18,7 +18,6 @@
#include "QuadRasterizer.hpp"
#include "Surface.hpp"
#include "Primitive.hpp"
-#include "CPUID.hpp"
#include "SamplerCore.hpp"
#include "Constants.hpp"
#include "Debug.hpp"
diff --git a/third_party/pnacl-subzero b/third_party/pnacl-subzero
index 4e679e5..dbf81e0 160000
--- a/third_party/pnacl-subzero
+++ b/third_party/pnacl-subzero
@@ -1 +1 @@
-Subproject commit 4e679e51f645b99940f43c80fec7f146d59eb00f
+Subproject commit dbf81e0c0e364173176159e0e2548e9948b197a4