Detect SSE4.1 support for Subzero.

Bug swiftshader:20

Change-Id: I20c2ab7cb4c00c365520ff8b8500f7594127498b
Reviewed-on: https://swiftshader-review.googlesource.com/8468
Tested-by: Nicolas Capens <capn@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <capn@google.com>
diff --git a/src/Common/Math.cpp b/src/Common/Math.cpp
index fcf7eca..290d4ab 100644
--- a/src/Common/Math.cpp
+++ b/src/Common/Math.cpp
@@ -14,8 +14,6 @@
 
 #include "Math.hpp"
 
-#include "CPUID.hpp"
-
 namespace sw
 {
 	inline uint64_t FNV_1a(uint64_t hash, unsigned char data)
@@ -26,7 +24,7 @@
 	uint64_t FNV_1a(const unsigned char *data, int size)
 	{
 		int64_t hash = 0xCBF29CE484222325;
-   
+
 		for(int i = 0; i < size; i++)
 		{
 			hash = FNV_1a(hash, data[i]);
diff --git a/src/Main/FrameBuffer.cpp b/src/Main/FrameBuffer.cpp
index db83ce4..0b8c010 100644
--- a/src/Main/FrameBuffer.cpp
+++ b/src/Main/FrameBuffer.cpp
@@ -15,7 +15,6 @@
 #include "FrameBuffer.hpp"
 
 #include "Timer.hpp"
-#include "CPUID.hpp"
 #include "Renderer/Surface.hpp"
 #include "Reactor/Reactor.hpp"
 #include "Common/Debug.hpp"
diff --git a/src/Reactor/SubzeroReactor.cpp b/src/Reactor/SubzeroReactor.cpp
index 0a78f6e..bd23e35 100644
--- a/src/Reactor/SubzeroReactor.cpp
+++ b/src/Reactor/SubzeroReactor.cpp
@@ -57,6 +57,34 @@
 	Ice::Fdstream *out = nullptr;
 }
 
+namespace   // Unnamed namespace: CPUID below is private to this translation unit
+{
+	class CPUID   // Run-time x86 feature query; SSE4_1 is read when choosing the Subzero target instruction set
+	{
+	public:
+		const static bool SSE4_1;   // Value returned by detectSSE4_1(); defined after the class body
+
+	private:
+		static void cpuid(int registers[4], int info)   // Executes CPUID with EAX = info; registers[0..3] receive EAX, EBX, ECX, EDX
+		{
+			#if defined(_WIN32)   // NOTE(review): __cpuid is presumably declared via <intrin.h> included elsewhere in this file - confirm
+				__cpuid(registers, info);
+			#else   // Any non-Windows build uses GCC-style extended inline assembly
+				__asm volatile("cpuid": "=a" (registers[0]), "=b" (registers[1]), "=c" (registers[2]), "=d" (registers[3]): "a" (info));
+			#endif
+		}
+
+		static bool detectSSE4_1()   // Returns true when bit 19 of ECX from CPUID leaf 1 is set
+		{
+			int registers[4];
+			cpuid(registers, 1);   // Leaf 1 is the feature-information leaf
+			return (registers[2] & 0x00080000) != 0;   // registers[2] is ECX; 0x00080000 == 1 << 19, the SSE4.1 flag
+		}
+	};
+
+	const bool CPUID::SSE4_1 = CPUID::detectSSE4_1();   // Dynamic initializer; NOTE(review): reads false if used before this file's statics are initialized
+}
+
 namespace sw
 {
 	enum EmulatedType
@@ -402,7 +430,7 @@
 		Flags.setOutFileType(Ice::FT_Elf);
 		Flags.setOptLevel(Ice::Opt_2);
 		Flags.setApplicationBinaryInterface(Ice::ABI_Platform);
-		Flags.setTargetInstructionSet(Ice::X86InstructionSet_SSE4_1);
+		Flags.setTargetInstructionSet(CPUID::SSE4_1 ? Ice::X86InstructionSet_SSE4_1 : Ice::X86InstructionSet_SSE2);
 		Flags.setVerbose(false ? Ice::IceV_All : Ice::IceV_None);
 
 		static llvm::raw_os_ostream cout(std::cout);
diff --git a/src/Renderer/Context.cpp b/src/Renderer/Context.cpp
index 7e44441..caa4592 100644
--- a/src/Renderer/Context.cpp
+++ b/src/Renderer/Context.cpp
@@ -19,7 +19,6 @@
 #include "Primitive.hpp"
 #include "Surface.hpp"
 #include "Memory.hpp"
-#include "CPUID.hpp"
 #include "Debug.hpp"
 
 #include <string.h>
diff --git a/src/Renderer/Sampler.cpp b/src/Renderer/Sampler.cpp
index e83f29f..cd4c614 100644
--- a/src/Renderer/Sampler.cpp
+++ b/src/Renderer/Sampler.cpp
@@ -16,7 +16,6 @@
 
 #include "Context.hpp"
 #include "Surface.hpp"
-#include "CPUID.hpp"
 #include "PixelRoutine.hpp"
 #include "Debug.hpp"
 
diff --git a/src/Renderer/VertexProcessor.cpp b/src/Renderer/VertexProcessor.cpp
index a64c010..91c4d34 100644
--- a/src/Renderer/VertexProcessor.cpp
+++ b/src/Renderer/VertexProcessor.cpp
@@ -1068,7 +1068,7 @@
 
 		if(!routine)   // Create one
 		{
-			VertexRoutine *generator = 0;
+			VertexRoutine *generator = nullptr;
 
 			if(state.fixedFunction)
 			{
diff --git a/src/Shader/PixelRoutine.cpp b/src/Shader/PixelRoutine.cpp
index 7e6c943..d0650b0 100644
--- a/src/Shader/PixelRoutine.cpp
+++ b/src/Shader/PixelRoutine.cpp
@@ -18,7 +18,6 @@
 #include "QuadRasterizer.hpp"
 #include "Surface.hpp"
 #include "Primitive.hpp"
-#include "CPUID.hpp"
 #include "SamplerCore.hpp"
 #include "Constants.hpp"
 #include "Debug.hpp"
diff --git a/third_party/pnacl-subzero b/third_party/pnacl-subzero
index 4e679e5..dbf81e0 160000
--- a/third_party/pnacl-subzero
+++ b/third_party/pnacl-subzero
@@ -1 +1 @@
-Subproject commit 4e679e51f645b99940f43c80fec7f146d59eb00f
+Subproject commit dbf81e0c0e364173176159e0e2548e9948b197a4