Fix ARM compilation.

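This does not provide full support for ARM; it merely makes the source
(statically) compile there by guarding the x86-specific SSE intrinsics
(<xmmintrin.h>) and the CPUID query behind x86 architecture checks.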

Bug b/37478805

Change-Id: I01d1d84e396c04c84e74d521946595014d2eafb5
Reviewed-on: https://swiftshader-review.googlesource.com/9430
Reviewed-by: Nicolas Capens <capn@google.com>
Tested-by: Nicolas Capens <capn@google.com>
diff --git a/src/Reactor/LLVMReactor.cpp b/src/Reactor/LLVMReactor.cpp
index 1e8a75a..bfcf2ff 100644
--- a/src/Reactor/LLVMReactor.cpp
+++ b/src/Reactor/LLVMReactor.cpp
@@ -37,9 +37,12 @@
 #include "Memory.hpp"
 #include "MutexLock.hpp"
 
-#include <xmmintrin.h>
 #include <fstream>
 
+#if defined(__i386__) || defined(__x86_64__)
+#include <xmmintrin.h>
+#endif
+
 #if defined(__x86_64__) && defined(_WIN32)
 extern "C" void X86CompilationCallback()
 {
@@ -5734,16 +5737,16 @@
 
 	RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2)
 	{
-		if(exactAtPow2)
-		{
-			// rcpss uses a piecewise-linear approximation which minimizes the relative error
-			// but is not exact at power-of-two values. Rectify by multiplying by the inverse.
-			return x86::rcpss(x) * Float(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
-		}
-		else
-		{
-			return x86::rcpss(x);
-		}
+		#if defined(__i386__) || defined(__x86_64__)
+			if(exactAtPow2)
+			{
+				// rcpss uses a piecewise-linear approximation which minimizes the relative error
+				// but is not exact at power-of-two values. Rectify by multiplying by the inverse.
+				return x86::rcpss(x) * Float(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
+			}
+		#endif
+
+		return x86::rcpss(x);
 	}
 
 	RValue<Float> RcpSqrt_pp(RValue<Float> x)
@@ -6114,16 +6117,16 @@
 
 	RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2)
 	{
-		if(exactAtPow2)
-		{
-			// rcpps uses a piecewise-linear approximation which minimizes the relative error
-			// but is not exact at power-of-two values. Rectify by multiplying by the inverse.
-			return x86::rcpps(x) * Float4(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
-		}
-		else
-		{
-			return x86::rcpps(x);
-		}
+		#if defined(__i386__) || defined(__x86_64__)
+			if(exactAtPow2)
+			{
+				// rcpps uses a piecewise-linear approximation which minimizes the relative error
+				// but is not exact at power-of-two values. Rectify by multiplying by the inverse.
+				return x86::rcpps(x) * Float4(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
+			}
+		#endif
+
+		return x86::rcpps(x);
 	}
 
 	RValue<Float4> RcpSqrt_pp(RValue<Float4> x)
diff --git a/src/Reactor/SubzeroReactor.cpp b/src/Reactor/SubzeroReactor.cpp
index d1464a5..ee3fedb 100644
--- a/src/Reactor/SubzeroReactor.cpp
+++ b/src/Reactor/SubzeroReactor.cpp
@@ -66,6 +66,14 @@
 
 namespace
 {
+	#if !defined(__i386__) && defined(_M_IX86)
+		#define __i386__ 1
+	#endif
+
+	#if !defined(__x86_64__) && (defined(_M_AMD64) || defined (_M_X64))
+		#define __x86_64__ 1
+	#endif
+
 	class CPUID
 	{
 	public:
@@ -74,18 +82,29 @@
 	private:
 		static void cpuid(int registers[4], int info)
 		{
-			#if defined(_WIN32)
-				__cpuid(registers, info);
+			#if defined(__i386__) || defined(__x86_64__)
+				#if defined(_WIN32)
+					__cpuid(registers, info);
+				#else
+					__asm volatile("cpuid": "=a" (registers[0]), "=b" (registers[1]), "=c" (registers[2]), "=d" (registers[3]): "a" (info));
+				#endif
 			#else
-				__asm volatile("cpuid": "=a" (registers[0]), "=b" (registers[1]), "=c" (registers[2]), "=d" (registers[3]): "a" (info));
+				registers[0] = 0;
+				registers[1] = 0;
+				registers[2] = 0;
+				registers[3] = 0;
 			#endif
 		}
 
 		static bool detectSSE4_1()
 		{
-			int registers[4];
-			cpuid(registers, 1);
-			return (registers[2] & 0x00080000) != 0;
+			#if defined(__i386__) || defined(__x86_64__)
+				int registers[4];
+				cpuid(registers, 1);
+				return (registers[2] & 0x00080000) != 0;
+			#else
+				return false;
+			#endif
 		}
 	};