Fix ARM compilation.
This does not provide full support for ARM;
it merely makes the code (statically) compile for that target.
Bug b/37478805
Change-Id: I01d1d84e396c04c84e74d521946595014d2eafb5
Reviewed-on: https://swiftshader-review.googlesource.com/9430
Reviewed-by: Nicolas Capens <capn@google.com>
Tested-by: Nicolas Capens <capn@google.com>
diff --git a/src/Reactor/LLVMReactor.cpp b/src/Reactor/LLVMReactor.cpp
index 1e8a75a..bfcf2ff 100644
--- a/src/Reactor/LLVMReactor.cpp
+++ b/src/Reactor/LLVMReactor.cpp
@@ -37,9 +37,12 @@
#include "Memory.hpp"
#include "MutexLock.hpp"
-#include <xmmintrin.h>
#include <fstream>
+#if defined(__i386__) || defined(__x86_64__)
+#include <xmmintrin.h>
+#endif
+
#if defined(__x86_64__) && defined(_WIN32)
extern "C" void X86CompilationCallback()
{
@@ -5734,16 +5737,16 @@
RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2)
{
- if(exactAtPow2)
- {
- // rcpss uses a piecewise-linear approximation which minimizes the relative error
- // but is not exact at power-of-two values. Rectify by multiplying by the inverse.
- return x86::rcpss(x) * Float(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
- }
- else
- {
- return x86::rcpss(x);
- }
+ #if defined(__i386__) || defined(__x86_64__) // same guard as the <xmmintrin.h> include; the _mm_* calls below need that header
+ if(exactAtPow2)
+ {
+ // rcpss uses a piecewise-linear approximation which minimizes the relative error
+ // but is not exact at power-of-two values. Rectify by multiplying by 1 / rcpss(1.0), computed here with the SSE intrinsic.
+ return x86::rcpss(x) * Float(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
+ }
+ #endif
+
+ return x86::rcpss(x); // NOTE(review): builds for other architectures always take this path, so exactAtPow2 is ignored there
}
RValue<Float> RcpSqrt_pp(RValue<Float> x)
@@ -6114,16 +6117,16 @@
RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2)
{
- if(exactAtPow2)
- {
- // rcpps uses a piecewise-linear approximation which minimizes the relative error
- // but is not exact at power-of-two values. Rectify by multiplying by the inverse.
- return x86::rcpps(x) * Float4(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
- }
- else
- {
- return x86::rcpps(x);
- }
+ #if defined(__i386__) || defined(__x86_64__) // same guard as the <xmmintrin.h> include; the _mm_* calls below need that header
+ if(exactAtPow2)
+ {
+ // rcpps uses a piecewise-linear approximation which minimizes the relative error
+ // but is not exact at power-of-two values. Rectify by multiplying every lane by 1 / rcpss(1.0), computed here with the SSE intrinsic.
+ return x86::rcpps(x) * Float4(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
+ }
+ #endif
+
+ return x86::rcpps(x); // NOTE(review): builds for other architectures always take this path, so exactAtPow2 is ignored there
}
RValue<Float4> RcpSqrt_pp(RValue<Float4> x)
diff --git a/src/Reactor/SubzeroReactor.cpp b/src/Reactor/SubzeroReactor.cpp
index d1464a5..ee3fedb 100644
--- a/src/Reactor/SubzeroReactor.cpp
+++ b/src/Reactor/SubzeroReactor.cpp
@@ -66,6 +66,14 @@
namespace
{
+ #if !defined(__i386__) && defined(_M_IX86) // MSVC identifies 32-bit x86 with _M_IX86 rather than __i386__
+ #define __i386__ 1 // alias so the architecture checks further down need only test the GCC/Clang-style name
+ #endif
+
+ #if !defined(__x86_64__) && (defined(_M_AMD64) || defined (_M_X64)) // MSVC identifies 64-bit x86 with _M_AMD64 / _M_X64
+ #define __x86_64__ 1 // alias so the architecture checks further down need only test the GCC/Clang-style name
+ #endif
+
class CPUID
{
public:
@@ -74,18 +82,29 @@
private:
static void cpuid(int registers[4], int info)
{
- #if defined(_WIN32)
- __cpuid(registers, info);
+ #if defined(__i386__) || defined(__x86_64__) // only x86 targets execute the CPUID instruction
+ #if defined(_WIN32)
+ __cpuid(registers, info); // Windows builds go through the __cpuid compiler intrinsic
+ #else
+ __asm volatile("cpuid": "=a" (registers[0]), "=b" (registers[1]), "=c" (registers[2]), "=d" (registers[3]): "a" (info)); // info goes in via EAX; registers[0..3] receive EAX, EBX, ECX, EDX
+ #endif
#else
- __asm volatile("cpuid": "=a" (registers[0]), "=b" (registers[1]), "=c" (registers[2]), "=d" (registers[3]): "a" (info));
+ registers[0] = 0; // not an x86 target: hand back all zeros instead of executing cpuid
+ registers[1] = 0;
+ registers[2] = 0;
+ registers[3] = 0;
#endif
}
static bool detectSSE4_1()
{
- int registers[4];
- cpuid(registers, 1);
- return (registers[2] & 0x00080000) != 0;
+ #if defined(__i386__) || defined(__x86_64__) // the feature query is only meaningful on x86 targets
+ int registers[4];
+ cpuid(registers, 1); // query CPUID leaf 1
+ return (registers[2] & 0x00080000) != 0; // bit 19 of registers[2] (ECX in the inline-asm path): the SSE4.1 feature flag
+ #else
+ return false; // not an x86 target: SSE4.1 is reported as unavailable
+ #endif
}
};