// crosstest/test_arith_main.cpp - SwiftShader - Git at Google

 /* crosstest.py --test=test_arith.cpp --test=test_arith_frem.ll \
    --test=test_arith_sqrt.ll --driver=test_arith_main.cpp \
    --prefix=Subzero_ --output=test_arith */

 #include <stdint.h>

 #include <climits> // CHAR_BIT
 #include <limits>
 #include <cfloat>
 #include <cmath>   // fmodf
 #include <cstring> // memcmp
 #include <iostream>

 // Include test_arith.h twice - once normally, and once within the
 // Subzero_ namespace, corresponding to the llc and Subzero translated
 // object files, respectively.
 #include "test_arith.h"
 namespace Subzero_ {
 #include "test_arith.h"
 }

 // Reports whether dividing Value1 by Value2 has to be skipped because it
 // could raise a hardware exception: either the divisor is zero, or the
 // operands are the minimum value of T and -1.
 template <class T> bool inputsMayTriggerException(T Value1, T Value2) {
   const bool DivisorIsZero = (Value2 == 0);
   // The overflow case is known to trap on x86-32.  TODO: adjust for other
   // architectures.
   const bool QuotientOverflows =
       (Value2 == -1) && (Value1 == std::numeric_limits<T>::min());
   return DivisorIsZero || QuotientOverflows;
 }

 // Cross-checks the scalar integer arithmetic tests.  For every entry of
 // UINTOP_TABLE and SINTOP_TABLE, the llc-translated function and its
 // Subzero_ counterpart are called with the same operand pair and the two
 // results are compared.
 //
 // Operands are drawn from INT_VALUE_ARRAY.  Types no wider than 32 bits use
 // each table entry directly; wider types assemble each operand from two
 // entries (high and low 32-bit halves).  For entries whose isdiv flag is set,
 // operand pairs for which inputsMayTriggerException() is true are skipped.
 //
 // TotalTests, Passes and Failures are running counters updated in place;
 // each mismatch is additionally reported on std::cout.
 template <typename TypeUnsigned, typename TypeSigned>
 void testsInt(size_t &TotalTests, size_t &Passes, size_t &Failures) {
   typedef TypeUnsigned (*FuncTypeUnsigned)(TypeUnsigned, TypeUnsigned);
   typedef TypeSigned (*FuncTypeSigned)(TypeSigned, TypeSigned);
   volatile unsigned Values[] = INT_VALUE_ARRAY;
   const static size_t NumValues = sizeof(Values) / sizeof(*Values);
   static struct {
     const char *Name;
     FuncTypeUnsigned FuncLlc;
     FuncTypeUnsigned FuncSz;
     bool ExcludeDivExceptions; // for divide related tests
   } Funcs[] = {
 #define X(inst, op, isdiv)                                                     \
   {                                                                            \
     STR(inst), (FuncTypeUnsigned)test##inst,                                   \
         (FuncTypeUnsigned)Subzero_::test##inst, isdiv                          \
   }                                                                            \
   ,
       UINTOP_TABLE
 #undef X
 #define X(inst, op, isdiv)                                                     \
   {                                                                            \
     STR(inst), (FuncTypeUnsigned)(FuncTypeSigned)test##inst,                   \
         (FuncTypeUnsigned)(FuncTypeSigned)Subzero_::test##inst, isdiv          \
   }                                                                            \
   ,
       SINTOP_TABLE
 #undef X
   };
   const static size_t NumFuncs = sizeof(Funcs) / sizeof(*Funcs);

   if (sizeof(TypeUnsigned) <= sizeof(uint32_t)) {
     // This is the "normal" version of the loop nest, for 32-bit or
     // narrower types.
     for (size_t f = 0; f < NumFuncs; ++f) {
       for (size_t i = 0; i < NumValues; ++i) {
         for (size_t j = 0; j < NumValues; ++j) {
           TypeUnsigned Value1 = Values[i];
           TypeUnsigned Value2 = Values[j];
           // Avoid HW divide-by-zero exception.
           if (Funcs[f].ExcludeDivExceptions &&
               inputsMayTriggerException<TypeSigned>(Value1, Value2))
             continue;
           ++TotalTests;
           TypeUnsigned ResultSz = Funcs[f].FuncSz(Value1, Value2);
           TypeUnsigned ResultLlc = Funcs[f].FuncLlc(Value1, Value2);
           if (ResultSz == ResultLlc) {
             ++Passes;
           } else {
             ++Failures;
             // Operands are widened to unsigned as well as the results, so
             // that 8-bit values are printed as numbers, not characters.
             std::cout << "test" << Funcs[f].Name
                       << (CHAR_BIT * sizeof(TypeUnsigned)) << "("
                       << static_cast<unsigned>(Value1) << ", "
                       << static_cast<unsigned>(Value2)
                       << "): sz=" << static_cast<unsigned>(ResultSz)
                       << " llc=" << static_cast<unsigned>(ResultLlc)
                       << std::endl;
           }
         }
       }
     }
   } else {
     // This is the 64-bit version.  Test values are synthesized from
     // the 32-bit values in Values[].
     for (size_t f = 0; f < NumFuncs; ++f) {
       for (size_t iLo = 0; iLo < NumValues; ++iLo) {
         for (size_t iHi = 0; iHi < NumValues; ++iHi) {
           for (size_t jLo = 0; jLo < NumValues; ++jLo) {
             for (size_t jHi = 0; jHi < NumValues; ++jHi) {
               // The shift is done in uint64_t: this branch is also compiled
               // (though never run) for narrower TypeUnsigned, where shifting
               // the promoted value by 32 would not be valid.
               TypeUnsigned Value1 = static_cast<TypeUnsigned>(
                   (static_cast<uint64_t>(Values[iHi]) << 32) + Values[iLo]);
               TypeUnsigned Value2 = static_cast<TypeUnsigned>(
                   (static_cast<uint64_t>(Values[jHi]) << 32) + Values[jLo]);
               if (Funcs[f].ExcludeDivExceptions &&
                   inputsMayTriggerException<TypeSigned>(Value1, Value2))
                 continue;
               ++TotalTests;
               TypeUnsigned ResultSz = Funcs[f].FuncSz(Value1, Value2);
               TypeUnsigned ResultLlc = Funcs[f].FuncLlc(Value1, Value2);
               if (ResultSz == ResultLlc) {
                 ++Passes;
               } else {
                 ++Failures;
                 // Print the full 64-bit values; truncating to unsigned would
                 // hide mismatches in the upper half.
                 std::cout << "test" << Funcs[f].Name
                           << (CHAR_BIT * sizeof(TypeUnsigned)) << "("
                           << static_cast<uint64_t>(Value1) << ", "
                           << static_cast<uint64_t>(Value2)
                           << "): sz=" << static_cast<uint64_t>(ResultSz)
                           << " llc=" << static_cast<uint64_t>(ResultLlc)
                           << std::endl;
               }
             }
           }
         }
       }
     }
   }
 }

 // Vectors are deterministically constructed by selecting elements from
 // a pool of scalar values based on a pseudorandom sequence.  Testing
 // all possible combinations of scalar values from the value table is
 // not tractable.
 //
 // PRNG is the multiplicative congruential generator of Lewis, Goodman,
 // and Miller (1969): State' = (16807 * State) mod (2^31 - 1).  A state that
 // is a multiple of 2^31 - 1 (which includes a Seed of 0) is a fixed point
 // that only ever yields 0.
 // TODO: Replace with a portable PRNG from C++11.
 class PRNG {
 public:
   PRNG(uint32_t Seed = 1) : State(Seed) {}

   // Advances the generator one step and returns the new state.
   uint32_t operator()() {
     const uint64_t Multiplier = 16807;
     const uint64_t Modulus = 2147483647; // 2^31 - 1
     // The product can need more than 32 bits, so it is formed in 64-bit
     // arithmetic; a 32-bit multiply would wrap before the reduction and
     // produce a different sequence.
     State = static_cast<uint32_t>((Multiplier * State) % Modulus);
     return State;
   }

 private:
   uint32_t State;
 };

 // Number of pseudorandom operand vectors tried for each vector test function.
 static const size_t MaxTestsPerFunc = 100 * 1000;

 // Writes the elements of Vect to std::cout as a ", "-separated list, with
 // each element converted to CastType before it is streamed.  The element
 // count is sizeof(Type) / sizeof(ElementType).
 template <typename Type, typename ElementType, typename CastType>
 void outputVector(const Type Vect) {
   const static size_t Count = sizeof(Type) / sizeof(ElementType);
   if (Count == 0)
     return;
   // The first element takes no separator; every later one is preceded by it.
   std::cout << (CastType)Vect[0];
   for (size_t Pos = 1; Pos < Count; ++Pos)
     std::cout << ", " << (CastType)Vect[Pos];
 }

 // Cross-checks the vector integer arithmetic tests.  For every entry of
 // UINTOP_TABLE and SINTOP_TABLE, MaxTestsPerFunc pairs of operand vectors
 // are built and passed to both the llc-translated function and its Subzero_
 // counterpart; the two result vectors are compared bytewise.
 //
 // Vector elements are picked from INT_VALUE_ARRAY using a PRNG that is
 // default-constructed anew for each function.  For entries whose isdiv flag
 // is set, an element pair for which inputsMayTriggerException() is true is
 // discarded and drawn again.
 //
 // TotalTests, Passes and Failures are running counters updated in place;
 // each mismatch is additionally reported on std::cout.
 template <typename TypeUnsigned, typename TypeSigned,
           typename ElementTypeUnsigned, typename ElementTypeSigned>
 void testsVecInt(size_t &TotalTests, size_t &Passes, size_t &Failures) {
   typedef TypeUnsigned (*FuncTypeUnsigned)(TypeUnsigned, TypeUnsigned);
   typedef TypeSigned (*FuncTypeSigned)(TypeSigned, TypeSigned);
   volatile unsigned Values[] = INT_VALUE_ARRAY;
   const static size_t NumValues = sizeof(Values) / sizeof(*Values);
   static struct {
     const char *Name;
     FuncTypeUnsigned FuncLlc;
     FuncTypeUnsigned FuncSz;
     bool ExcludeDivExceptions; // for divide related tests
   } Funcs[] = {
 #define X(inst, op, isdiv)                                                     \
   {                                                                            \
     STR(inst), (FuncTypeUnsigned)test##inst,                                   \
         (FuncTypeUnsigned)Subzero_::test##inst, isdiv                          \
   }                                                                            \
   ,
         UINTOP_TABLE
 #undef X
 #define X(inst, op, isdiv)                                                     \
   {                                                                            \
     STR(inst), (FuncTypeUnsigned)(FuncTypeSigned)test##inst,                   \
         (FuncTypeUnsigned)(FuncTypeSigned)Subzero_::test##inst, isdiv          \
   }                                                                            \
   ,
         SINTOP_TABLE
 #undef X
   };
   const static size_t NumFuncs = sizeof(Funcs) / sizeof(*Funcs);
   const static size_t NumElementsInType =
       sizeof(TypeUnsigned) / sizeof(ElementTypeUnsigned);
   for (size_t f = 0; f < NumFuncs; ++f) {
     PRNG Index;
     for (size_t i = 0; i < MaxTestsPerFunc; ++i) {
       // Initialize the test vectors.  j only advances once an acceptable
       // element pair has been stored.
       TypeUnsigned Value1, Value2;
       for (size_t j = 0; j < NumElementsInType;) {
         ElementTypeUnsigned Element1 = Values[Index() % NumValues];
         ElementTypeUnsigned Element2 = Values[Index() % NumValues];
         if (Funcs[f].ExcludeDivExceptions &&
             inputsMayTriggerException<ElementTypeSigned>(Element1, Element2))
           continue;
         Value1[j] = Element1;
         Value2[j] = Element2;
         ++j;
       }
       // Perform the test.
       TypeUnsigned ResultSz = Funcs[f].FuncSz(Value1, Value2);
       TypeUnsigned ResultLlc = Funcs[f].FuncLlc(Value1, Value2);
       ++TotalTests;
       if (!memcmp(&ResultSz, &ResultLlc, sizeof(ResultSz))) {
         ++Passes;
       } else {
         // Count the mismatch so it shows up in the summary and exit status.
         ++Failures;
         std::cout << "test" << Funcs[f].Name << "v" << NumElementsInType << "i"
                   << (CHAR_BIT * sizeof(ElementTypeUnsigned)) << "(";
         outputVector<TypeUnsigned, ElementTypeUnsigned, unsigned>(Value1);
         std::cout << ", ";
         outputVector<TypeUnsigned, ElementTypeUnsigned, unsigned>(Value2);
         std::cout << "): sz=";
         outputVector<TypeUnsigned, ElementTypeUnsigned, unsigned>(ResultSz);
         std::cout << " llc=";
         outputVector<TypeUnsigned, ElementTypeUnsigned, unsigned>(ResultLlc);
         std::cout << std::endl;
       }
     }
   }
 }

 // Cross-checks the scalar floating-point tests for Type.  Every function in
 // FPOP_TABLE is run on all ordered pairs of values from FP_VALUE_ARRAY,
 // once through the llc-translated version and once through the Subzero_
 // version; afterwards mySqrt() is compared the same way on each single
 // value.  Results are compared bytewise.  TotalTests, Passes and Failures
 // are updated in place and every mismatch is printed to std::cout.
 template <typename Type>
 void testsFp(size_t &TotalTests, size_t &Passes, size_t &Failures) {
   typedef Type (*FuncType)(Type, Type);
   static const Type NegInf = -1.0 / 0.0;
   static const Type PosInf = 1.0 / 0.0;
   static const Type Nan = 0.0 / 0.0;
   static const Type NegNan = -0.0 / 0.0;
   volatile Type Values[] = FP_VALUE_ARRAY(NegInf, PosInf, NegNan, Nan);
   const static size_t NumValues = sizeof(Values) / sizeof(*Values);
   static struct {
     const char *Name;
     FuncType FuncLlc;
     FuncType FuncSz;
   } Funcs[] = {
 #define X(inst, op, func)                                                      \
   { STR(inst), (FuncType)test##inst, (FuncType)Subzero_::test##inst }          \
   ,
       FPOP_TABLE
 #undef X
   };
   const static size_t NumFuncs = sizeof(Funcs) / sizeof(*Funcs);

   // Two-operand functions: every ordered pair of table values.
   for (size_t FuncIdx = 0; FuncIdx != NumFuncs; ++FuncIdx) {
     for (size_t LhsIdx = 0; LhsIdx != NumValues; ++LhsIdx) {
       for (size_t RhsIdx = 0; RhsIdx != NumValues; ++RhsIdx) {
         Type Lhs = Values[LhsIdx];
         Type Rhs = Values[RhsIdx];
         ++TotalTests;
         Type FromSz = Funcs[FuncIdx].FuncSz(Lhs, Rhs);
         Type FromLlc = Funcs[FuncIdx].FuncLlc(Lhs, Rhs);
         // A bytewise comparison lets two NaN results count as equal.
         if (memcmp(&FromSz, &FromLlc, sizeof(Type)) == 0) {
           ++Passes;
           continue;
         }
         ++Failures;
         std::cout << std::fixed << "test" << Funcs[FuncIdx].Name
                   << (CHAR_BIT * sizeof(Type)) << "(" << Lhs << ", " << Rhs
                   << "): sz=" << FromSz << " llc=" << FromLlc << std::endl;
       }
     }
   }

   // Square root: one operand, each table value in turn.
   for (size_t Idx = 0; Idx != NumValues; ++Idx) {
     Type Operand = Values[Idx];
     ++TotalTests;
     Type FromSz = Subzero_::mySqrt(Operand);
     Type FromLlc = mySqrt(Operand);
     // A bytewise comparison lets two NaN results count as equal.
     if (memcmp(&FromSz, &FromLlc, sizeof(Type)) == 0) {
       ++Passes;
       continue;
     }
     ++Failures;
     std::cout << std::fixed << "test_sqrt" << (CHAR_BIT * sizeof(Type)) << "("
               << Operand << "): sz=" << FromSz << " llc=" << FromLlc
               << std::endl;
   }
 }

 void testsVecFp(size_t &TotalTests, size_t &Passes, size_t &Failures) {
   static const float NegInf = -1.0 / 0.0;
   static const float PosInf = 1.0 / 0.0;
   static const float Nan = 0.0 / 0.0;
   static const float NegNan = -0.0 / 0.0;
   volatile float Values[] = FP_VALUE_ARRAY(NegInf, PosInf, NegNan, Nan);
   const static size_t NumValues = sizeof(Values) / sizeof(*Values);
   typedef v4f32 (*FuncType)(v4f32, v4f32);
   static struct {
     const char *Name;
     FuncType FuncLlc;
     FuncType FuncSz;
   } Funcs[] = {
 #define X(inst, op, func)                                                      \
   { STR(inst), (FuncType)test##inst, (FuncType)Subzero_::test##inst }          \
   ,
       FPOP_TABLE
 #undef X
   };
   const static size_t NumFuncs = sizeof(Funcs) / sizeof(*Funcs);
   const static size_t NumElementsInType = 4;
   for (size_t f = 0; f < NumFuncs; ++f) {
     PRNG Index;
     for (size_t i = 0; i < MaxTestsPerFunc; ++i) {
       // Initialize the test vectors.
       v4f32 Value1, Value2;
       for (size_t j = 0; j < NumElementsInType; ++j) {
         Value1[j] = Values[Index() % NumValues];
         Value2[j] = Values[Index() % NumValues];
       }
       // Perform the test.
       v4f32 ResultSz = Funcs[f].FuncSz(Value1, Value2);
       v4f32 ResultLlc = Funcs[f].FuncLlc(Value1, Value2);
       ++TotalTests;
       if (!memcmp(&ResultSz, &ResultLlc, sizeof(ResultSz))) {
         ++Passes;
       } else {
         ++Failures;
         std::cout << std::fixed << "test" << Funcs[f].Name << "v4f32"
                   << "(";
         outputVector<v4f32, float, float>(Value1);
         std::cout << ", ";
         outputVector<v4f32, float, float>(Value2);
         std::cout << "): sz=";
         outputVector<v4f32, float, float>(ResultSz);
         std::cout << " llc=";
         outputVector<v4f32, float, float>(ResultLlc);
         std::cout << std::endl;
       }
     }
   }
 }

 // Runs every scalar and vector cross-test, prints the accumulated totals,
 // and returns the number of failures (capped at 255) as the exit status.
 int main(int argc, char **argv) {
   size_t TotalTests = 0;
   size_t Passes = 0;
   size_t Failures = 0;

   testsInt<uint8_t, int8_t>(TotalTests, Passes, Failures);
   testsInt<uint16_t, int16_t>(TotalTests, Passes, Failures);
   testsInt<uint32_t, int32_t>(TotalTests, Passes, Failures);
   testsInt<uint64_t, int64_t>(TotalTests, Passes, Failures);
   testsVecInt<v4ui32, v4si32, uint32_t, int32_t>(TotalTests, Passes, Failures);
   testsVecInt<v8ui16, v8si16, uint16_t, int16_t>(TotalTests, Passes, Failures);
   testsVecInt<v16ui8, v16si8, uint8_t, int8_t>(TotalTests, Passes, Failures);
   testsFp<float>(TotalTests, Passes, Failures);
   testsFp<double>(TotalTests, Passes, Failures);
   testsVecFp(TotalTests, Passes, Failures);

   std::cout << "TotalTests=" << TotalTests << " Passes=" << Passes
             << " Failures=" << Failures << "\n";
   // Only the low 8 bits of an exit status reach the parent process on POSIX
   // systems, so the raw count would read as success for any multiple of 256
   // failures.  Cap it instead.
   const size_t MaxExitStatus = 255;
   return static_cast<int>(Failures < MaxExitStatus ? Failures
                                                    : MaxExitStatus);
 }

 extern "C" {
 // Subzero helpers.  Each one applies a single shift, divide, remainder or
 // multiply to whole vectors using the compiler's vector operators; the frem
 // helper applies fmodf() lane by lane.
 // NOTE(review): presumably these are called from Subzero-translated code --
 // confirm against the translator.

   // 4 x 32-bit lanes.
   v4si32 Sz_shl_v4i32(v4si32 Lhs, v4si32 Rhs) { return Lhs << Rhs; }
   v4si32 Sz_ashr_v4i32(v4si32 Lhs, v4si32 Rhs) { return Lhs >> Rhs; }
   v4ui32 Sz_lshr_v4i32(v4ui32 Lhs, v4ui32 Rhs) { return Lhs >> Rhs; }
   v4si32 Sz_sdiv_v4i32(v4si32 Lhs, v4si32 Rhs) { return Lhs / Rhs; }
   v4ui32 Sz_udiv_v4i32(v4ui32 Lhs, v4ui32 Rhs) { return Lhs / Rhs; }
   v4si32 Sz_srem_v4i32(v4si32 Lhs, v4si32 Rhs) { return Lhs % Rhs; }
   v4ui32 Sz_urem_v4i32(v4ui32 Lhs, v4ui32 Rhs) { return Lhs % Rhs; }

   // 8 x 16-bit lanes.
   v8si16 Sz_shl_v8i16(v8si16 Lhs, v8si16 Rhs) { return Lhs << Rhs; }
   v8si16 Sz_ashr_v8i16(v8si16 Lhs, v8si16 Rhs) { return Lhs >> Rhs; }
   v8ui16 Sz_lshr_v8i16(v8ui16 Lhs, v8ui16 Rhs) { return Lhs >> Rhs; }
   v8si16 Sz_sdiv_v8i16(v8si16 Lhs, v8si16 Rhs) { return Lhs / Rhs; }
   v8ui16 Sz_udiv_v8i16(v8ui16 Lhs, v8ui16 Rhs) { return Lhs / Rhs; }
   v8si16 Sz_srem_v8i16(v8si16 Lhs, v8si16 Rhs) { return Lhs % Rhs; }
   v8ui16 Sz_urem_v8i16(v8ui16 Lhs, v8ui16 Rhs) { return Lhs % Rhs; }

   // 16 x 8-bit lanes.
   v16ui8 Sz_mul_v16i8(v16ui8 Lhs, v16ui8 Rhs) { return Lhs * Rhs; }
   v16si8 Sz_shl_v16i8(v16si8 Lhs, v16si8 Rhs) { return Lhs << Rhs; }
   v16si8 Sz_ashr_v16i8(v16si8 Lhs, v16si8 Rhs) { return Lhs >> Rhs; }
   v16ui8 Sz_lshr_v16i8(v16ui8 Lhs, v16ui8 Rhs) { return Lhs >> Rhs; }
   v16si8 Sz_sdiv_v16i8(v16si8 Lhs, v16si8 Rhs) { return Lhs / Rhs; }
   v16ui8 Sz_udiv_v16i8(v16ui8 Lhs, v16ui8 Rhs) { return Lhs / Rhs; }
   v16si8 Sz_srem_v16i8(v16si8 Lhs, v16si8 Rhs) { return Lhs % Rhs; }
   v16ui8 Sz_urem_v16i8(v16ui8 Lhs, v16ui8 Rhs) { return Lhs % Rhs; }

   // Floating-point remainder of each of the four lanes.
   v4f32 Sz_frem_v4f32(v4f32 Lhs, v4f32 Rhs) {
     v4f32 Remainder;
     for (int Lane = 0; Lane != 4; ++Lane) {
       Remainder[Lane] = fmodf(Lhs[Lane], Rhs[Lane]);
     }
     return Remainder;
   }
 }
	/* crosstest.py --test=test_arith.cpp --test=test_arith_frem.ll \
	--test=test_arith_sqrt.ll --driver=test_arith_main.cpp \
	--prefix=Subzero_ --output=test_arith */

	#include <stdint.h>

	#include <climits> // CHAR_BIT
	#include <limits>
	#include <cfloat>
	#include <cmath> // fmodf
	#include <cstring> // memcmp
	#include <iostream>

	// Include test_arith.h twice - once normally, and once within the
	// Subzero_ namespace, corresponding to the llc and Subzero translated
	// object files, respectively.
	#include "test_arith.h"
	namespace Subzero_ {
	#include "test_arith.h"
	}

	// Reports whether dividing Value1 by Value2 has to be skipped because it
	// could raise a hardware exception: either the divisor is zero, or the
	// operands are the minimum value of T and -1.
	template <class T> bool inputsMayTriggerException(T Value1, T Value2) {
	  const bool DivisorIsZero = (Value2 == 0);
	  // The overflow case is known to trap on x86-32.  TODO: adjust for other
	  // architectures.
	  const bool QuotientOverflows =
	      (Value2 == -1) && (Value1 == std::numeric_limits<T>::min());
	  return DivisorIsZero || QuotientOverflows;
	}

	// Cross-checks the scalar integer arithmetic tests.  For every entry of
	// UINTOP_TABLE and SINTOP_TABLE, the llc-translated function and its
	// Subzero_ counterpart are called with the same operand pair and the two
	// results are compared.
	//
	// Operands are drawn from INT_VALUE_ARRAY.  Types no wider than 32 bits use
	// each table entry directly; wider types assemble each operand from two
	// entries (high and low 32-bit halves).  For entries whose isdiv flag is set,
	// operand pairs for which inputsMayTriggerException() is true are skipped.
	//
	// TotalTests, Passes and Failures are running counters updated in place;
	// each mismatch is additionally reported on std::cout.
	template <typename TypeUnsigned, typename TypeSigned>
	void testsInt(size_t &TotalTests, size_t &Passes, size_t &Failures) {
	  typedef TypeUnsigned (*FuncTypeUnsigned)(TypeUnsigned, TypeUnsigned);
	  typedef TypeSigned (*FuncTypeSigned)(TypeSigned, TypeSigned);
	  volatile unsigned Values[] = INT_VALUE_ARRAY;
	  const static size_t NumValues = sizeof(Values) / sizeof(*Values);
	  static struct {
	    const char *Name;
	    FuncTypeUnsigned FuncLlc;
	    FuncTypeUnsigned FuncSz;
	    bool ExcludeDivExceptions; // for divide related tests
	  } Funcs[] = {
	#define X(inst, op, isdiv) \
	  { \
	    STR(inst), (FuncTypeUnsigned)test##inst, \
	        (FuncTypeUnsigned)Subzero_::test##inst, isdiv \
	  } \
	  ,
	      UINTOP_TABLE
	#undef X
	#define X(inst, op, isdiv) \
	  { \
	    STR(inst), (FuncTypeUnsigned)(FuncTypeSigned)test##inst, \
	        (FuncTypeUnsigned)(FuncTypeSigned)Subzero_::test##inst, isdiv \
	  } \
	  ,
	      SINTOP_TABLE
	#undef X
	  };
	  const static size_t NumFuncs = sizeof(Funcs) / sizeof(*Funcs);

	  if (sizeof(TypeUnsigned) <= sizeof(uint32_t)) {
	    // This is the "normal" version of the loop nest, for 32-bit or
	    // narrower types.
	    for (size_t f = 0; f < NumFuncs; ++f) {
	      for (size_t i = 0; i < NumValues; ++i) {
	        for (size_t j = 0; j < NumValues; ++j) {
	          TypeUnsigned Value1 = Values[i];
	          TypeUnsigned Value2 = Values[j];
	          // Avoid HW divide-by-zero exception.
	          if (Funcs[f].ExcludeDivExceptions &&
	              inputsMayTriggerException<TypeSigned>(Value1, Value2))
	            continue;
	          ++TotalTests;
	          TypeUnsigned ResultSz = Funcs[f].FuncSz(Value1, Value2);
	          TypeUnsigned ResultLlc = Funcs[f].FuncLlc(Value1, Value2);
	          if (ResultSz == ResultLlc) {
	            ++Passes;
	          } else {
	            ++Failures;
	            // Operands are widened to unsigned as well as the results, so
	            // that 8-bit values are printed as numbers, not characters.
	            std::cout << "test" << Funcs[f].Name
	                      << (CHAR_BIT * sizeof(TypeUnsigned)) << "("
	                      << static_cast<unsigned>(Value1) << ", "
	                      << static_cast<unsigned>(Value2)
	                      << "): sz=" << static_cast<unsigned>(ResultSz)
	                      << " llc=" << static_cast<unsigned>(ResultLlc)
	                      << std::endl;
	          }
	        }
	      }
	    }
	  } else {
	    // This is the 64-bit version.  Test values are synthesized from
	    // the 32-bit values in Values[].
	    for (size_t f = 0; f < NumFuncs; ++f) {
	      for (size_t iLo = 0; iLo < NumValues; ++iLo) {
	        for (size_t iHi = 0; iHi < NumValues; ++iHi) {
	          for (size_t jLo = 0; jLo < NumValues; ++jLo) {
	            for (size_t jHi = 0; jHi < NumValues; ++jHi) {
	              // The shift is done in uint64_t: this branch is also compiled
	              // (though never run) for narrower TypeUnsigned, where shifting
	              // the promoted value by 32 would not be valid.
	              TypeUnsigned Value1 = static_cast<TypeUnsigned>(
	                  (static_cast<uint64_t>(Values[iHi]) << 32) + Values[iLo]);
	              TypeUnsigned Value2 = static_cast<TypeUnsigned>(
	                  (static_cast<uint64_t>(Values[jHi]) << 32) + Values[jLo]);
	              if (Funcs[f].ExcludeDivExceptions &&
	                  inputsMayTriggerException<TypeSigned>(Value1, Value2))
	                continue;
	              ++TotalTests;
	              TypeUnsigned ResultSz = Funcs[f].FuncSz(Value1, Value2);
	              TypeUnsigned ResultLlc = Funcs[f].FuncLlc(Value1, Value2);
	              if (ResultSz == ResultLlc) {
	                ++Passes;
	              } else {
	                ++Failures;
	                // Print the full 64-bit values; truncating to unsigned would
	                // hide mismatches in the upper half.
	                std::cout << "test" << Funcs[f].Name
	                          << (CHAR_BIT * sizeof(TypeUnsigned)) << "("
	                          << static_cast<uint64_t>(Value1) << ", "
	                          << static_cast<uint64_t>(Value2)
	                          << "): sz=" << static_cast<uint64_t>(ResultSz)
	                          << " llc=" << static_cast<uint64_t>(ResultLlc)
	                          << std::endl;
	              }
	            }
	          }
	        }
	      }
	    }
	  }
	}

	// Vectors are deterministically constructed by selecting elements from
	// a pool of scalar values based on a pseudorandom sequence. Testing
	// all possible combinations of scalar values from the value table is
	// not tractable.
	//
	// PRNG is the multiplicative congruential generator of Lewis, Goodman,
	// and Miller (1969): State' = (16807 * State) mod (2^31 - 1).  A state that
	// is a multiple of 2^31 - 1 (which includes a Seed of 0) is a fixed point
	// that only ever yields 0.
	// TODO: Replace with a portable PRNG from C++11.
	class PRNG {
	public:
	  PRNG(uint32_t Seed = 1) : State(Seed) {}

	  // Advances the generator one step and returns the new state.
	  uint32_t operator()() {
	    const uint64_t Multiplier = 16807;
	    const uint64_t Modulus = 2147483647; // 2^31 - 1
	    // The product can need more than 32 bits, so it is formed in 64-bit
	    // arithmetic; a 32-bit multiply would wrap before the reduction and
	    // produce a different sequence.
	    State = static_cast<uint32_t>((Multiplier * State) % Modulus);
	    return State;
	  }

	private:
	  uint32_t State;
	};

	// Number of pseudorandom operand vectors tried for each vector test function.
	static const size_t MaxTestsPerFunc = 100 * 1000;

	// Writes the elements of Vect to std::cout as a ", "-separated list, with
	// each element converted to CastType before it is streamed.  The element
	// count is sizeof(Type) / sizeof(ElementType).
	template <typename Type, typename ElementType, typename CastType>
	void outputVector(const Type Vect) {
	  const static size_t Count = sizeof(Type) / sizeof(ElementType);
	  if (Count == 0)
	    return;
	  // The first element takes no separator; every later one is preceded by it.
	  std::cout << (CastType)Vect[0];
	  for (size_t Pos = 1; Pos < Count; ++Pos)
	    std::cout << ", " << (CastType)Vect[Pos];
	}

	// Cross-checks the vector integer arithmetic tests.  For every entry of
	// UINTOP_TABLE and SINTOP_TABLE, MaxTestsPerFunc pairs of operand vectors
	// are built and passed to both the llc-translated function and its Subzero_
	// counterpart; the two result vectors are compared bytewise.
	//
	// Vector elements are picked from INT_VALUE_ARRAY using a PRNG that is
	// default-constructed anew for each function.  For entries whose isdiv flag
	// is set, an element pair for which inputsMayTriggerException() is true is
	// discarded and drawn again.
	//
	// TotalTests, Passes and Failures are running counters updated in place;
	// each mismatch is additionally reported on std::cout.
	template <typename TypeUnsigned, typename TypeSigned,
	          typename ElementTypeUnsigned, typename ElementTypeSigned>
	void testsVecInt(size_t &TotalTests, size_t &Passes, size_t &Failures) {
	  typedef TypeUnsigned (*FuncTypeUnsigned)(TypeUnsigned, TypeUnsigned);
	  typedef TypeSigned (*FuncTypeSigned)(TypeSigned, TypeSigned);
	  volatile unsigned Values[] = INT_VALUE_ARRAY;
	  const static size_t NumValues = sizeof(Values) / sizeof(*Values);
	  static struct {
	    const char *Name;
	    FuncTypeUnsigned FuncLlc;
	    FuncTypeUnsigned FuncSz;
	    bool ExcludeDivExceptions; // for divide related tests
	  } Funcs[] = {
	#define X(inst, op, isdiv) \
	  { \
	    STR(inst), (FuncTypeUnsigned)test##inst, \
	        (FuncTypeUnsigned)Subzero_::test##inst, isdiv \
	  } \
	  ,
	      UINTOP_TABLE
	#undef X
	#define X(inst, op, isdiv) \
	  { \
	    STR(inst), (FuncTypeUnsigned)(FuncTypeSigned)test##inst, \
	        (FuncTypeUnsigned)(FuncTypeSigned)Subzero_::test##inst, isdiv \
	  } \
	  ,
	      SINTOP_TABLE
	#undef X
	  };
	  const static size_t NumFuncs = sizeof(Funcs) / sizeof(*Funcs);
	  const static size_t NumElementsInType =
	      sizeof(TypeUnsigned) / sizeof(ElementTypeUnsigned);
	  for (size_t f = 0; f < NumFuncs; ++f) {
	    PRNG Index;
	    for (size_t i = 0; i < MaxTestsPerFunc; ++i) {
	      // Initialize the test vectors.  j only advances once an acceptable
	      // element pair has been stored.
	      TypeUnsigned Value1, Value2;
	      for (size_t j = 0; j < NumElementsInType;) {
	        ElementTypeUnsigned Element1 = Values[Index() % NumValues];
	        ElementTypeUnsigned Element2 = Values[Index() % NumValues];
	        if (Funcs[f].ExcludeDivExceptions &&
	            inputsMayTriggerException<ElementTypeSigned>(Element1, Element2))
	          continue;
	        Value1[j] = Element1;
	        Value2[j] = Element2;
	        ++j;
	      }
	      // Perform the test.
	      TypeUnsigned ResultSz = Funcs[f].FuncSz(Value1, Value2);
	      TypeUnsigned ResultLlc = Funcs[f].FuncLlc(Value1, Value2);
	      ++TotalTests;
	      if (!memcmp(&ResultSz, &ResultLlc, sizeof(ResultSz))) {
	        ++Passes;
	      } else {
	        // Count the mismatch so it shows up in the summary and exit status.
	        ++Failures;
	        std::cout << "test" << Funcs[f].Name << "v" << NumElementsInType << "i"
	                  << (CHAR_BIT * sizeof(ElementTypeUnsigned)) << "(";
	        outputVector<TypeUnsigned, ElementTypeUnsigned, unsigned>(Value1);
	        std::cout << ", ";
	        outputVector<TypeUnsigned, ElementTypeUnsigned, unsigned>(Value2);
	        std::cout << "): sz=";
	        outputVector<TypeUnsigned, ElementTypeUnsigned, unsigned>(ResultSz);
	        std::cout << " llc=";
	        outputVector<TypeUnsigned, ElementTypeUnsigned, unsigned>(ResultLlc);
	        std::cout << std::endl;
	      }
	    }
	  }
	}

	// Cross-checks the scalar floating-point tests for Type.  Every function in
	// FPOP_TABLE is run on all ordered pairs of values from FP_VALUE_ARRAY,
	// once through the llc-translated version and once through the Subzero_
	// version; afterwards mySqrt() is compared the same way on each single
	// value.  Results are compared bytewise.  TotalTests, Passes and Failures
	// are updated in place and every mismatch is printed to std::cout.
	template <typename Type>
	void testsFp(size_t &TotalTests, size_t &Passes, size_t &Failures) {
	  typedef Type (*FuncType)(Type, Type);
	  static const Type NegInf = -1.0 / 0.0;
	  static const Type PosInf = 1.0 / 0.0;
	  static const Type Nan = 0.0 / 0.0;
	  static const Type NegNan = -0.0 / 0.0;
	  volatile Type Values[] = FP_VALUE_ARRAY(NegInf, PosInf, NegNan, Nan);
	  const static size_t NumValues = sizeof(Values) / sizeof(*Values);
	  static struct {
	    const char *Name;
	    FuncType FuncLlc;
	    FuncType FuncSz;
	  } Funcs[] = {
	#define X(inst, op, func) \
	  { STR(inst), (FuncType)test##inst, (FuncType)Subzero_::test##inst } \
	  ,
	      FPOP_TABLE
	#undef X
	  };
	  const static size_t NumFuncs = sizeof(Funcs) / sizeof(*Funcs);

	  // Two-operand functions: every ordered pair of table values.
	  for (size_t FuncIdx = 0; FuncIdx != NumFuncs; ++FuncIdx) {
	    for (size_t LhsIdx = 0; LhsIdx != NumValues; ++LhsIdx) {
	      for (size_t RhsIdx = 0; RhsIdx != NumValues; ++RhsIdx) {
	        Type Lhs = Values[LhsIdx];
	        Type Rhs = Values[RhsIdx];
	        ++TotalTests;
	        Type FromSz = Funcs[FuncIdx].FuncSz(Lhs, Rhs);
	        Type FromLlc = Funcs[FuncIdx].FuncLlc(Lhs, Rhs);
	        // A bytewise comparison lets two NaN results count as equal.
	        if (memcmp(&FromSz, &FromLlc, sizeof(Type)) == 0) {
	          ++Passes;
	          continue;
	        }
	        ++Failures;
	        std::cout << std::fixed << "test" << Funcs[FuncIdx].Name
	                  << (CHAR_BIT * sizeof(Type)) << "(" << Lhs << ", " << Rhs
	                  << "): sz=" << FromSz << " llc=" << FromLlc << std::endl;
	      }
	    }
	  }

	  // Square root: one operand, each table value in turn.
	  for (size_t Idx = 0; Idx != NumValues; ++Idx) {
	    Type Operand = Values[Idx];
	    ++TotalTests;
	    Type FromSz = Subzero_::mySqrt(Operand);
	    Type FromLlc = mySqrt(Operand);
	    // A bytewise comparison lets two NaN results count as equal.
	    if (memcmp(&FromSz, &FromLlc, sizeof(Type)) == 0) {
	      ++Passes;
	      continue;
	    }
	    ++Failures;
	    std::cout << std::fixed << "test_sqrt" << (CHAR_BIT * sizeof(Type)) << "("
	              << Operand << "): sz=" << FromSz << " llc=" << FromLlc
	              << std::endl;
	  }
	}

	void testsVecFp(size_t &TotalTests, size_t &Passes, size_t &Failures) {
	static const float NegInf = -1.0 / 0.0;
	static const float PosInf = 1.0 / 0.0;
	static const float Nan = 0.0 / 0.0;
	static const float NegNan = -0.0 / 0.0;
	volatile float Values[] = FP_VALUE_ARRAY(NegInf, PosInf, NegNan, Nan);
	const static size_t NumValues = sizeof(Values) / sizeof(*Values);
	typedef v4f32 (*FuncType)(v4f32, v4f32);
	static struct {
	const char *Name;
	FuncType FuncLlc;
	FuncType FuncSz;
	} Funcs[] = {
	#define X(inst, op, func) \
	{ STR(inst), (FuncType)test##inst, (FuncType)Subzero_::test##inst } \
	,
	FPOP_TABLE
	#undef X
	};
	const static size_t NumFuncs = sizeof(Funcs) / sizeof(*Funcs);
	const static size_t NumElementsInType = 4;
	for (size_t f = 0; f < NumFuncs; ++f) {
	PRNG Index;
	for (size_t i = 0; i < MaxTestsPerFunc; ++i) {
	// Initialize the test vectors.
	v4f32 Value1, Value2;
	for (size_t j = 0; j < NumElementsInType; ++j) {
	Value1[j] = Values[Index() % NumValues];
	Value2[j] = Values[Index() % NumValues];
	}
	// Perform the test.
	v4f32 ResultSz = Funcs[f].FuncSz(Value1, Value2);
	v4f32 ResultLlc = Funcs[f].FuncLlc(Value1, Value2);
	++TotalTests;
	if (!memcmp(&ResultSz, &ResultLlc, sizeof(ResultSz))) {
	++Passes;
	} else {
	++Failures;
	std::cout << std::fixed << "test" << Funcs[f].Name << "v4f32"
	<< "(";
	outputVector<v4f32, float, float>(Value1);
	std::cout << ", ";
	outputVector<v4f32, float, float>(Value2);
	std::cout << "): sz=";
	outputVector<v4f32, float, float>(ResultSz);
	std::cout << " llc=";
	outputVector<v4f32, float, float>(ResultLlc);
	std::cout << std::endl;
	}
	}
	}
	}

	// Runs every scalar and vector cross-test, prints the accumulated totals,
	// and returns the number of failures (capped at 255) as the exit status.
	int main(int argc, char **argv) {
	  size_t TotalTests = 0;
	  size_t Passes = 0;
	  size_t Failures = 0;

	  testsInt<uint8_t, int8_t>(TotalTests, Passes, Failures);
	  testsInt<uint16_t, int16_t>(TotalTests, Passes, Failures);
	  testsInt<uint32_t, int32_t>(TotalTests, Passes, Failures);
	  testsInt<uint64_t, int64_t>(TotalTests, Passes, Failures);
	  testsVecInt<v4ui32, v4si32, uint32_t, int32_t>(TotalTests, Passes, Failures);
	  testsVecInt<v8ui16, v8si16, uint16_t, int16_t>(TotalTests, Passes, Failures);
	  testsVecInt<v16ui8, v16si8, uint8_t, int8_t>(TotalTests, Passes, Failures);
	  testsFp<float>(TotalTests, Passes, Failures);
	  testsFp<double>(TotalTests, Passes, Failures);
	  testsVecFp(TotalTests, Passes, Failures);

	  std::cout << "TotalTests=" << TotalTests << " Passes=" << Passes
	            << " Failures=" << Failures << "\n";
	  // Only the low 8 bits of an exit status reach the parent process on POSIX
	  // systems, so the raw count would read as success for any multiple of 256
	  // failures.  Cap it instead.
	  const size_t MaxExitStatus = 255;
	  return static_cast<int>(Failures < MaxExitStatus ? Failures
	                                                   : MaxExitStatus);
	}

	extern "C" {
	// Subzero helpers.  Each one applies a single shift, divide, remainder or
	// multiply to whole vectors using the compiler's vector operators; the frem
	// helper applies fmodf() lane by lane.
	// NOTE(review): presumably these are called from Subzero-translated code --
	// confirm against the translator.

	  // 4 x 32-bit lanes.
	  v4si32 Sz_shl_v4i32(v4si32 Lhs, v4si32 Rhs) { return Lhs << Rhs; }
	  v4si32 Sz_ashr_v4i32(v4si32 Lhs, v4si32 Rhs) { return Lhs >> Rhs; }
	  v4ui32 Sz_lshr_v4i32(v4ui32 Lhs, v4ui32 Rhs) { return Lhs >> Rhs; }
	  v4si32 Sz_sdiv_v4i32(v4si32 Lhs, v4si32 Rhs) { return Lhs / Rhs; }
	  v4ui32 Sz_udiv_v4i32(v4ui32 Lhs, v4ui32 Rhs) { return Lhs / Rhs; }
	  v4si32 Sz_srem_v4i32(v4si32 Lhs, v4si32 Rhs) { return Lhs % Rhs; }
	  v4ui32 Sz_urem_v4i32(v4ui32 Lhs, v4ui32 Rhs) { return Lhs % Rhs; }

	  // 8 x 16-bit lanes.
	  v8si16 Sz_shl_v8i16(v8si16 Lhs, v8si16 Rhs) { return Lhs << Rhs; }
	  v8si16 Sz_ashr_v8i16(v8si16 Lhs, v8si16 Rhs) { return Lhs >> Rhs; }
	  v8ui16 Sz_lshr_v8i16(v8ui16 Lhs, v8ui16 Rhs) { return Lhs >> Rhs; }
	  v8si16 Sz_sdiv_v8i16(v8si16 Lhs, v8si16 Rhs) { return Lhs / Rhs; }
	  v8ui16 Sz_udiv_v8i16(v8ui16 Lhs, v8ui16 Rhs) { return Lhs / Rhs; }
	  v8si16 Sz_srem_v8i16(v8si16 Lhs, v8si16 Rhs) { return Lhs % Rhs; }
	  v8ui16 Sz_urem_v8i16(v8ui16 Lhs, v8ui16 Rhs) { return Lhs % Rhs; }

	  // 16 x 8-bit lanes.
	  v16ui8 Sz_mul_v16i8(v16ui8 Lhs, v16ui8 Rhs) { return Lhs * Rhs; }
	  v16si8 Sz_shl_v16i8(v16si8 Lhs, v16si8 Rhs) { return Lhs << Rhs; }
	  v16si8 Sz_ashr_v16i8(v16si8 Lhs, v16si8 Rhs) { return Lhs >> Rhs; }
	  v16ui8 Sz_lshr_v16i8(v16ui8 Lhs, v16ui8 Rhs) { return Lhs >> Rhs; }
	  v16si8 Sz_sdiv_v16i8(v16si8 Lhs, v16si8 Rhs) { return Lhs / Rhs; }
	  v16ui8 Sz_udiv_v16i8(v16ui8 Lhs, v16ui8 Rhs) { return Lhs / Rhs; }
	  v16si8 Sz_srem_v16i8(v16si8 Lhs, v16si8 Rhs) { return Lhs % Rhs; }
	  v16ui8 Sz_urem_v16i8(v16ui8 Lhs, v16ui8 Rhs) { return Lhs % Rhs; }

	  // Floating-point remainder of each of the four lanes.
	  v4f32 Sz_frem_v4f32(v4f32 Lhs, v4f32 Rhs) {
	    v4f32 Remainder;
	    for (int Lane = 0; Lane != 4; ++Lane) {
	      Remainder[Lane] = fmodf(Lhs[Lane], Rhs[Lane]);
	    }
	    return Remainder;
	  }
	}