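Avoid ambiguous vector casts.

Store packed results through explicit Byte4, Short2 and UShort2 narrowing
constructors instead of UInt(As<Long>(...)), replace the Byte8/SByte8
int64_t constructors with the eight-component form, and add the UShort4
&, | and ^ operators.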

Bug swiftshader:15

Change-Id: Ia42d21b4f2c9e19a839ffb414661f2dffa350692
Reviewed-on: https://swiftshader-review.googlesource.com/7711
Reviewed-by: Nicolas Capens <capn@google.com>
Tested-by: Nicolas Capens <capn@google.com>
Reviewed-on: https://swiftshader-review.googlesource.com/7630
Reviewed-by: Alexis Hétu <sugoi@google.com>
diff --git a/src/Main/FrameBuffer.cpp b/src/Main/FrameBuffer.cpp
index 2ac2a49..d0add5e 100644
--- a/src/Main/FrameBuffer.cpp
+++ b/src/Main/FrameBuffer.cpp
@@ -621,7 +621,7 @@
 		{
 		case FORMAT_X8R8G8B8:
 		case FORMAT_A8R8G8B8:
-			*Pointer<UInt>(d) = UInt(As<Long>(Pack(As<UShort4>(c1), As<UShort4>(c1))));
+			*Pointer<Byte4>(d) = Byte4(Pack(As<UShort4>(c1), As<UShort4>(c1)));
 			break;
 		case FORMAT_X8B8G8R8:
 		case FORMAT_A8B8G8R8:
@@ -630,7 +630,7 @@
 			{
 				c1 = Swizzle(c1, 0xC6);
 
-				*Pointer<UInt>(d) = UInt(As<Long>(Pack(As<UShort4>(c1), As<UShort4>(c1))));
+				*Pointer<Byte4>(d) = Byte4(Pack(As<UShort4>(c1), As<UShort4>(c1)));
 			}
 			break;
 		case FORMAT_R8G8B8:
diff --git a/src/Reactor/LLVMReactor.cpp b/src/Reactor/LLVMReactor.cpp
index 8b32d1f..fdc358a 100644
--- a/src/Reactor/LLVMReactor.cpp
+++ b/src/Reactor/LLVMReactor.cpp
@@ -1931,6 +1931,21 @@
 		return T(llvm::Type::getInt16Ty(*::context));
 	}
 
+	Byte4::Byte4(RValue<Byte8> cast)
+	{
+	//	xyzw.parent = this;
+
+		storeValue(Nucleus::createTrunc(Nucleus::createBitCast(cast.value, Long::getType()), Int::getType()));
+	}
+
+	Byte4::Byte4(const Reference<Byte4> &rhs)
+	{
+	//	xyzw.parent = this;
+
+		Value *value = rhs.loadValue();
+		storeValue(value);
+	}
+
 	Type *Byte4::getType()
 	{
 		#if 0
@@ -1972,24 +1987,6 @@
 		storeValue(Nucleus::createBitCast(vector, getType()));
 	}
 
-	Byte8::Byte8(int64_t x)
-	{
-	//	xyzw.parent = this;
-
-		Constant *constantVector[8];
-		constantVector[0] = Nucleus::createConstantByte((unsigned char)(x >>  0));
-		constantVector[1] = Nucleus::createConstantByte((unsigned char)(x >>  8));
-		constantVector[2] = Nucleus::createConstantByte((unsigned char)(x >> 16));
-		constantVector[3] = Nucleus::createConstantByte((unsigned char)(x >> 24));
-		constantVector[4] = Nucleus::createConstantByte((unsigned char)(x >> 32));
-		constantVector[5] = Nucleus::createConstantByte((unsigned char)(x >> 40));
-		constantVector[6] = Nucleus::createConstantByte((unsigned char)(x >> 48));
-		constantVector[7] = Nucleus::createConstantByte((unsigned char)(x >> 56));
-		Value *vector = V(Nucleus::createConstantVector(constantVector, 8));
-
-		storeValue(Nucleus::createBitCast(vector, getType()));
-	}
-
 	Byte8::Byte8(RValue<Byte8> rhs)
 	{
 	//	xyzw.parent = this;
@@ -2185,7 +2182,7 @@
 	{
 		if(CPUID::supportsMMX2())
 		{
-			return val ^ Byte8(0xFFFFFFFFFFFFFFFF);
+			return val ^ Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
 		}
 		else
 		{
@@ -2291,24 +2288,6 @@
 		storeValue(Nucleus::createBitCast(vector, getType()));
 	}
 
-	SByte8::SByte8(int64_t x)
-	{
-	//	xyzw.parent = this;
-
-		Constant *constantVector[8];
-		constantVector[0] = Nucleus::createConstantByte((unsigned char)(x >>  0));
-		constantVector[1] = Nucleus::createConstantByte((unsigned char)(x >>  8));
-		constantVector[2] = Nucleus::createConstantByte((unsigned char)(x >> 16));
-		constantVector[3] = Nucleus::createConstantByte((unsigned char)(x >> 24));
-		constantVector[4] = Nucleus::createConstantByte((unsigned char)(x >> 32));
-		constantVector[5] = Nucleus::createConstantByte((unsigned char)(x >> 40));
-		constantVector[6] = Nucleus::createConstantByte((unsigned char)(x >> 48));
-		constantVector[7] = Nucleus::createConstantByte((unsigned char)(x >> 56));
-		Value *vector = V(Nucleus::createConstantVector(constantVector, 8));
-
-		storeValue(Nucleus::createBitCast(vector, getType()));
-	}
-
 	SByte8::SByte8(RValue<SByte8> rhs)
 	{
 	//	xyzw.parent = this;
@@ -2483,7 +2462,7 @@
 	{
 		if(CPUID::supportsMMX2())
 		{
-			return val ^ SByte8(0xFFFFFFFFFFFFFFFF);
+			return val ^ SByte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
 		}
 		else
 		{
@@ -2614,6 +2593,34 @@
 		return T( VectorType::get(SByte::getType(), 16));
 	}
 
+	Short2::Short2(RValue<Short4> cast)
+	{
+		storeValue(Nucleus::createTrunc(Nucleus::createBitCast(cast.value, Long::getType()), UInt::getType()));
+	}
+
+	Type *Short2::getType()
+	{
+		#if 0
+			return T(VectorType::get(Short::getType(), 2));
+		#else
+			return UInt::getType();   // FIXME: LLVM doesn't manipulate it as one 32-bit block
+		#endif
+	}
+
+	UShort2::UShort2(RValue<UShort4> cast)
+	{
+		storeValue(Nucleus::createTrunc(Nucleus::createBitCast(cast.value, Long::getType()), UInt::getType()));
+	}
+
+	Type *UShort2::getType()
+	{
+		#if 0
+			return T(VectorType::get(UShort::getType(), 2));
+		#else
+			return UInt::getType();   // FIXME: LLVM doesn't manipulate it as one 32-bit block
+		#endif
+	}
+
 	Short4::Short4(RValue<Int> cast)
 	{
 		Value *extend = Nucleus::createZExt(cast.value, Long::getType());
@@ -3331,6 +3338,42 @@
 		}
 	}
 
+	RValue<UShort4> operator&(RValue<UShort4> lhs, RValue<UShort4> rhs)
+	{
+		if(CPUID::supportsMMX2())
+		{
+			return As<UShort4>(x86::pand(As<Short4>(lhs), As<Short4>(rhs)));
+		}
+		else
+		{
+			return RValue<UShort4>(Nucleus::createAnd(lhs.value, rhs.value));
+		}
+	}
+
+	RValue<UShort4> operator|(RValue<UShort4> lhs, RValue<UShort4> rhs)
+	{
+		if(CPUID::supportsMMX2())
+		{
+			return As<UShort4>(x86::por(As<Short4>(lhs), As<Short4>(rhs)));
+		}
+		else
+		{
+			return RValue<UShort4>(Nucleus::createOr(lhs.value, rhs.value));
+		}
+	}
+
+	RValue<UShort4> operator^(RValue<UShort4> lhs, RValue<UShort4> rhs)
+	{
+		if(CPUID::supportsMMX2())
+		{
+			return As<UShort4>(x86::pxor(As<Short4>(lhs), As<Short4>(rhs)));
+		}
+		else
+		{
+			return RValue<UShort4>(Nucleus::createXor(lhs.value, rhs.value));
+		}
+	}
+
 	RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs)
 	{
 	//	return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value));
diff --git a/src/Reactor/Reactor.hpp b/src/Reactor/Reactor.hpp
index 60cbabc..cbef855 100644
--- a/src/Reactor/Reactor.hpp
+++ b/src/Reactor/Reactor.hpp
@@ -34,6 +34,8 @@
 	class SByte16;
 	class Short;
 	class UShort;
+	class Short2;
+	class UShort2;
 	class Short4;
 	class UShort4;
 	class Short8;
@@ -429,11 +431,13 @@
 	class Byte4 : public Variable<Byte4>
 	{
 	public:
+		explicit Byte4(RValue<Byte8> cast);
+
 	//	Byte4();
 	//	Byte4(int x, int y, int z, int w);
 	//	Byte4(RValue<Byte4> rhs);
 	//	Byte4(const Byte4 &rhs);
-	//	Byte4(const Reference<Byte4> &rhs);
+		Byte4(const Reference<Byte4> &rhs);
 
 	//	RValue<Byte4> operator=(RValue<Byte4> rhs) const;
 	//	RValue<Byte4> operator=(const Byte4 &rhs) const;
@@ -519,7 +523,6 @@
 	public:
 		Byte8();
 		Byte8(uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7);
-		Byte8(int64_t x);
 		Byte8(RValue<Byte8> rhs);
 		Byte8(const Byte8 &rhs);
 		Byte8(const Reference<Byte8> &rhs);
@@ -573,7 +576,6 @@
 	public:
 		SByte8();
 		SByte8(uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7);
-		SByte8(int64_t x);
 		SByte8(RValue<SByte8> rhs);
 		SByte8(const SByte8 &rhs);
 		SByte8(const Reference<SByte8> &rhs);
@@ -709,6 +711,22 @@
 //	RValue<SByte16> operator--(const SByte16 &val, int);   // Post-decrement
 //	const SByte16 &operator--(const SByte16 &val);   // Pre-decrement
 
+	class Short2 : public Variable<Short2>
+	{
+	public:
+		explicit Short2(RValue<Short4> cast);
+
+		static Type *getType();
+	};
+
+	class UShort2 : public Variable<UShort2>
+	{
+	public:
+		explicit UShort2(RValue<UShort4> cast);
+
+		static Type *getType();
+	};
+
 	class Short4 : public Variable<Short4>
 	{
 	public:
@@ -822,9 +840,9 @@
 	RValue<UShort4> operator*(RValue<UShort4> lhs, RValue<UShort4> rhs);
 //	RValue<UShort4> operator/(RValue<UShort4> lhs, RValue<UShort4> rhs);
 //	RValue<UShort4> operator%(RValue<UShort4> lhs, RValue<UShort4> rhs);
-//	RValue<UShort4> operator&(RValue<UShort4> lhs, RValue<UShort4> rhs);
-//	RValue<UShort4> operator|(RValue<UShort4> lhs, RValue<UShort4> rhs);
-//	RValue<UShort4> operator^(RValue<UShort4> lhs, RValue<UShort4> rhs);
+	RValue<UShort4> operator&(RValue<UShort4> lhs, RValue<UShort4> rhs);
+	RValue<UShort4> operator|(RValue<UShort4> lhs, RValue<UShort4> rhs);
+	RValue<UShort4> operator^(RValue<UShort4> lhs, RValue<UShort4> rhs);
 	RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs);
 	RValue<UShort4> operator>>(RValue<UShort4> lhs, unsigned char rhs);
 	RValue<UShort4> operator<<(RValue<UShort4> lhs, RValue<Long1> rhs);
diff --git a/src/Reactor/SubzeroReactor.cpp b/src/Reactor/SubzeroReactor.cpp
index ae7ad53..c0778af 100644
--- a/src/Reactor/SubzeroReactor.cpp
+++ b/src/Reactor/SubzeroReactor.cpp
@@ -1939,22 +1939,28 @@
 		assert(false && "UNIMPLEMENTED"); return nullptr;
 	}
 
+	Byte4::Byte4(RValue<Byte8> cast)
+	{
+	//	xyzw.parent = this;
+
+		storeValue(Nucleus::createBitCast(cast.value, getType()));
+	}
+
+	Byte4::Byte4(const Reference<Byte4> &rhs)
+	{
+	//	xyzw.parent = this;
+
+		assert(false && "UNIMPLEMENTED");
+	}
+
 	Type *Byte4::getType()
 	{
-		#if 0
-			return VectorType::get(Byte::getType(), 4);
-		#else
-			return UInt::getType();   // FIXME
-		#endif
+		assert(false && "UNIMPLEMENTED"); return nullptr;
 	}
 
 	Type *SByte4::getType()
 	{
-		#if 0
-			return VectorType::get(SByte::getType(), 4);
-		#else
-			return Int::getType();   // FIXME
-		#endif
+		assert(false && "UNIMPLEMENTED"); return nullptr;
 	}
 
 	Byte8::Byte8()
@@ -1965,11 +1971,8 @@
 	Byte8::Byte8(uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7)
 	{
 	//	xyzw.parent = this;
-	}
 
-	Byte8::Byte8(int64_t x)
-	{
-	//	xyzw.parent = this;
+		assert(false && "UNIMPLEMENTED");
 	}
 
 	Byte8::Byte8(RValue<Byte8> rhs)
@@ -2190,13 +2193,6 @@
 		assert(false && "UNIMPLEMENTED");
 	}
 
-	SByte8::SByte8(int64_t x)
-	{
-	//	xyzw.parent = this;
-
-		assert(false && "UNIMPLEMENTED");
-	}
-
 	SByte8::SByte8(RValue<SByte8> rhs)
 	{
 	//	xyzw.parent = this;
@@ -2454,6 +2450,26 @@
 		assert(false && "UNIMPLEMENTED"); return nullptr;
 	}
 
+	Short2::Short2(RValue<Short4> cast)
+	{
+		assert(false && "UNIMPLEMENTED");
+	}
+
+	Type *Short2::getType()
+	{
+		assert(false && "UNIMPLEMENTED"); return nullptr;
+	}
+
+	UShort2::UShort2(RValue<UShort4> cast)
+	{
+		assert(false && "UNIMPLEMENTED");
+	}
+
+	Type *UShort2::getType()
+	{
+		assert(false && "UNIMPLEMENTED"); return nullptr;
+	}
+
 	Short4::Short4(RValue<Int> cast)
 	{
 		Value *extend = Nucleus::createZExt(cast.value, Long::getType());
@@ -2944,6 +2960,21 @@
 		assert(false && "UNIMPLEMENTED"); return RValue<UShort4>(V(nullptr));
 	}
 
+	RValue<UShort4> operator&(RValue<UShort4> lhs, RValue<UShort4> rhs)
+	{
+		assert(false && "UNIMPLEMENTED"); return RValue<UShort4>(V(nullptr));
+	}
+
+	RValue<UShort4> operator|(RValue<UShort4> lhs, RValue<UShort4> rhs)
+	{
+		assert(false && "UNIMPLEMENTED"); return RValue<UShort4>(V(nullptr));
+	}
+
+	RValue<UShort4> operator^(RValue<UShort4> lhs, RValue<UShort4> rhs)
+	{
+		assert(false && "UNIMPLEMENTED"); return RValue<UShort4>(V(nullptr));
+	}
+
 	RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs)
 	{
 		assert(false && "UNIMPLEMENTED"); return RValue<UShort4>(V(nullptr));
diff --git a/src/Renderer/Blitter.cpp b/src/Renderer/Blitter.cpp
index 086e7b7..ced6432 100644
--- a/src/Renderer/Blitter.cpp
+++ b/src/Renderer/Blitter.cpp
@@ -359,8 +359,7 @@
 			if(writeRGBA)
 			{
 				UShort4 c0 = As<UShort4>(RoundShort4(c.zyxw));
-				Byte8 c1 = Pack(c0, c0);
-				*Pointer<UInt>(element) = UInt(As<Long>(c1));
+				*Pointer<Byte4>(element) = Byte4(Pack(c0, c0));
 			}
 			else
 			{
@@ -375,8 +374,7 @@
 			if(writeRGBA)
 			{
 				UShort4 c0 = As<UShort4>(RoundShort4(c));
-				Byte8 c1 = Pack(c0, c0);
-				*Pointer<UInt>(element) = UInt(As<Long>(c1));
+				*Pointer<Byte4>(element) = Byte4(Pack(c0, c0));
 			}
 			else
 			{
@@ -389,9 +387,8 @@
 		case FORMAT_X8R8G8B8:
 			if(writeRGBA)
 			{
-				UShort4 c0 = As<UShort4>(RoundShort4(c.zyxw));
-				Byte8 c1 = Pack(c0, c0);
-				*Pointer<UInt>(element) = UInt(As<Long>(c1)) | 0xFF000000;
+				UShort4 c0 = As<UShort4>(RoundShort4(c.zyxw)) | UShort4(0x0000, 0x0000, 0x0000, 0x00FF);   // X8 = 0xFF: 0x00FF survives a signed-saturating pack (0xFFFF would be -1 and clamp to 0)
+				*Pointer<Byte4>(element) = Byte4(Pack(c0, c0));
 			}
 			else
 			{
@@ -405,9 +402,8 @@
 		case FORMAT_SRGB8_X8:
 			if(writeRGBA)
 			{
-				UShort4 c0 = As<UShort4>(RoundShort4(c));
-				Byte8 c1 = Pack(c0, c0);
-				*Pointer<UInt>(element) = UInt(As<Long>(c1)) | 0xFF000000;
+				UShort4 c0 = As<UShort4>(RoundShort4(c)) | UShort4(0x0000, 0x0000, 0x0000, 0x00FF);   // X8 = 0xFF: 0x00FF survives a signed-saturating pack (0xFFFF would be -1 and clamp to 0)
+				*Pointer<Byte4>(element) = Byte4(Pack(c0, c0));
 			}
 			else
 			{
@@ -522,7 +518,7 @@
 		case FORMAT_G16R16I:
 			if(writeR && writeG)
 			{
-				*Pointer<UInt>(element) = UInt(As<Long>(Short4(RoundInt(c))));
+				*Pointer<Short2>(element) = Short2(Short4(RoundInt(c)));
 			}
 			else
 			{
@@ -564,7 +560,7 @@
 		case FORMAT_G16R16:
 			if(writeR && writeG)
 			{
-				*Pointer<UInt>(element) = UInt(As<Long>(UShort4(RoundInt(c))));
+				*Pointer<UShort2>(element) = UShort2(UShort4(RoundInt(c)));
 			}
 			else
 			{
diff --git a/src/Shader/PixelRoutine.cpp b/src/Shader/PixelRoutine.cpp
index 41472a4..2f2b43d 100644
--- a/src/Shader/PixelRoutine.cpp
+++ b/src/Shader/PixelRoutine.cpp
@@ -355,10 +355,10 @@
 		switch(stencilCompareMode)
 		{
 		case STENCIL_ALWAYS:
-			value = Byte8(0xFFFFFFFFFFFFFFFF);
+			value = Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
 			break;
 		case STENCIL_NEVER:
-			value = Byte8(0x0000000000000000);
+			value = Byte8(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
 			break;
 		case STENCIL_LESS:			// a < b ~ b > a
 			value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
@@ -369,7 +369,7 @@
 			break;
 		case STENCIL_NOTEQUAL:		// a != b ~ !(a == b)
 			value = CmpEQ(value, *Pointer<Byte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedQ)));
-			value ^= Byte8(0xFFFFFFFFFFFFFFFF);
+			value ^= Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
 			break;
 		case STENCIL_LESSEQUAL:	// a <= b ~ (b > a) || (a == b)
 			equal = value;
@@ -387,7 +387,7 @@
 		case STENCIL_GREATEREQUAL:	// a >= b ~ !(a < b) ~ !(b > a)
 			value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
 			value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ)));
-			value ^= Byte8(0xFFFFFFFFFFFFFFFF);
+			value ^= Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
 			break;
 		default:
 			ASSERT(false);
@@ -799,7 +799,7 @@
 		bufferValue &= *Pointer<Byte8>(constants + OFFSET(Constants,invMaskB4Q) + 8 * cMask);
 		newValue |= bufferValue;
 
-		*Pointer<UInt>(buffer) = UInt(As<Long>(newValue));
+		*Pointer<Byte4>(buffer) = Byte4(newValue);
 	}
 
 	void PixelRoutine::stencilOperation(Byte8 &newValue, Byte8 &bufferValue, StencilOperation stencilPassOperation, StencilOperation stencilZFailOperation, StencilOperation stencilFailOperation, bool CCW, Int &zMask, Int &sMask)
@@ -843,7 +843,7 @@
 			output = bufferValue;
 			break;
 		case OPERATION_ZERO:
-			output = Byte8(0x0000000000000000);
+			output = Byte8(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
 			break;
 		case OPERATION_REPLACE:
 			output = *Pointer<Byte8>(data + OFFSET(DrawData,stencil[CCW].referenceQ));
@@ -855,7 +855,7 @@
 			output = SubSat(bufferValue, Byte8(1, 1, 1, 1, 1, 1, 1, 1));
 			break;
 		case OPERATION_INVERT:
-			output = bufferValue ^ Byte8(0xFFFFFFFFFFFFFFFF);
+			output = bufferValue ^ Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
 			break;
 		case OPERATION_INCR:
 			output = bufferValue + Byte8(1, 1, 1, 1, 1, 1, 1, 1);
diff --git a/src/Shader/SetupRoutine.cpp b/src/Shader/SetupRoutine.cpp
index 7db625c..1845047 100644
--- a/src/Shader/SetupRoutine.cpp
+++ b/src/Shader/SetupRoutine.cpp
@@ -114,13 +114,13 @@
 				{
 					If(A > 0.0f)
 					{
-						*Pointer<Byte8>(primitive + OFFSET(Primitive,clockwiseMask)) =  Byte8(0xFFFFFFFFFFFFFFFF);
-						*Pointer<Byte8>(primitive + OFFSET(Primitive,invClockwiseMask)) = Byte8(0x0000000000000000);
+						*Pointer<Byte8>(primitive + OFFSET(Primitive,clockwiseMask)) = Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
+						*Pointer<Byte8>(primitive + OFFSET(Primitive,invClockwiseMask)) = Byte8(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
 					}
 					Else
 					{
-						*Pointer<Byte8>(primitive + OFFSET(Primitive,clockwiseMask)) =  Byte8(0x0000000000000000);
-						*Pointer<Byte8>(primitive + OFFSET(Primitive,invClockwiseMask)) = Byte8(0xFFFFFFFFFFFFFFFF);
+						*Pointer<Byte8>(primitive + OFFSET(Primitive,clockwiseMask)) = Byte8(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
+						*Pointer<Byte8>(primitive + OFFSET(Primitive,invClockwiseMask)) = Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
 					}
 				}
 
@@ -133,8 +133,8 @@
 			{
 				if(state.twoSidedStencil)
 				{
-					*Pointer<Byte8>(primitive + OFFSET(Primitive,clockwiseMask)) =  Byte8(0xFFFFFFFFFFFFFFFF);
-					*Pointer<Byte8>(primitive + OFFSET(Primitive,invClockwiseMask)) = Byte8(0x0000000000000000);
+					*Pointer<Byte8>(primitive + OFFSET(Primitive,clockwiseMask)) = Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
+					*Pointer<Byte8>(primitive + OFFSET(Primitive,invClockwiseMask)) = Byte8(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
 				}
 			}