Address cubemap faces as consecutive layers

Vulkan cubemaps use six consecutive layers for the faces, so we can
reuse the same addressing logic as for 2D array textures. Hence the
3D lookup vector becomes a 2D coordinate plus a layer coordinate after
projection. The only difference is that we don't have to clamp the
layer coordinate to the range of layers.

This simplifies the sampled image descriptor since we only have to store
a single pointer per mipmap level. We also avoid the per-lane lookup
(gather) operation. YCbCr sampling previously reused the same array of
buffer pointers for its planes; it now loads the plane pointers itself.

Also eliminate the unused lodOrBias parameter from computeLod*(). The
bias is applied afterwards, by the caller.

Bug: b/134164485
Bug: b/129523279
Change-Id: I5c349ff458aabb1d77e32104429b635d96237292
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/31088
Tested-by: Nicolas Capens <nicolascapens@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
diff --git a/src/Device/Blitter.cpp b/src/Device/Blitter.cpp
index f105190..957f819 100644
--- a/src/Device/Blitter.cpp
+++ b/src/Device/Blitter.cpp
@@ -1855,7 +1855,7 @@
 			// Low Border, Low Pixel, High Border, High Pixel
 			Int LB(-1), LP(0), HB(dim), HP(dim-1);
 
-			for(int i = 0; i < 6; ++i)
+			for(int face = 0; face < 6; face++)
 			{
 				computeCubeCorner(layers, LB, LP, LB, LP, pitchB, state);
 				computeCubeCorner(layers, LB, LP, HB, HP, pitchB, state);
diff --git a/src/Device/Sampler.hpp b/src/Device/Sampler.hpp
index e2ec5ef..73afd67 100644
--- a/src/Device/Sampler.hpp
+++ b/src/Device/Sampler.hpp
@@ -29,7 +29,7 @@
 {
 	struct Mipmap
 	{
-		const void *buffer[6];
+		const void *buffer;
 
 		short4 uHalf;
 		short4 vHalf;
@@ -102,7 +102,8 @@
 		ADDRESSING_MIRRORONCE,
 		ADDRESSING_BORDER,     // Single color
 		ADDRESSING_SEAMLESS,   // Border of pixels
-		ADDRESSING_LAYER,
+		ADDRESSING_CUBEFACE,   // Cube face layer
+		ADDRESSING_LAYER,      // Array layer
 		ADDRESSING_TEXELFETCH,
 
 		ADDRESSING_LAST = ADDRESSING_TEXELFETCH
diff --git a/src/Pipeline/SamplerCore.cpp b/src/Pipeline/SamplerCore.cpp
index 84a32b5..9b89b34 100644
--- a/src/Pipeline/SamplerCore.cpp
+++ b/src/Pipeline/SamplerCore.cpp
@@ -71,7 +71,6 @@
 		Float4 wwww = w;
 		Float4 qqqq = q;
 
-		Int face[4];
 		Float lod;
 		Float anisotropy;
 		Float4 uDelta;
@@ -80,7 +79,8 @@
 
 		if(state.textureType == TEXTURE_CUBE)
 		{
-			cubeFace(face, uuuu, vvvv, u, v, w, M);
+			Int4 face = cubeFace(uuuu, vvvv, u, v, w, M);
+			wwww = As<Float4>(face);
 		}
 
 		if(function == Implicit || function == Bias || function == Grad || function == Query)
@@ -89,16 +89,16 @@
 			{
 				if(state.textureType != TEXTURE_CUBE)
 				{
-					computeLod(texture, sampler, lod, anisotropy, uDelta, vDelta, uuuu, vvvv, lodOrBias.x, dsx, dsy, function);
+					computeLod(texture, sampler, lod, anisotropy, uDelta, vDelta, uuuu, vvvv, dsx, dsy, function);
 				}
 				else
 				{
-					computeLodCube(texture, sampler, lod, u, v, w, lodOrBias.x, dsx, dsy, M, function);
+					computeLodCube(texture, sampler, lod, u, v, w, dsx, dsy, M, function);
 				}
 			}
 			else
 			{
-				computeLod3D(texture, sampler, lod, uuuu, vvvv, wwww, lodOrBias.x, dsx, dsy, function);
+				computeLod3D(texture, sampler, lod, uuuu, vvvv, wwww, dsx, dsy, function);
 			}
 
 			if(function == Bias)
@@ -155,7 +155,7 @@
 
 		if(use32BitFiltering)
 		{
-			c = sampleFloatFilter(texture, uuuu, vvvv, wwww, qqqq, offset, lod, anisotropy, uDelta, vDelta, face, function);
+			c = sampleFloatFilter(texture, uuuu, vvvv, wwww, qqqq, offset, lod, anisotropy, uDelta, vDelta, function);
 
 			if (!hasFloatTexture() && !hasUnnormalizedIntegerTexture() && !state.compareEnable)
 			{
@@ -197,7 +197,7 @@
 		}
 		else  // 16-bit filtering.
 		{
-			Vector4s cs = sampleFilter(texture, uuuu, vvvv, wwww, offset, lod, anisotropy, uDelta, vDelta, face, function);
+			Vector4s cs = sampleFilter(texture, uuuu, vvvv, wwww, offset, lod, anisotropy, uDelta, vDelta, function);
 
 			switch (state.textureFormat)
 			{
@@ -315,9 +315,9 @@
 		return uvw;
 	}
 
-	Vector4s SamplerCore::sampleFilter(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], SamplerFunction function)
+	Vector4s SamplerCore::sampleFilter(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, SamplerFunction function)
 	{
-		Vector4s c = sampleAniso(texture, u, v, w, offset, lod, anisotropy, uDelta, vDelta, face, false, function);
+		Vector4s c = sampleAniso(texture, u, v, w, offset, lod, anisotropy, uDelta, vDelta, false, function);
 
 		if(function == Fetch)
 		{
@@ -326,7 +326,7 @@
 
 		if(state.mipmapFilter == MIPMAP_LINEAR)
 		{
-			Vector4s cc = sampleAniso(texture, u, v, w, offset, lod, anisotropy, uDelta, vDelta, face, true, function);
+			Vector4s cc = sampleAniso(texture, u, v, w, offset, lod, anisotropy, uDelta, vDelta, true, function);
 
 			lod *= Float(1 << 16);
 
@@ -360,13 +360,13 @@
 		return c;
 	}
 
-	Vector4s SamplerCore::sampleAniso(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], bool secondLOD, SamplerFunction function)
+	Vector4s SamplerCore::sampleAniso(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, bool secondLOD, SamplerFunction function)
 	{
 		Vector4s c;
 
 		if(state.textureFilter != FILTER_ANISOTROPIC || function == Lod || function == Fetch)
 		{
-			c = sampleQuad(texture, u, v, w, offset, lod, face, secondLOD, function);
+			c = sampleQuad(texture, u, v, w, offset, lod, secondLOD, function);
 		}
 		else
 		{
@@ -397,7 +397,7 @@
 
 			Do
 			{
-				c = sampleQuad(texture, u0, v0, w, offset, lod, face, secondLOD, function);
+				c = sampleQuad(texture, u0, v0, w, offset, lod, secondLOD, function);
 
 				u0 += du;
 				v0 += dv;
@@ -420,11 +420,11 @@
 		return c;
 	}
 
-	Vector4s SamplerCore::sampleQuad(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function)
+	Vector4s SamplerCore::sampleQuad(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, bool secondLOD, SamplerFunction function)
 	{
 		if(state.textureType != TEXTURE_3D)
 		{
-			return sampleQuad2D(texture, u, v, w, offset, lod, face, secondLOD, function);
+			return sampleQuad2D(texture, u, v, w, offset, lod, secondLOD, function);
 		}
 		else
 		{
@@ -432,7 +432,7 @@
 		}
 	}
 
-	Vector4s SamplerCore::sampleQuad2D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function)
+	Vector4s SamplerCore::sampleQuad2D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, bool secondLOD, SamplerFunction function)
 	{
 		Vector4s c;
 
@@ -440,9 +440,8 @@
 		bool gather = (state.textureFilter == FILTER_GATHER);
 
 		Pointer<Byte> mipmap;
-		Pointer<Byte> buffer[4];
-
-		selectMipmap(texture, buffer, mipmap, lod, face, secondLOD);
+		Pointer<Byte> buffer;
+		selectMipmap(texture, mipmap, buffer, lod, secondLOD);
 
 		bool texelFetch = (function == Fetch);
 
@@ -644,10 +643,8 @@
 		int componentCount = textureComponentCount();
 
 		Pointer<Byte> mipmap;
-		Pointer<Byte> buffer[4];
-		Int face[4];
-
-		selectMipmap(texture, buffer, mipmap, lod, face, secondLOD);
+		Pointer<Byte> buffer;
+		selectMipmap(texture, mipmap, buffer, lod, secondLOD);
 
 		bool texelFetch = (function == Fetch);
 
@@ -762,9 +759,9 @@
 		return c_;
 	}
 
-	Vector4f SamplerCore::sampleFloatFilter(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], SamplerFunction function)
+	Vector4f SamplerCore::sampleFloatFilter(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, SamplerFunction function)
 	{
-		Vector4f c = sampleFloatAniso(texture, u, v, w, q, offset, lod, anisotropy, uDelta, vDelta, face, false, function);
+		Vector4f c = sampleFloatAniso(texture, u, v, w, q, offset, lod, anisotropy, uDelta, vDelta, false, function);
 
 		if(function == Fetch)
 		{
@@ -773,7 +770,7 @@
 
 		if(state.mipmapFilter == MIPMAP_LINEAR)
 		{
-			Vector4f cc = sampleFloatAniso(texture, u, v, w, q, offset, lod, anisotropy, uDelta, vDelta, face, true, function);
+			Vector4f cc = sampleFloatAniso(texture, u, v, w, q, offset, lod, anisotropy, uDelta, vDelta, true, function);
 
 			Float4 lod4 = Float4(Frac(lod));
 
@@ -786,13 +783,13 @@
 		return c;
 	}
 
-	Vector4f SamplerCore::sampleFloatAniso(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], bool secondLOD, SamplerFunction function)
+	Vector4f SamplerCore::sampleFloatAniso(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, bool secondLOD, SamplerFunction function)
 	{
 		Vector4f c;
 
 		if(state.textureFilter != FILTER_ANISOTROPIC || function == Lod || function == Fetch)
 		{
-			c = sampleFloat(texture, u, v, w, q, offset, lod, face, secondLOD, function);
+			c = sampleFloat(texture, u, v, w, q, offset, lod, secondLOD, function);
 		}
 		else
 		{
@@ -821,7 +818,7 @@
 
 			Do
 			{
-				c = sampleFloat(texture, u0, v0, w, q, offset, lod, face, secondLOD, function);
+				c = sampleFloat(texture, u0, v0, w, q, offset, lod, secondLOD, function);
 
 				u0 += du;
 				v0 += dv;
@@ -844,11 +841,11 @@
 		return c;
 	}
 
-	Vector4f SamplerCore::sampleFloat(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function)
+	Vector4f SamplerCore::sampleFloat(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, Float &lod, bool secondLOD, SamplerFunction function)
 	{
 		if(state.textureType != TEXTURE_3D)
 		{
-			return sampleFloat2D(texture, u, v, w, q, offset, lod, face, secondLOD, function);
+			return sampleFloat2D(texture, u, v, w, q, offset, lod, secondLOD, function);
 		}
 		else
 		{
@@ -856,7 +853,7 @@
 		}
 	}
 
-	Vector4f SamplerCore::sampleFloat2D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function)
+	Vector4f SamplerCore::sampleFloat2D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, Float &lod, bool secondLOD, SamplerFunction function)
 	{
 		Vector4f c;
 
@@ -864,9 +861,8 @@
 		bool gather = (state.textureFilter == FILTER_GATHER);
 
 		Pointer<Byte> mipmap;
-		Pointer<Byte> buffer[4];
-
-		selectMipmap(texture, buffer, mipmap, lod, face, secondLOD);
+		Pointer<Byte> buffer;
+		selectMipmap(texture, mipmap, buffer, lod, secondLOD);
 
 		Int4 x0, x1, y0, y1, z0;
 		Float4 fu, fv, fw;
@@ -877,10 +873,9 @@
 
 		Int4 pitchP = *Pointer<Int4>(mipmap + OFFSET(Mipmap, pitchP), 16);
 		y0 *= pitchP;
-		if(hasThirdCoordinate())
+		if(state.addressingModeW != ADDRESSING_UNUSED)
 		{
-			Int4 sliceP = *Pointer<Int4>(mipmap + OFFSET(Mipmap, sliceP), 16);
-			z0 *= sliceP;
+			z0 *= *Pointer<Int4>(mipmap + OFFSET(Mipmap, sliceP), 16);
 		}
 
 		if(state.textureFilter == FILTER_POINT || (function == Fetch))
@@ -942,10 +937,8 @@
 		int componentCount = textureComponentCount();
 
 		Pointer<Byte> mipmap;
-		Pointer<Byte> buffer[4];
-		Int face[4];
-
-		selectMipmap(texture, buffer, mipmap, lod, face, secondLOD);
+		Pointer<Byte> buffer;
+		selectMipmap(texture, mipmap, buffer, lod, secondLOD);
 
 		Int4 x0, x1, y0, y1, z0, z1;
 		Float4 fu, fv, fw;
@@ -1038,7 +1031,7 @@
 		return lod;
 	}
 
-	void SamplerCore::computeLod(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Float4 &uuuu, Float4 &vvvv, const Float &lodOrBias, Vector4f &dsx, Vector4f &dsy, SamplerFunction function)
+	void SamplerCore::computeLod(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Float4 &uuuu, Float4 &vvvv, Vector4f &dsx, Vector4f &dsy, SamplerFunction function)
 	{
 		Float4 duvdxy;
 
@@ -1084,7 +1077,7 @@
 		lod = log2sqrt(lod);   // log2(sqrt(lod))
 	}
 
-	void SamplerCore::computeLodCube(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float &lod, Float4 &u, Float4 &v, Float4 &w, const Float &lodOrBias, Vector4f &dsx, Vector4f &dsy, Float4 &M, SamplerFunction function)
+	void SamplerCore::computeLodCube(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float &lod, Float4 &u, Float4 &v, Float4 &w, Vector4f &dsx, Vector4f &dsy, Float4 &M, SamplerFunction function)
 	{
 		Float4 dudxy, dvdxy, dsdxy;
 
@@ -1125,7 +1118,7 @@
 		lod = log2(lod);
 	}
 
-	void SamplerCore::computeLod3D(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float &lod, Float4 &uuuu, Float4 &vvvv, Float4 &wwww, const Float &lodOrBias, Vector4f &dsx, Vector4f &dsy, SamplerFunction function)
+	void SamplerCore::computeLod3D(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float &lod, Float4 &uuuu, Float4 &vvvv, Float4 &wwww, Vector4f &dsx, Vector4f &dsy, SamplerFunction function)
 	{
 		Float4 dudxy, dvdxy, dsdxy;
 
@@ -1159,8 +1152,11 @@
 		lod = log2sqrt(lod);   // log2(sqrt(lod))
 	}
 
-	void SamplerCore::cubeFace(Int face[4], Float4 &U, Float4 &V, Float4 &x, Float4 &y, Float4 &z, Float4 &M)
+	Int4 SamplerCore::cubeFace(Float4 &U, Float4 &V, Float4 &x, Float4 &y, Float4 &z, Float4 &M)
 	{
+		// TODO: Comply with Vulkan recommendation:
+		// Vulkan 1.1: "The rules should have as the first rule that rz wins over ry and rx, and the second rule that ry wins over rx."
+
 		Int4 xn = CmpLT(x, Float4(0.0f));   // x < 0
 		Int4 yn = CmpLT(y, Float4(0.0f));   // y < 0
 		Int4 zn = CmpLT(z, Float4(0.0f));   // z < 0
@@ -1189,13 +1185,15 @@
 		Int4 n = ((xn & xMajor) | (yn & yMajor) | (zn & zMajor)) & Int4(0x80000000);
 		Int negative = SignMask(n);
 
-		face[0] = *Pointer<Int>(constants + OFFSET(Constants,transposeBit0) + negative * 4);
-		face[0] |= *Pointer<Int>(constants + OFFSET(Constants,transposeBit1) + yAxis * 4);
-		face[0] |= *Pointer<Int>(constants + OFFSET(Constants,transposeBit2) + zAxis * 4);
-		face[1] = (face[0] >> 4)  & 0x7;
-		face[2] = (face[0] >> 8)  & 0x7;
-		face[3] = (face[0] >> 12) & 0x7;
-		face[0] &= 0x7;
+		Int faces = *Pointer<Int>(constants + OFFSET(Constants,transposeBit0) + negative * 4);
+		faces |= *Pointer<Int>(constants + OFFSET(Constants,transposeBit1) + yAxis * 4);
+		faces |= *Pointer<Int>(constants + OFFSET(Constants,transposeBit2) + zAxis * 4);
+
+		Int4 face;
+		face.x = faces & 0x7;
+		face.y = (faces >> 4)  & 0x7;
+		face.z = (faces >> 8)  & 0x7;
+		face.w = (faces >> 12) & 0x7;
 
 		M = Max(Max(absX, absY), Max(absZ, Float4(std::numeric_limits<float>::min())));
 
@@ -1208,6 +1206,8 @@
 		M = reciprocal(M) * Float4(0.5f);
 		U = U * M + Float4(0.5f);
 		V = V * M + Float4(0.5f);
+
+		return face;
 	}
 
 	Short4 SamplerCore::applyOffset(Short4 &uvw, Float4 &offset, const Int4 &whd, AddressingMode mode)
@@ -1315,7 +1315,7 @@
 	{
 		UInt4 indices = uuuu + vvvv;
 
-		if(hasThirdCoordinate())
+		if(state.addressingModeW != ADDRESSING_UNUSED)
 		{
 			indices += As<UInt4>(wwww);
 		}
@@ -1333,21 +1333,16 @@
 		}
 	}
 
-	Vector4s SamplerCore::sampleTexel(UInt index[4], Pointer<Byte> buffer[4])
+	Vector4s SamplerCore::sampleTexel(UInt index[4], Pointer<Byte> buffer)
 	{
 		Vector4s c;
 
-		int f0 = state.textureType == TEXTURE_CUBE ? 0 : 0;
-		int f1 = state.textureType == TEXTURE_CUBE ? 1 : 0;
-		int f2 = state.textureType == TEXTURE_CUBE ? 2 : 0;
-		int f3 = state.textureType == TEXTURE_CUBE ? 3 : 0;
-
 		if(has16bitTextureFormat())
 		{
-			c.x = Insert(c.x, Pointer<Short>(buffer[f0])[index[0]], 0);
-			c.x = Insert(c.x, Pointer<Short>(buffer[f1])[index[1]], 1);
-			c.x = Insert(c.x, Pointer<Short>(buffer[f2])[index[2]], 2);
-			c.x = Insert(c.x, Pointer<Short>(buffer[f3])[index[3]], 3);
+			c.x = Insert(c.x, Pointer<Short>(buffer)[index[0]], 0);
+			c.x = Insert(c.x, Pointer<Short>(buffer)[index[1]], 1);
+			c.x = Insert(c.x, Pointer<Short>(buffer)[index[2]], 2);
+			c.x = Insert(c.x, Pointer<Short>(buffer)[index[3]], 3);
 
 			switch(state.textureFormat)
 			{
@@ -1378,10 +1373,10 @@
 			{
 			case 4:
 				{
-					Byte4 c0 = Pointer<Byte4>(buffer[f0])[index[0]];
-					Byte4 c1 = Pointer<Byte4>(buffer[f1])[index[1]];
-					Byte4 c2 = Pointer<Byte4>(buffer[f2])[index[2]];
-					Byte4 c3 = Pointer<Byte4>(buffer[f3])[index[3]];
+					Byte4 c0 = Pointer<Byte4>(buffer)[index[0]];
+					Byte4 c1 = Pointer<Byte4>(buffer)[index[1]];
+					Byte4 c2 = Pointer<Byte4>(buffer)[index[2]];
+					Byte4 c3 = Pointer<Byte4>(buffer)[index[3]];
 					c.x = Unpack(c0, c1);
 					c.y = Unpack(c2, c3);
 
@@ -1445,10 +1440,10 @@
 				}
 				break;
 			case 2:
-				c.x = Insert(c.x, Pointer<Short>(buffer[f0])[index[0]], 0);
-				c.x = Insert(c.x, Pointer<Short>(buffer[f1])[index[1]], 1);
-				c.x = Insert(c.x, Pointer<Short>(buffer[f2])[index[2]], 2);
-				c.x = Insert(c.x, Pointer<Short>(buffer[f3])[index[3]], 3);
+				c.x = Insert(c.x, Pointer<Short>(buffer)[index[0]], 0);
+				c.x = Insert(c.x, Pointer<Short>(buffer)[index[1]], 1);
+				c.x = Insert(c.x, Pointer<Short>(buffer)[index[2]], 2);
+				c.x = Insert(c.x, Pointer<Short>(buffer)[index[3]], 3);
 
 				switch(state.textureFormat)
 				{
@@ -1474,10 +1469,10 @@
 				break;
 			case 1:
 				{
-					Int c0 = Int(*Pointer<Byte>(buffer[f0] + index[0]));
-					Int c1 = Int(*Pointer<Byte>(buffer[f1] + index[1]));
-					Int c2 = Int(*Pointer<Byte>(buffer[f2] + index[2]));
-					Int c3 = Int(*Pointer<Byte>(buffer[f3] + index[3]));
+					Int c0 = Int(*Pointer<Byte>(buffer + index[0]));
+					Int c1 = Int(*Pointer<Byte>(buffer + index[1]));
+					Int c2 = Int(*Pointer<Byte>(buffer + index[2]));
+					Int c3 = Int(*Pointer<Byte>(buffer + index[3]));
 					c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
 
 					switch(state.textureFormat)
@@ -1514,33 +1509,33 @@
 			switch(textureComponentCount())
 			{
 			case 4:
-				c.x = Pointer<Short4>(buffer[f0])[index[0]];
-				c.y = Pointer<Short4>(buffer[f1])[index[1]];
-				c.z = Pointer<Short4>(buffer[f2])[index[2]];
-				c.w = Pointer<Short4>(buffer[f3])[index[3]];
+				c.x = Pointer<Short4>(buffer)[index[0]];
+				c.y = Pointer<Short4>(buffer)[index[1]];
+				c.z = Pointer<Short4>(buffer)[index[2]];
+				c.w = Pointer<Short4>(buffer)[index[3]];
 				transpose4x4(c.x, c.y, c.z, c.w);
 				break;
 			case 3:
-				c.x = Pointer<Short4>(buffer[f0])[index[0]];
-				c.y = Pointer<Short4>(buffer[f1])[index[1]];
-				c.z = Pointer<Short4>(buffer[f2])[index[2]];
-				c.w = Pointer<Short4>(buffer[f3])[index[3]];
+				c.x = Pointer<Short4>(buffer)[index[0]];
+				c.y = Pointer<Short4>(buffer)[index[1]];
+				c.z = Pointer<Short4>(buffer)[index[2]];
+				c.w = Pointer<Short4>(buffer)[index[3]];
 				transpose4x3(c.x, c.y, c.z, c.w);
 				break;
 			case 2:
-				c.x = *Pointer<Short4>(buffer[f0] + 4 * index[0]);
-				c.x = As<Short4>(UnpackLow(c.x, *Pointer<Short4>(buffer[f1] + 4 * index[1])));
-				c.z = *Pointer<Short4>(buffer[f2] + 4 * index[2]);
-				c.z = As<Short4>(UnpackLow(c.z, *Pointer<Short4>(buffer[f3] + 4 * index[3])));
+				c.x = *Pointer<Short4>(buffer + 4 * index[0]);
+				c.x = As<Short4>(UnpackLow(c.x, *Pointer<Short4>(buffer + 4 * index[1])));
+				c.z = *Pointer<Short4>(buffer + 4 * index[2]);
+				c.z = As<Short4>(UnpackLow(c.z, *Pointer<Short4>(buffer + 4 * index[3])));
 				c.y = c.x;
 				c.x = UnpackLow(As<Int2>(c.x), As<Int2>(c.z));
 				c.y = UnpackHigh(As<Int2>(c.y), As<Int2>(c.z));
 				break;
 			case 1:
-				c.x = Insert(c.x, Pointer<Short>(buffer[f0])[index[0]], 0);
-				c.x = Insert(c.x, Pointer<Short>(buffer[f1])[index[1]], 1);
-				c.x = Insert(c.x, Pointer<Short>(buffer[f2])[index[2]], 2);
-				c.x = Insert(c.x, Pointer<Short>(buffer[f3])[index[3]], 3);
+				c.x = Insert(c.x, Pointer<Short>(buffer)[index[0]], 0);
+				c.x = Insert(c.x, Pointer<Short>(buffer)[index[1]], 1);
+				c.x = Insert(c.x, Pointer<Short>(buffer)[index[2]], 2);
+				c.x = Insert(c.x, Pointer<Short>(buffer)[index[3]], 3);
 				break;
 			default:
 				ASSERT(false);
@@ -1549,10 +1544,10 @@
 		else if(state.textureFormat == VK_FORMAT_A2B10G10R10_UNORM_PACK32)
 		{
 			Int4 cc;
-			cc = Insert(cc, Pointer<Int>(buffer[f0])[index[0]], 0);
-			cc = Insert(cc, Pointer<Int>(buffer[f1])[index[1]], 1);
-			cc = Insert(cc, Pointer<Int>(buffer[f2])[index[2]], 2);
-			cc = Insert(cc, Pointer<Int>(buffer[f3])[index[3]], 3);
+			cc = Insert(cc, Pointer<Int>(buffer)[index[0]], 0);
+			cc = Insert(cc, Pointer<Int>(buffer)[index[1]], 1);
+			cc = Insert(cc, Pointer<Int>(buffer)[index[2]], 2);
+			cc = Insert(cc, Pointer<Int>(buffer)[index[3]], 3);
 
 			// shift each 10 bit field left 6, and replicate 6 high bits into bottom 6
 			c.x = Short4(((cc << 6) & Int4(0xFFC0)) | ((cc >> 4) & Int4(0x3F)));
@@ -1568,10 +1563,10 @@
 		else if(state.textureFormat == VK_FORMAT_A2B10G10R10_UINT_PACK32)
 		{
 			Int4 cc;
-			cc = Insert(cc, Pointer<Int>(buffer[f0])[index[0]], 0);
-			cc = Insert(cc, Pointer<Int>(buffer[f1])[index[1]], 1);
-			cc = Insert(cc, Pointer<Int>(buffer[f2])[index[2]], 2);
-			cc = Insert(cc, Pointer<Int>(buffer[f3])[index[3]], 3);
+			cc = Insert(cc, Pointer<Int>(buffer)[index[0]], 0);
+			cc = Insert(cc, Pointer<Int>(buffer)[index[1]], 1);
+			cc = Insert(cc, Pointer<Int>(buffer)[index[2]], 2);
+			cc = Insert(cc, Pointer<Int>(buffer)[index[3]], 3);
 
 			c.x = Short4(((cc) & Int4(0x3FF)));
 			c.y = Short4(((cc >> 10) & Int4(0x3FF)));
@@ -1594,7 +1589,7 @@
 		return c;
 	}
 
-	Vector4s SamplerCore::sampleTexel(Short4 &uuuu, Short4 &vvvv, Short4 &wwww, Vector4f &offset, Pointer<Byte> &mipmap, Pointer<Byte> buffer[4], SamplerFunction function)
+	Vector4s SamplerCore::sampleTexel(Short4 &uuuu, Short4 &vvvv, Short4 &wwww, Vector4f &offset, Pointer<Byte> &mipmap, Pointer<Byte> buffer, SamplerFunction function)
 	{
 		Vector4s c;
 
@@ -1603,11 +1598,16 @@
 
 		if(isYcbcrFormat())
 		{
+			// Pointers to the planes of YCbCr images are stored in consecutive mipmap levels.
+			Pointer<Byte> bufferY = buffer;  // *Pointer<Pointer<Byte>>(mipmap + 0 * sizeof(Mipmap) + OFFSET(Mipmap, buffer));
+			Pointer<Byte> bufferU = *Pointer<Pointer<Byte>>(mipmap + 1 * sizeof(Mipmap) + OFFSET(Mipmap, buffer));  // U/V for 2-plane interleaved formats.
+			Pointer<Byte> bufferV = *Pointer<Pointer<Byte>>(mipmap + 2 * sizeof(Mipmap) + OFFSET(Mipmap, buffer));
+
 			// Luminance
-			Int c0 = Int(buffer[0][index[0]]);
-			Int c1 = Int(buffer[0][index[1]]);
-			Int c2 = Int(buffer[0][index[2]]);
-			Int c3 = Int(buffer[0][index[3]]);
+			Int c0 = Int(bufferY[index[0]]);
+			Int c1 = Int(bufferY[index[1]]);
+			Int c2 = Int(bufferY[index[2]]);
+			Int c3 = Int(bufferY[index[3]]);
 			c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
 			UShort4 Y = As<UShort4>(Unpack(As<Byte4>(c0)));
 
@@ -1620,27 +1620,27 @@
 
 				if(state.textureFormat == VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM)
 				{
-					c0 = Int(buffer[1][index[0]]);
-					c1 = Int(buffer[1][index[1]]);
-					c2 = Int(buffer[1][index[2]]);
-					c3 = Int(buffer[1][index[3]]);
+					c0 = Int(bufferU[index[0]]);
+					c1 = Int(bufferU[index[1]]);
+					c2 = Int(bufferU[index[2]]);
+					c3 = Int(bufferU[index[3]]);
 					c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
 					U = As<UShort4>(Unpack(As<Byte4>(c0)));
 
-					c0 = Int(buffer[2][index[0]]);
-					c1 = Int(buffer[2][index[1]]);
-					c2 = Int(buffer[2][index[2]]);
-					c3 = Int(buffer[2][index[3]]);
+					c0 = Int(bufferV[index[0]]);
+					c1 = Int(bufferV[index[1]]);
+					c2 = Int(bufferV[index[2]]);
+					c3 = Int(bufferV[index[3]]);
 					c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
 					V = As<UShort4>(Unpack(As<Byte4>(c0)));
 				}
 				else if(state.textureFormat == VK_FORMAT_G8_B8R8_2PLANE_420_UNORM)
 				{
 					Short4 UV;
-					UV = Insert(UV, Pointer<Short>(buffer[1])[index[0]], 0);  // TODO: Insert(UShort4, UShort)
-					UV = Insert(UV, Pointer<Short>(buffer[1])[index[1]], 1);
-					UV = Insert(UV, Pointer<Short>(buffer[1])[index[2]], 2);
-					UV = Insert(UV, Pointer<Short>(buffer[1])[index[3]], 3);
+					UV = Insert(UV, Pointer<Short>(bufferU)[index[0]], 0);  // TODO: Insert(UShort4, UShort)
+					UV = Insert(UV, Pointer<Short>(bufferU)[index[1]], 1);
+					UV = Insert(UV, Pointer<Short>(bufferU)[index[2]], 2);
+					UV = Insert(UV, Pointer<Short>(bufferU)[index[3]], 3);
 					U = (UV & Short4(0x00FFu)) | (UV << 8);
 					V = (UV & Short4(0xFF00u)) | As<Short4>(As<UShort4>(UV) >> 8);
 				}
@@ -1732,7 +1732,7 @@
 		return c;
 	}
 
-	Vector4f SamplerCore::sampleTexel(Int4 &uuuu, Int4 &vvvv, Int4 &wwww, Float4 &z, Pointer<Byte> &mipmap, Pointer<Byte> buffer[4], SamplerFunction function)
+	Vector4f SamplerCore::sampleTexel(Int4 &uuuu, Int4 &vvvv, Int4 &wwww, Float4 &z, Pointer<Byte> &mipmap, Pointer<Byte> buffer, SamplerFunction function)
 	{
 		Int4 valid;
 
@@ -1754,18 +1754,13 @@
 
 		if(hasFloatTexture() || has32bitIntegerTextureComponents())
 		{
-			int f0 = state.textureType == TEXTURE_CUBE ? 0 : 0;
-			int f1 = state.textureType == TEXTURE_CUBE ? 1 : 0;
-			int f2 = state.textureType == TEXTURE_CUBE ? 2 : 0;
-			int f3 = state.textureType == TEXTURE_CUBE ? 3 : 0;
-
 			switch (state.textureFormat)
 			{
 			case VK_FORMAT_R16_SFLOAT:
-				t0 = Int4(*Pointer<UShort4>(buffer[f0] + index[0] * 2));
-				t1 = Int4(*Pointer<UShort4>(buffer[f1] + index[1] * 2));
-				t2 = Int4(*Pointer<UShort4>(buffer[f2] + index[2] * 2));
-				t3 = Int4(*Pointer<UShort4>(buffer[f3] + index[3] * 2));
+				t0 = Int4(*Pointer<UShort4>(buffer + index[0] * 2));
+				t1 = Int4(*Pointer<UShort4>(buffer + index[1] * 2));
+				t2 = Int4(*Pointer<UShort4>(buffer + index[2] * 2));
+				t3 = Int4(*Pointer<UShort4>(buffer + index[3] * 2));
 
 				c.x.x = Extract(As<Float4>(halfToFloatBits(t0)), 0);
 				c.x.y = Extract(As<Float4>(halfToFloatBits(t1)), 0);
@@ -1773,10 +1768,10 @@
 				c.x.w = Extract(As<Float4>(halfToFloatBits(t3)), 0);
 				break;
 			case VK_FORMAT_R16G16_SFLOAT:
-				t0 = Int4(*Pointer<UShort4>(buffer[f0] + index[0] * 4));
-				t1 = Int4(*Pointer<UShort4>(buffer[f1] + index[1] * 4));
-				t2 = Int4(*Pointer<UShort4>(buffer[f2] + index[2] * 4));
-				t3 = Int4(*Pointer<UShort4>(buffer[f3] + index[3] * 4));
+				t0 = Int4(*Pointer<UShort4>(buffer + index[0] * 4));
+				t1 = Int4(*Pointer<UShort4>(buffer + index[1] * 4));
+				t2 = Int4(*Pointer<UShort4>(buffer + index[2] * 4));
+				t3 = Int4(*Pointer<UShort4>(buffer + index[3] * 4));
 
 				// FIXME: shuffles
 				c.x = As<Float4>(halfToFloatBits(t0));
@@ -1786,10 +1781,10 @@
 				transpose4x4(c.x, c.y, c.z, c.w);
 				break;
 			case VK_FORMAT_R16G16B16A16_SFLOAT:
-				t0 = Int4(*Pointer<UShort4>(buffer[f0] + index[0] * 8));
-				t1 = Int4(*Pointer<UShort4>(buffer[f1] + index[1] * 8));
-				t2 = Int4(*Pointer<UShort4>(buffer[f2] + index[2] * 8));
-				t3 = Int4(*Pointer<UShort4>(buffer[f3] + index[3] * 8));
+				t0 = Int4(*Pointer<UShort4>(buffer + index[0] * 8));
+				t1 = Int4(*Pointer<UShort4>(buffer + index[1] * 8));
+				t2 = Int4(*Pointer<UShort4>(buffer + index[2] * 8));
+				t3 = Int4(*Pointer<UShort4>(buffer + index[3] * 8));
 
 				c.x = As<Float4>(halfToFloatBits(t0));
 				c.y = As<Float4>(halfToFloatBits(t1));
@@ -1802,19 +1797,19 @@
 			case VK_FORMAT_R32_UINT:
 			case VK_FORMAT_D32_SFLOAT:
 				// FIXME: Optimal shuffling?
-				c.x.x = *Pointer<Float>(buffer[f0] + index[0] * 4);
-				c.x.y = *Pointer<Float>(buffer[f1] + index[1] * 4);
-				c.x.z = *Pointer<Float>(buffer[f2] + index[2] * 4);
-				c.x.w = *Pointer<Float>(buffer[f3] + index[3] * 4);
+				c.x.x = *Pointer<Float>(buffer + index[0] * 4);
+				c.x.y = *Pointer<Float>(buffer + index[1] * 4);
+				c.x.z = *Pointer<Float>(buffer + index[2] * 4);
+				c.x.w = *Pointer<Float>(buffer + index[3] * 4);
 				break;
 			case VK_FORMAT_R32G32_SFLOAT:
 			case VK_FORMAT_R32G32_SINT:
 			case VK_FORMAT_R32G32_UINT:
 				// FIXME: Optimal shuffling?
-				c.x.xy = *Pointer<Float4>(buffer[f0] + index[0] * 8);
-				c.x.zw = *Pointer<Float4>(buffer[f1] + index[1] * 8 - 8);
-				c.z.xy = *Pointer<Float4>(buffer[f2] + index[2] * 8);
-				c.z.zw = *Pointer<Float4>(buffer[f3] + index[3] * 8 - 8);
+				c.x.xy = *Pointer<Float4>(buffer + index[0] * 8);
+				c.x.zw = *Pointer<Float4>(buffer + index[1] * 8 - 8);
+				c.z.xy = *Pointer<Float4>(buffer + index[2] * 8);
+				c.z.zw = *Pointer<Float4>(buffer + index[3] * 8 - 8);
 				c.y = c.x;
 				c.x = Float4(c.x.xz, c.z.xz);
 				c.y = Float4(c.y.yw, c.z.yw);
@@ -1822,28 +1817,28 @@
 			case VK_FORMAT_R32G32B32_SFLOAT:
 			case VK_FORMAT_R32G32B32_SINT:
 			case VK_FORMAT_R32G32B32_UINT:
-				c.x = *Pointer<Float4>(buffer[f0] + index[0] * 16, 16);
-				c.y = *Pointer<Float4>(buffer[f1] + index[1] * 16, 16);
-				c.z = *Pointer<Float4>(buffer[f2] + index[2] * 16, 16);
-				c.w = *Pointer<Float4>(buffer[f3] + index[3] * 16, 16);
+				c.x = *Pointer<Float4>(buffer + index[0] * 16, 16);
+				c.y = *Pointer<Float4>(buffer + index[1] * 16, 16);
+				c.z = *Pointer<Float4>(buffer + index[2] * 16, 16);
+				c.w = *Pointer<Float4>(buffer + index[3] * 16, 16);
 				transpose4x3(c.x, c.y, c.z, c.w);
 				break;
 			case VK_FORMAT_R32G32B32A32_SFLOAT:
 			case VK_FORMAT_R32G32B32A32_SINT:
 			case VK_FORMAT_R32G32B32A32_UINT:
-				c.x = *Pointer<Float4>(buffer[f0] + index[0] * 16, 16);
-				c.y = *Pointer<Float4>(buffer[f1] + index[1] * 16, 16);
-				c.z = *Pointer<Float4>(buffer[f2] + index[2] * 16, 16);
-				c.w = *Pointer<Float4>(buffer[f3] + index[3] * 16, 16);
+				c.x = *Pointer<Float4>(buffer + index[0] * 16, 16);
+				c.y = *Pointer<Float4>(buffer + index[1] * 16, 16);
+				c.z = *Pointer<Float4>(buffer + index[2] * 16, 16);
+				c.w = *Pointer<Float4>(buffer + index[3] * 16, 16);
 				transpose4x4(c.x, c.y, c.z, c.w);
 				break;
 			case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
 			{
 				Float4 t;		// TODO: add Insert(UInt4, RValue<UInt>)
-				t.x = *Pointer<Float>(buffer[f0] + index[0] * 4);
-				t.y = *Pointer<Float>(buffer[f1] + index[1] * 4);
-				t.z = *Pointer<Float>(buffer[f2] + index[2] * 4);
-				t.w = *Pointer<Float>(buffer[f3] + index[3] * 4);
+				t.x = *Pointer<Float>(buffer + index[0] * 4);
+				t.y = *Pointer<Float>(buffer + index[1] * 4);
+				t.z = *Pointer<Float>(buffer + index[2] * 4);
+				t.w = *Pointer<Float>(buffer + index[3] * 4);
 				t0 = As<UInt4>(t);
 				c.w = Float4(UInt4(1) << ((t0 >> 27) & UInt4(0x1F))) * Float4(1.0f / (1 << 24));
 				c.x = Float4((t0) & UInt4(0x1FF)) * c.w;
@@ -1854,10 +1849,10 @@
 			case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
 			{
 				Float4 t;		// TODO: add Insert(UInt4, RValue<UInt>)
-				t.x = *Pointer<Float>(buffer[f0] + index[0] * 4);
-				t.y = *Pointer<Float>(buffer[f1] + index[1] * 4);
-				t.z = *Pointer<Float>(buffer[f2] + index[2] * 4);
-				t.w = *Pointer<Float>(buffer[f3] + index[3] * 4);
+				t.x = *Pointer<Float>(buffer + index[0] * 4);
+				t.y = *Pointer<Float>(buffer + index[1] * 4);
+				t.z = *Pointer<Float>(buffer + index[2] * 4);
+				t.w = *Pointer<Float>(buffer + index[3] * 4);
 				t0 = As<UInt4>(t);
 				c.x = As<Float4>(halfToFloatBits((t0 << 4) & UInt4(0x7FF0)));
 				c.y = As<Float4>(halfToFloatBits((t0 >> 7) & UInt4(0x7FF0)));
@@ -1988,11 +1983,13 @@
 		return out;
 	}
 
-	void SamplerCore::selectMipmap(Pointer<Byte> &texture, Pointer<Byte> buffer[4], Pointer<Byte> &mipmap, Float &lod, Int face[4], bool secondLOD)
+	void SamplerCore::selectMipmap(const Pointer<Byte> &texture, Pointer<Byte> &mipmap, Pointer<Byte> &buffer, const Float &lod, bool secondLOD)
 	{
+		Pointer<Byte> mipmap0 = texture + OFFSET(Texture, mipmap[0]);
+
 		if(state.mipmapFilter == MIPMAP_NONE)
 		{
-			mipmap = texture + OFFSET(Texture,mipmap[0]);
+			mipmap = mipmap0;
 		}
 		else
 		{
@@ -2008,26 +2005,10 @@
 				ilod = Int(lod);
 			}
 
-			mipmap = texture + OFFSET(Texture,mipmap) + ilod * sizeof(Mipmap) + secondLOD * sizeof(Mipmap);
+			mipmap = mipmap0 + ilod * sizeof(Mipmap) + secondLOD * sizeof(Mipmap);
 		}
 
-		if(state.textureType != TEXTURE_CUBE)
-		{
-			buffer[0] = *Pointer<Pointer<Byte>>(mipmap + OFFSET(Mipmap,buffer[0]));
-
-			if(isYcbcrFormat())
-			{
-				buffer[1] = *Pointer<Pointer<Byte>>(mipmap + sizeof(Mipmap) * 1 + OFFSET(Mipmap,buffer[0]));
-				buffer[2] = *Pointer<Pointer<Byte>>(mipmap + sizeof(Mipmap) * 2 + OFFSET(Mipmap,buffer[0]));
-			}
-		}
-		else
-		{
-			for(int i = 0; i < 4; i++)
-			{
-				buffer[i] = *Pointer<Pointer<Byte>>(mipmap + OFFSET(Mipmap,buffer) + face[i] * sizeof(void*));
-			}
-		}
+		buffer = *Pointer<Pointer<Byte>>(mipmap + OFFSET(Mipmap, buffer));
 	}
 
 	Int4 SamplerCore::computeFilterOffset(Float &lod)
@@ -2122,6 +2103,10 @@
 		{
 			xyz0 = Min(Max(RoundInt(uvw), Int4(0)), maxXYZ);
 		}
+		else if(addressingMode == ADDRESSING_CUBEFACE)
+		{
+			xyz0 = As<Int4>(uvw);  // Bit reinterpretation, not a float-to-int conversion. NOTE(review): presumably uvw already holds the integer face index from cubeFace() - confirm at the call site.
+		}
 		else
 		{
 			const int halfBits = 0x3EFFFFFF;   // Value just under 0.5f
diff --git a/src/Pipeline/SamplerCore.hpp b/src/Pipeline/SamplerCore.hpp
index 0b75bf1..17aaab5 100644
--- a/src/Pipeline/SamplerCore.hpp
+++ b/src/Pipeline/SamplerCore.hpp
@@ -66,30 +66,30 @@
 
 	private:
 		Short4 offsetSample(Short4 &uvw, Pointer<Byte> &mipmap, int halfOffset, bool wrap, int count, Float &lod);
-		Vector4s sampleFilter(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], SamplerFunction function);
-		Vector4s sampleAniso(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], bool secondLOD, SamplerFunction function);
-		Vector4s sampleQuad(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function);
-		Vector4s sampleQuad2D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function);
+		Vector4s sampleFilter(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, SamplerFunction function);
+		Vector4s sampleAniso(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, bool secondLOD, SamplerFunction function);
+		Vector4s sampleQuad(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, bool secondLOD, SamplerFunction function);
+		Vector4s sampleQuad2D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, bool secondLOD, SamplerFunction function);
 		Vector4s sample3D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, bool secondLOD, SamplerFunction function);
-		Vector4f sampleFloatFilter(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], SamplerFunction function);
-		Vector4f sampleFloatAniso(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], bool secondLOD, SamplerFunction function);
-		Vector4f sampleFloat(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function);
-		Vector4f sampleFloat2D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function);
+		Vector4f sampleFloatFilter(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, SamplerFunction function);
+		Vector4f sampleFloatAniso(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, bool secondLOD, SamplerFunction function);
+		Vector4f sampleFloat(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, Float &lod, bool secondLOD, SamplerFunction function);
+		Vector4f sampleFloat2D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, Float &lod, bool secondLOD, SamplerFunction function);
 		Vector4f sampleFloat3D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, bool secondLOD, SamplerFunction function);
 		Float log2sqrt(Float lod);
 		Float log2(Float lod);
-		void computeLod(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Float4 &u, Float4 &v, const Float &lodOrBias, Vector4f &dsx, Vector4f &dsy, SamplerFunction function);
-		void computeLodCube(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float &lod, Float4 &u, Float4 &v, Float4 &w, const Float &lodOrBias, Vector4f &dsx, Vector4f &dsy, Float4 &M, SamplerFunction function);
-		void computeLod3D(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float &lod, Float4 &u, Float4 &v, Float4 &w, const Float &lodOrBias, Vector4f &dsx, Vector4f &dsy, SamplerFunction function);
-		void cubeFace(Int face[4], Float4 &U, Float4 &V, Float4 &x, Float4 &y, Float4 &z, Float4 &M);
+		void computeLod(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Float4 &u, Float4 &v, Vector4f &dsx, Vector4f &dsy, SamplerFunction function);
+		void computeLodCube(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float &lod, Float4 &u, Float4 &v, Float4 &w, Vector4f &dsx, Vector4f &dsy, Float4 &M, SamplerFunction function);
+		void computeLod3D(Pointer<Byte> &texture, Pointer<Byte> &sampler, Float &lod, Float4 &u, Float4 &v, Float4 &w, Vector4f &dsx, Vector4f &dsy, SamplerFunction function);
+		Int4 cubeFace(Float4 &U, Float4 &V, Float4 &x, Float4 &y, Float4 &z, Float4 &M);
 		Short4 applyOffset(Short4 &uvw, Float4 &offset, const Int4 &whd, AddressingMode mode);
 		void computeIndices(UInt index[4], Short4 uuuu, Short4 vvvv, Short4 wwww, Vector4f &offset, const Pointer<Byte> &mipmap, SamplerFunction function);
 		void computeIndices(UInt index[4], Int4 uuuu, Int4 vvvv, Int4 wwww, Int4 valid, const Pointer<Byte> &mipmap, SamplerFunction function);
-		Vector4s sampleTexel(Short4 &u, Short4 &v, Short4 &s, Vector4f &offset, Pointer<Byte> &mipmap, Pointer<Byte> buffer[4], SamplerFunction function);
-		Vector4s sampleTexel(UInt index[4], Pointer<Byte> buffer[4]);
-		Vector4f sampleTexel(Int4 &u, Int4 &v, Int4 &s, Float4 &z, Pointer<Byte> &mipmap, Pointer<Byte> buffer[4], SamplerFunction function);
+		Vector4s sampleTexel(Short4 &u, Short4 &v, Short4 &s, Vector4f &offset, Pointer<Byte> &mipmap, Pointer<Byte> buffer, SamplerFunction function);
+		Vector4s sampleTexel(UInt index[4], Pointer<Byte> buffer);
+		Vector4f sampleTexel(Int4 &u, Int4 &v, Int4 &s, Float4 &z, Pointer<Byte> &mipmap, Pointer<Byte> buffer, SamplerFunction function);
 		Vector4f replaceBorderTexel(const Vector4f &c, Int4 valid);
-		void selectMipmap(Pointer<Byte> &texture, Pointer<Byte> buffer[4], Pointer<Byte> &mipmap, Float &lod, Int face[4], bool secondLOD);
+		void selectMipmap(const Pointer<Byte> &texture, Pointer<Byte> &mipmap, Pointer<Byte> &buffer, const Float &lod, bool secondLOD);
 		Short4 address(Float4 &uw, AddressingMode addressingMode, Pointer<Byte>& mipmap);
 		void address(Float4 &uw, Int4& xyz0, Int4& xyz1, Float4& f, Pointer<Byte>& mipmap, Float4 &texOffset, Int4 &filter, int whd, AddressingMode addressingMode, SamplerFunction function);
 		Int4 computeFilterOffset(Float &lod);
diff --git a/src/Pipeline/SpirvShaderSampling.cpp b/src/Pipeline/SpirvShaderSampling.cpp
index 0c76457..72f3d77 100644
--- a/src/Pipeline/SpirvShaderSampling.cpp
+++ b/src/Pipeline/SpirvShaderSampling.cpp
@@ -272,12 +272,7 @@
 		}
 		// Fall through to CUBE case:
 	case VK_IMAGE_VIEW_TYPE_CUBE:
-		if(coordinateIndex >= 2)
-		{
-			// Cube faces are addressed as 2D images.
-			return ADDRESSING_UNUSED;
-		}
-		else
+		if(coordinateIndex <= 1)  // Cube faces themselves are addressed as 2D images.
 		{
 			// Vulkan 1.1 spec:
 			// "Cube images ignore the wrap modes specified in the sampler. Instead, if VK_FILTER_NEAREST is used within a mip level then
@@ -286,6 +281,15 @@
 			// This corresponds with our 'SEAMLESS' addressing mode.
 			return ADDRESSING_SEAMLESS;
 		}
+		else if(coordinateIndex == 2)
+		{
+			// The cube face is an index into array layers.
+			return ADDRESSING_CUBEFACE;
+		}
+		else
+		{
+			return ADDRESSING_UNUSED;
+		}
 		break;
 
 	case VK_IMAGE_VIEW_TYPE_1D:  // Treated as 2D texture with second coordinate 0.
diff --git a/src/Reactor/Reactor.hpp b/src/Reactor/Reactor.hpp
index 2494b7a..95cb094 100644
--- a/src/Reactor/Reactor.hpp
+++ b/src/Reactor/Reactor.hpp
@@ -1475,7 +1475,7 @@
 		RValue<Vector4> operator=(RValue<typename Scalar<Vector4>::Type> rhs);
 
 	private:
-		Float4 *parent;
+		Vector4 *parent;
 	};
 
 	template<class Vector4, int T>
diff --git a/src/Vulkan/VkDescriptorSetLayout.cpp b/src/Vulkan/VkDescriptorSetLayout.cpp
index 41c9d4b..624fef9 100644
--- a/src/Vulkan/VkDescriptorSetLayout.cpp
+++ b/src/Vulkan/VkDescriptorSetLayout.cpp
@@ -322,7 +322,7 @@
 			imageSampler[i].texture.depth = sw::replicate(1);
 
 			sw::Mipmap &mipmap = imageSampler[i].texture.mipmap[0];
-			mipmap.buffer[0] = bufferView->getPointer();
+			mipmap.buffer = bufferView->getPointer();
 			mipmap.width[0] = mipmap.width[1] = mipmap.width[2] = mipmap.width[3] = numElements;
 			mipmap.height[0] = mipmap.height[1] = mipmap.height[2] = mipmap.height[3] = 1;
 			mipmap.depth[0] = mipmap.depth[1] = mipmap.depth[2] = mipmap.depth[3] = 1;
@@ -373,11 +373,11 @@
 
 				const int level = 0;
 				VkOffset3D offset = {0, 0, 0};
-				texture->mipmap[0].buffer[0] = imageView->getOffsetPointer(offset, VK_IMAGE_ASPECT_PLANE_0_BIT, level, 0, ImageView::SAMPLING);
-				texture->mipmap[1].buffer[0] = imageView->getOffsetPointer(offset, VK_IMAGE_ASPECT_PLANE_1_BIT, level, 0, ImageView::SAMPLING);
+				texture->mipmap[0].buffer = imageView->getOffsetPointer(offset, VK_IMAGE_ASPECT_PLANE_0_BIT, level, 0, ImageView::SAMPLING);
+				texture->mipmap[1].buffer = imageView->getOffsetPointer(offset, VK_IMAGE_ASPECT_PLANE_1_BIT, level, 0, ImageView::SAMPLING);
 				if(format.getAspects() & VK_IMAGE_ASPECT_PLANE_2_BIT)
 				{
-					texture->mipmap[2].buffer[0] = imageView->getOffsetPointer(offset, VK_IMAGE_ASPECT_PLANE_2_BIT, level, 0, ImageView::SAMPLING);
+					texture->mipmap[2].buffer = imageView->getOffsetPointer(offset, VK_IMAGE_ASPECT_PLANE_2_BIT, level, 0, ImageView::SAMPLING);
 				}
 
 				VkExtent3D extent = imageView->getMipLevelExtent(0);
@@ -407,20 +407,15 @@
 
 					if(imageView->getType() == VK_IMAGE_VIEW_TYPE_CUBE)
 					{
-						for(int face = 0; face < 6; face++)
-						{
-							// Obtain the pointer to the corner of the level including the border, for seamless sampling.
-							// This is taken into account in the sampling routine, which can't handle negative texel coordinates.
-							VkOffset3D offset = {-1, -1, 0};
-
-							// TODO(b/129523279): Implement as 6 consecutive layers instead of separate pointers.
-							mipmap.buffer[face] = imageView->getOffsetPointer(offset, aspect, level, face, ImageView::SAMPLING);
-						}
+						// Obtain the pointer to the corner of the level including the border, for seamless sampling.
+						// This is taken into account in the sampling routine, which can't handle negative texel coordinates.
+						VkOffset3D offset = {-1, -1, 0};
+						mipmap.buffer = imageView->getOffsetPointer(offset, aspect, level, 0, ImageView::SAMPLING);
 					}
 					else
 					{
 						VkOffset3D offset = {0, 0, 0};
-						mipmap.buffer[0] = imageView->getOffsetPointer(offset, aspect, level, 0, ImageView::SAMPLING);
+						mipmap.buffer = imageView->getOffsetPointer(offset, aspect, level, 0, ImageView::SAMPLING);
 					}
 
 					VkExtent3D extent = imageView->getMipLevelExtent(level);
@@ -438,7 +433,7 @@
 		}
 	}
 	else if (entry.descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE ||
-			 entry.descriptorType == VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT)
+	         entry.descriptorType == VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT)
 	{
 		auto descriptor = reinterpret_cast<StorageImageDescriptor *>(memToWrite);
 		for(uint32_t i = 0; i < entry.descriptorCount; i++)
@@ -485,9 +480,9 @@
 		}
 	}
 	else if (entry.descriptorType == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ||
-			 entry.descriptorType == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
-			 entry.descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER ||
-			 entry.descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)
+	         entry.descriptorType == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
+	         entry.descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER ||
+	         entry.descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)
 	{
 		auto descriptor = reinterpret_cast<BufferDescriptor *>(memToWrite);
 		for (uint32_t i = 0; i < entry.descriptorCount; i++)