SpirvShader: Refactor loads and stores. Consider limits. This change moves all calls to rr::Load() and rr::Store() to two new functions: SIMD::Load() and SIMD::Store(). This attempts to consolodate the SIMD memory ops into reusable functions, while also adding bounds checking on the accesses. The additional branches hurts the JIT codegen performance. This will be resolved with a future change. Tests: dEQP-VK.robustness.* Bug: b/131224163 Change-Id: I3a392a1f4f5366fa5134c081e0a2479575f92d80 Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/29334 Reviewed-by: Chris Forbes <chrisforbes@google.com> Tested-by: Ben Clayton <bclayton@google.com> Presubmit-Ready: Ben Clayton <bclayton@google.com> Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>

commit: 9e4bc1babfeef7abe7fa288a36eedca18fd3db20 [log] [tgz]
author: Ben Clayton <bclayton@google.com> Tue Apr 16 16:52:02 2019 -0400
committer: Ben Clayton <bclayton@google.com> Wed Apr 24 16:22:22 2019 +0000
tree: d7c1c1274375bbd59206e305e00a3f84e3aa1c23
parent: 4d1f8d05a85a4e8a77034edaa074f7e89ac65bf5 [diff] [blame]
diff --git a/src/Pipeline/SpirvShader.cpp b/src/Pipeline/SpirvShader.cpp
index be8de93..bd51839 100644
--- a/src/Pipeline/SpirvShader.cpp
+++ b/src/Pipeline/SpirvShader.cpp

@@ -217,10 +217,90 @@
 			s * ( a * fkgj - b * ekgi + c * ejfi),
 		}};
 	}
-}
+
+	sw::SIMD::Pointer interleaveByLane(sw::SIMD::Pointer p)
+	{
+		p *= sw::SIMD::Width;
+		p.staticOffsets[0] += 0 * sizeof(float);
+		p.staticOffsets[1] += 1 * sizeof(float);
+		p.staticOffsets[2] += 2 * sizeof(float);
+		p.staticOffsets[3] += 3 * sizeof(float);
+		return p;
+	}
+
+} // anonymous namespace
 
 namespace sw
 {
+	namespace SIMD
+	{
+
+		template<typename T>
+		T Load(Pointer ptr, Int mask, bool atomic /* = false */, std::memory_order order /* = std::memory_order_relaxed */)
+		{
+			using EL = typename Element<T>::type;
+			T out;
+			auto offsets = ptr.offsets();
+			mask &= CmpLT(offsets + SIMD::Int(sizeof(float) - 1), SIMD::Int(ptr.limit)); // Disable OOB reads.
+			auto anyLanesDisabled = AnyFalse(mask);
+			If(ptr.hasEqualOffsets() && !anyLanesDisabled)
+			{
+				// Load one, replicate.
+				auto offset = Extract(offsets, 0);
+				out = T(Load(rr::Pointer<EL>(&ptr.base[offset]), sizeof(float), atomic, order));
+			}
+			Else If(ptr.hasSequentialOffsets() && !anyLanesDisabled)
+			{
+				// Load all elements in a single SIMD instruction.
+				auto offset = Extract(offsets, 0);
+				out = Load(rr::Pointer<T>(&ptr.base[offset]), sizeof(float), atomic, order);
+			}
+			Else
+			{
+				// Divergent offsets or masked lanes - load each element individually.
+				out = T(0);
+				for (int i = 0; i < SIMD::Width; i++)
+				{
+					If(Extract(mask, i) != 0)
+					{
+						auto offset = Extract(offsets, i);
+						auto el = rr::Load(rr::Pointer<EL>(&ptr.base[offset]), sizeof(float), atomic, order);
+						out = Insert(out, el, i);
+					}
+				}
+			}
+			return out;
+		}
+
+		template<typename T>
+		void Store(Pointer ptr, T val, Int mask, bool atomic /* = false */, std::memory_order order /* = std::memory_order_relaxed */)
+		{
+			using EL = typename Element<T>::type;
+			auto offsets = ptr.offsets();
+			mask &= CmpLT(offsets + SIMD::Int(sizeof(float) - 1), SIMD::Int(ptr.limit)); // Disable OOB reads.
+			auto anyLanesDisabled = AnyFalse(mask);
+			If(ptr.hasSequentialOffsets() && !anyLanesDisabled)
+			{
+				// Store all elements in a single SIMD instruction.
+				auto offset = Extract(offsets, 0);
+				Store(val, rr::Pointer<T>(&ptr.base[offset]), sizeof(float), atomic, order);
+			}
+			Else
+			{
+				// Divergent offsets or masked lanes.
+				for (int i = 0; i < SIMD::Width; i++)
+				{
+					If(Extract(mask, i) != 0)
+					{
+						auto offset = Extract(offsets, i);
+						rr::Store(Extract(val, i), rr::Pointer<EL>(&ptr.base[offset]), sizeof(float), atomic, order);
+					}
+				}
+			}
+		}
+
+	} // namespace SIMD
+
 	volatile int SpirvShader::serialCounter = 1;    // Start at 1, 0 is invalid shader.
 
 	SpirvShader::SpirvShader(InsnStore const &insns)
@@ -1180,7 +1260,6 @@
 				ASSERT(d.Binding >= 0);
 
 				auto set = routine->getPointer(id);
-				ASSERT(set.uniform);
 
 				auto setLayout = routine->pipelineLayout->getDescriptorSetLayout(d.DescriptorSet);
 				int bindingOffset = static_cast<int>(setLayout->getBindingOffset(d.Binding, arrayIndex));
@@ -1189,6 +1268,7 @@
 				Pointer<Byte> buffer = *Pointer<Pointer<Byte>>(bufferInfo + OFFSET(VkDescriptorBufferInfo, buffer)); // vk::Buffer*
 				Pointer<Byte> data = *Pointer<Pointer<Byte>>(buffer + vk::Buffer::DataOffset); // void*
 				Int offset = *Pointer<Int>(bufferInfo + OFFSET(VkDescriptorBufferInfo, offset));
+				Int size = *Pointer<Int>(buffer + vk::Buffer::DataSize); // void*
 				if (setLayout->isBindingDynamic(d.Binding))
 				{
 					uint32_t dynamicBindingIndex =
@@ -1197,12 +1277,12 @@
 						arrayIndex;
 					offset += routine->descriptorDynamicOffsets[dynamicBindingIndex];
 				}
-				return SIMD::Pointer(data + offset);
+				return SIMD::Pointer(data + offset, size - offset);
 			}
 
 			default:
 				UNREACHABLE("Invalid pointer kind %d", int(object.kind));
-				return SIMD::Pointer(Pointer<Byte>());
+				return SIMD::Pointer(Pointer<Byte>(), 0);
 		}
 	}
 
@@ -1298,7 +1378,7 @@
 				}
 				else
 				{
-					ptr.addOffset(SIMD::Int(d.ArrayStride) * routine->getIntermediate(indexIds[i]).Int(0));
+					ptr += SIMD::Int(d.ArrayStride) * routine->getIntermediate(indexIds[i]).Int(0);
 				}
 				typeId = type.element;
 				break;
@@ -1316,7 +1396,7 @@
 				}
 				else
 				{
-					ptr.addOffset(SIMD::Int(columnStride) * routine->getIntermediate(indexIds[i]).Int(0));
+					ptr += SIMD::Int(columnStride) * routine->getIntermediate(indexIds[i]).Int(0);
 				}
 				typeId = type.element;
 				break;
@@ -1331,7 +1411,7 @@
 				}
 				else
 				{
-					ptr.addOffset(SIMD::Int(elemStride) * routine->getIntermediate(indexIds[i]).Int(0));
+					ptr += SIMD::Int(elemStride) * routine->getIntermediate(indexIds[i]).Int(0);
 				}
 				typeId = type.element;
 				break;
@@ -1341,10 +1421,7 @@
 			}
 		}
 
-		if (constantOffset != 0)
-		{
-			ptr.addOffset(constantOffset);
-		}
+		ptr += constantOffset;
 		return ptr;
 	}
 
@@ -1405,11 +1482,11 @@
 					auto & obj = getObject(indexIds[i]);
 					if (obj.kind == Object::Kind::Constant)
 					{
-						constantOffset += stride * GetConstantInt(indexIds[i]);
+						ptr += stride * GetConstantInt(indexIds[i]);
 					}
 					else
 					{
-						ptr.addOffset(SIMD::Int(stride) * routine->getIntermediate(indexIds[i]).Int(0));
+						ptr += SIMD::Int(stride) * routine->getIntermediate(indexIds[i]).Int(0);
 					}
 				}
 				typeId = type.element;
@@ -1423,7 +1500,7 @@
 
 		if (constantOffset != 0)
 		{
-			ptr.addOffset(constantOffset);
+			ptr += constantOffset;
 		}
 		return ptr;
 	}
@@ -2261,8 +2338,11 @@
 		case spv::StorageClassPrivate:
 		case spv::StorageClassFunction:
 		{
+			ASSERT(objectTy.opcode() == spv::OpTypePointer);
 			auto base = &routine->getVariable(resultId)[0];
-			routine->createPointer(resultId, SIMD::Pointer(base));
+			auto elementTy = getType(objectTy.element);
+			auto size = elementTy.sizeInComponents * sizeof(float) * SIMD::Width;
+			routine->createPointer(resultId, SIMD::Pointer(base, size));
 			break;
 		}
 		case spv::StorageClassInput:
@@ -2277,8 +2357,11 @@
 									dst[offset++] = routine->inputs[scalarSlot];
 								});
 			}
+			ASSERT(objectTy.opcode() == spv::OpTypePointer);
 			auto base = &routine->getVariable(resultId)[0];
-			routine->createPointer(resultId, SIMD::Pointer(base));
+			auto elementTy = getType(objectTy.element);
+			auto size = elementTy.sizeInComponents * sizeof(float) * SIMD::Width;
+			routine->createPointer(resultId, SIMD::Pointer(base, size));
 			break;
 		}
 		case spv::StorageClassUniformConstant:
@@ -2292,7 +2375,8 @@
 			size_t bindingOffset = setLayout->getBindingOffset(d.Binding, arrayIndex);
 			Pointer<Byte> set = routine->descriptorSets[d.DescriptorSet];  // DescriptorSet*
 			Pointer<Byte> binding = Pointer<Byte>(set + bindingOffset);    // vk::SampledImageDescriptor*
-			routine->createPointer(resultId, SIMD::Pointer(binding));
+			auto size = 0; // Not required as this pointer is not directly used by SIMD::Read or SIMD::Write.
+			routine->createPointer(resultId, SIMD::Pointer(binding, size));
 			break;
 		}
 		case spv::StorageClassUniform:
@@ -2300,13 +2384,13 @@
 		{
 			const auto &d = descriptorDecorations.at(resultId);
 			ASSERT(d.DescriptorSet >= 0 && d.DescriptorSet < vk::MAX_BOUND_DESCRIPTOR_SETS);
-
-			routine->createPointer(resultId, SIMD::Pointer(routine->descriptorSets[d.DescriptorSet]));
+			auto size = 0; // Not required as this pointer is not directly used by SIMD::Read or SIMD::Write.
+			routine->createPointer(resultId, SIMD::Pointer(routine->descriptorSets[d.DescriptorSet], size));
 			break;
 		}
 		case spv::StorageClassPushConstant:
 		{
-			routine->createPointer(resultId, SIMD::Pointer(routine->pushConstants));
+			routine->createPointer(resultId, SIMD::Pointer(routine->pushConstants, vk::MAX_PUSH_CONSTANT_SIZE));
 			break;
 		}
 		default:
@@ -2358,55 +2442,15 @@
 		auto ptr = GetPointerToData(pointerId, 0, routine);
 
 		bool interleavedByLane = IsStorageInterleavedByLane(pointerTy.storageClass);
-		auto anyInactiveLanes = AnyFalse(state->activeLaneMask());
-
-		auto load = std::unique_ptr<SIMD::Float[]>(new SIMD::Float[resultTy.sizeInComponents]);
-
-		If(!ptr.uniform || anyInactiveLanes)
-		{
-			// Divergent offsets or masked lanes.
-			VisitMemoryObject(pointerId, [&](uint32_t i, uint32_t o)
-			{
-				// i wish i had a Float,Float,Float,Float constructor here..
-				for (int j = 0; j < SIMD::Width; j++)
-				{
-					If(Extract(state->activeLaneMask(), j) != 0)
-					{
-						Int offset = Int(o) + Extract(ptr.offset, j);
-						if (interleavedByLane) { offset = offset * SIMD::Width + (j * sizeof(float)); }
-						load[i] = Insert(load[i], Load(Pointer<Float>(&ptr.base[offset]), sizeof(float), atomic, memoryOrder), j);
-					}
-				}
-			});
-		}
-		Else
-		{
-			// No divergent offsets or masked lanes.
-			if (interleavedByLane)
-			{
-				// Lane-interleaved data.
-				VisitMemoryObject(pointerId, [&](uint32_t i, uint32_t offset)
-				{
-					Pointer<SIMD::Float> src = &ptr.base[offset * SIMD::Width];
-					load[i] = Load(src, sizeof(float), atomic, memoryOrder);  // TODO: optimize alignment
-				});
-			}
-			else
-			{
-				// Non-interleaved data.
-				VisitMemoryObject(pointerId, [&](uint32_t i, uint32_t offset)
-				{
-					Pointer<Float> src = &ptr.base[offset];
-					load[i] = RValue<SIMD::Float>(Load(src, sizeof(float), atomic, memoryOrder));  // TODO: optimize alignment
-				});
-			}
-		}
 
 		auto &dst = routine->createIntermediate(resultId, resultTy.sizeInComponents);
-		for (auto i = 0u; i < resultTy.sizeInComponents; i++)
+
+		VisitMemoryObject(pointerId, [&](uint32_t i, uint32_t offset)
 		{
-			dst.move(i, load[i]);
-		}
+			auto p = ptr + offset;
+			if (interleavedByLane) { p = interleaveByLane(p); }
+			dst.move(i, SIMD::Load<SIMD::Float>(p, state->activeLaneMask(), atomic, memoryOrder));
+		});
 
 		return EmitResult::Continue;
 	}
@@ -2438,83 +2482,29 @@
 		}
 
 		auto ptr = GetPointerToData(pointerId, 0, routine);
-
 		bool interleavedByLane = IsStorageInterleavedByLane(pointerTy.storageClass);
-		auto anyInactiveLanes = AnyFalse(state->activeLaneMask());
 
 		if (object.kind == Object::Kind::Constant)
 		{
 			// Constant source data.
 			auto src = reinterpret_cast<float *>(object.constantValue.get());
-			If(!ptr.uniform || anyInactiveLanes)
+			VisitMemoryObject(pointerId, [&](uint32_t i, uint32_t offset)
 			{
-				// Divergent offsets or masked lanes.
-				VisitMemoryObject(pointerId, [&](uint32_t i, uint32_t o)
-				{
-					for (int j = 0; j < SIMD::Width; j++)
-					{
-						If(Extract(state->activeLaneMask(), j) != 0)
-						{
-							Int offset = Int(o) + Extract(ptr.offset, j);
-							if (interleavedByLane) { offset = offset * SIMD::Width + (j * sizeof(float)); }
-							Store(Float(src[i]), Pointer<Float>(&ptr.base[offset]), sizeof(float), atomic, memoryOrder);
-						}
-					}
-				});
-			}
-			Else
-			{
-				// Constant source data.
-				// No divergent offsets or masked lanes.
-				VisitMemoryObject(pointerId, [&](uint32_t i, uint32_t offset)
-				{
-					Pointer<SIMD::Float> dst = &ptr.base[offset * SIMD::Width];
-					Store(SIMD::Float(src[i]), dst, sizeof(float), atomic, memoryOrder);  // TODO: optimize alignment
-				});
-			}
+				auto p = ptr + offset;
+				if (interleavedByLane) { p = interleaveByLane(p); }
+				SIMD::Store(p, SIMD::Float(src[i]), state->activeLaneMask(), atomic, memoryOrder);
+			});
 		}
 		else
 		{
 			// Intermediate source data.
 			auto &src = routine->getIntermediate(objectId);
-			If(!ptr.uniform || anyInactiveLanes)
+			VisitMemoryObject(pointerId, [&](uint32_t i, uint32_t offset)
 			{
-				// Divergent offsets or masked lanes.
-				VisitMemoryObject(pointerId, [&](uint32_t i, uint32_t o)
-				{
-					for (int j = 0; j < SIMD::Width; j++)
-					{
-						If(Extract(state->activeLaneMask(), j) != 0)
-						{
-							Int offset = Int(o) + Extract(ptr.offset, j);
-							if (interleavedByLane) { offset = offset * SIMD::Width + (j * sizeof(float)); }
-							Store(Extract(src.Float(i), j), Pointer<Float>(&ptr.base[offset]), sizeof(float), atomic, memoryOrder);
-						}
-					}
-				});
-			}
-			Else
-			{
-				// No divergent offsets or masked lanes.
-				if (interleavedByLane)
-				{
-					// Lane-interleaved data.
-					VisitMemoryObject(pointerId, [&](uint32_t i, uint32_t offset)
-					{
-						Pointer<SIMD::Float> dst = &ptr.base[offset * SIMD::Width];
-						Store(src.Float(i), dst, sizeof(float), atomic, memoryOrder);  // TODO: optimize alignment
-					});
-				}
-				else
-				{
-					// Intermediate source data. Non-interleaved data.
-					VisitMemoryObject(pointerId, [&](uint32_t i, uint32_t offset)
-					{
-						Pointer<SIMD::Float> dst = &ptr.base[offset];
-						Store(SIMD::Float(src.Float(i)), dst, sizeof(float), atomic, memoryOrder);  // TODO: optimize alignment
-					});
-				}
-			}
+				auto p = ptr + offset;
+				if (interleavedByLane) { p = interleaveByLane(p); }
+				SIMD::Store(p, src.Float(i), state->activeLaneMask(), atomic, memoryOrder);
+			});
 		}
 
 		return EmitResult::Continue;
@@ -3561,16 +3551,9 @@
 
 				dst.move(i, frac);
 
-				// TODO: Refactor and consolidate with EmitStore.
-				for (int j = 0; j < SIMD::Width; j++)
-				{
-					If(Extract(state->activeLaneMask(), j) != 0)
-					{
-						Int offset = Int(i * sizeof(float)) + Extract(ptr.offset, j);
-						if (interleavedByLane) { offset = offset * SIMD::Width + (j * sizeof(float)); }
-						Store(Extract(whole, j), Pointer<Float>(&ptr.base[offset]), sizeof(float), false, std::memory_order_relaxed);
-					}
-				}
+				auto p = ptr + (i * sizeof(float));
+				if (interleavedByLane) { p = interleaveByLane(p); }
+				SIMD::Store(p, whole, state->activeLaneMask());
 			}
 			break;
 		}
@@ -3705,16 +3688,9 @@
 
 				dst.move(i, significand);
 
-				// TODO: Refactor and consolidate with EmitStore.
-				for (int j = 0; j < SIMD::Width; j++)
-				{
-					If(Extract(state->activeLaneMask(), j) != 0)
-					{
-						Int offset = Int(i * sizeof(float)) + Extract(ptr.offset, j);
-						if (interleavedByLane) { offset = offset * SIMD::Width + (j * sizeof(uint32_t)); }
-						Store(Extract(exponent, j), Pointer<Int>(&ptr.base[offset]), sizeof(uint32_t), false, std::memory_order_relaxed);
-					}
-				}
+				auto p = ptr + (i * sizeof(float));
+				if (interleavedByLane) { p = interleaveByLane(p); }
+				SIMD::Store(p, exponent, state->activeLaneMask());
 			}
 			break;
 		}
@@ -4454,30 +4430,29 @@
 		return EmitResult::Continue;
 	}
 
-	SIMD::Int SpirvShader::GetTexelOffset(GenericValue const & coordinate, Type const & imageType, Pointer<Byte> descriptor, int texelSize) const
+	SIMD::Pointer SpirvShader::GetTexelAddress(SIMD::Pointer ptr, GenericValue const & coordinate, Type const & imageType, Pointer<Byte> descriptor, int texelSize) const
 	{
-		// returns a (lane-divergent) byte offset to a texel within a storage image.
 		bool isArrayed = imageType.definition.word(5) != 0;
 		int dims = getType(coordinate.type).sizeInComponents - (isArrayed ? 1 : 0);
 
-		SIMD::Int texelOffset = coordinate.Int(0) * SIMD::Int(texelSize);
+		ptr += coordinate.Int(0) * SIMD::Int(texelSize);
 		if (dims > 1)
 		{
-			texelOffset += coordinate.Int(1) * SIMD::Int(
+			ptr += coordinate.Int(1) * SIMD::Int(
 					*Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, rowPitchBytes)));
 		}
 		if (dims > 2)
 		{
-			texelOffset += coordinate.Int(2) * SIMD::Int(
+			ptr += coordinate.Int(2) * SIMD::Int(
 					*Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, slicePitchBytes)));
 		}
 		if (isArrayed)
 		{
-			texelOffset += coordinate.Int(dims) * SIMD::Int(
+			ptr += coordinate.Int(dims) * SIMD::Int(
 					*Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, slicePitchBytes)));
 		}
 
-		return texelOffset;
+		return ptr;
 	}
 
 	SpirvShader::EmitResult SpirvShader::EmitImageRead(InsnIterator insn, EmitState *state) const
@@ -4496,9 +4471,9 @@
 		auto coordinate = GenericValue(this, state->routine, insn.word(4));
 
 		auto pointer = state->routine->getPointer(imageId);
-		ASSERT(pointer.uniform);
 		Pointer<Byte> binding = pointer.base;
 		Pointer<Byte> imageBase = *Pointer<Pointer<Byte>>(binding + OFFSET(vk::StorageImageDescriptor, ptr));
+		auto imageSizeInBytes = *Pointer<Int>(binding + OFFSET(vk::StorageImageDescriptor, sizeInBytes));
 
 		auto &dst = state->routine->createIntermediate(resultId, resultType.sizeInComponents);
 
@@ -4546,19 +4521,13 @@
 			UNIMPLEMENTED("spv::ImageFormat %u", format);
 		}
 
-		SIMD::Int texelOffset = GetTexelOffset(coordinate, imageType, binding, texelSize);
+		auto basePtr = SIMD::Pointer(imageBase, imageSizeInBytes);
+		auto texelPtr = GetTexelAddress(basePtr, coordinate, imageType, binding, texelSize);
 
 		for (auto i = 0u; i < numPackedElements; i++)
 		{
-			for (int j = 0; j < 4; j++)
-			{
-				If(Extract(state->activeLaneMask(), j) != 0)
-				{
-					Int offset = Int(sizeof(float) * i) + Extract(texelOffset, j);
-					packed[i] = Insert(packed[i], Load(RValue<Pointer<Int>>(&imageBase[offset]), sizeof(uint32_t), false,
-						  std::memory_order_relaxed), j);
-				}
-			}
+			packed[i] = SIMD::Load<SIMD::Int>(texelPtr, state->activeLaneMask());
+			texelPtr += sizeof(float);
 		}
 
 		switch(format)
@@ -4651,6 +4620,7 @@
 
 		Pointer<Byte> binding = state->routine->getPointer(imageId).base;
 		Pointer<Byte> imageBase = *Pointer<Pointer<Byte>>(binding + OFFSET(vk::StorageImageDescriptor, ptr));
+		auto imageSizeInBytes = *Pointer<Int>(binding + OFFSET(vk::StorageImageDescriptor, sizeInBytes));
 
 		SIMD::Int packed[4];
 		auto numPackedElements = 0u;
@@ -4721,19 +4691,13 @@
 			UNIMPLEMENTED("spv::ImageFormat %u", format);
 		}
 
-		SIMD::Int texelOffset = GetTexelOffset(coordinate, imageType, binding, texelSize);
+		auto basePtr = SIMD::Pointer(imageBase, imageSizeInBytes);
+		auto texelPtr = GetTexelAddress(basePtr, coordinate, imageType, binding, texelSize);
 
 		for (auto i = 0u; i < numPackedElements; i++)
 		{
-			for (int j = 0; j < 4; j++)
-			{
-				If(Extract(state->activeLaneMask(), j) != 0)
-				{
-					Int offset = Int(sizeof(float) * i) + Extract(texelOffset, j);
-					Store(Extract(packed[i], j), RValue<Pointer<Int>>(&imageBase[offset]), sizeof(uint32_t), false,
-						  std::memory_order_relaxed);
-				}
-			}
+			SIMD::Store(texelPtr, packed[i], state->activeLaneMask());
+			texelPtr += sizeof(float);
 		}
 
 		return EmitResult::Continue;
@@ -4757,10 +4721,12 @@
 
 		Pointer<Byte> binding = state->routine->getPointer(imageId).base;
 		Pointer<Byte> imageBase = *Pointer<Pointer<Byte>>(binding + OFFSET(vk::StorageImageDescriptor, ptr));
+		auto imageSizeInBytes = *Pointer<Int>(binding + OFFSET(vk::StorageImageDescriptor, sizeInBytes));
 
-		SIMD::Int texelOffset = GetTexelOffset(coordinate, imageType, binding, sizeof(uint32_t));
+		auto basePtr = SIMD::Pointer(imageBase, imageSizeInBytes);
+		auto ptr = GetTexelAddress(basePtr, coordinate, imageType, binding, sizeof(uint32_t));
 
-		state->routine->createPointer(resultId, SIMD::Pointer(imageBase, texelOffset));
+		state->routine->createPointer(resultId, ptr);
 
 		return EmitResult::Continue;
 	}
@@ -4776,13 +4742,14 @@
 		auto value = (insn.wordCount() == 7) ? GenericValue(this, state->routine, insn.word(6)).UInt(0) : RValue<SIMD::UInt>(1);
 		auto &dst = state->routine->createIntermediate(resultId, resultType.sizeInComponents);
 		auto ptr = state->routine->getPointer(insn.word(3));
+		auto ptrOffsets = ptr.offsets();
 
 		SIMD::UInt x;
 		for (int j = 0; j < SIMD::Width; j++)
 		{
 			If(Extract(state->activeLaneMask(), j) != 0)
 			{
-				auto offset = Extract(ptr.offset, j);
+				auto offset = Extract(ptrOffsets, j);
 				auto laneValue = Extract(value, j);
 				UInt v;
 				switch (insn.opcode())
@@ -4846,13 +4813,14 @@
 		auto comparator = GenericValue(this, state->routine, insn.word(8));
 		auto &dst = state->routine->createIntermediate(resultId, resultType.sizeInComponents);
 		auto ptr = state->routine->getPointer(insn.word(3));
+		auto ptrOffsets = ptr.offsets();
 
 		SIMD::UInt x;
 		for (int j = 0; j < SIMD::Width; j++)
 		{
 			If(Extract(state->activeLaneMask(), j) != 0)
 			{
-				auto offset = Extract(ptr.offset, j);
+				auto offset = Extract(ptrOffsets, j);
 				auto laneValue = Extract(value.UInt(0), j);
 				auto laneComparator = Extract(comparator.UInt(0), j);
 				UInt v = CompareExchangeAtomic(Pointer<UInt>(&ptr.base[offset]), laneValue, laneComparator, memoryOrderEqual, memoryOrderUnequal);
commit	9e4bc1babfeef7abe7fa288a36eedca18fd3db20	[log] [tgz]
author	Ben Clayton <bclayton@google.com>	Tue Apr 16 16:52:02 2019 -0400
committer	Ben Clayton <bclayton@google.com>	Wed Apr 24 16:22:22 2019 +0000
tree	d7c1c1274375bbd59206e305e00a3f84e3aa1c23
parent	4d1f8d05a85a4e8a77034edaa074f7e89ac65bf5 [diff] [blame]