Switch SIMD::Pointer::base from a Float* to a Byte*

While having the base units be sizeof(float) is handy (as that's the smallest unit we currently support), it makes limit checking tricky, as we're continually converting between byte and float units.

This sets us up nicely for when we want to support smaller types.

Change-Id: I3e334b6df4899f433b281118be23fdf8df6d9829
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/29328
Presubmit-Ready: Ben Clayton <bclayton@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Tested-by: Ben Clayton <bclayton@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/Pipeline/SpirvShader.cpp b/src/Pipeline/SpirvShader.cpp
index 66bdb8d..51a419e 100644
--- a/src/Pipeline/SpirvShader.cpp
+++ b/src/Pipeline/SpirvShader.cpp
@@ -1063,15 +1063,15 @@
 		// a functor for each scalar element within the object.
 
 		// The functor's first parameter is the index of the scalar element;
-		// the second parameter is the offset (in sizeof(float) units) from
-		// the base of the object.
+		// the second parameter is the offset (in bytes) from the base of the
+		// object.
 
 		ApplyDecorationsForId(&d, id);
 		auto const &type = getType(id);
 
 		if (d.HasOffset)
 		{
-			offset += d.Offset / sizeof(float);
+			offset += d.Offset;
 			d.HasOffset = false;
 		}
 
@@ -1086,7 +1086,7 @@
 			break;
 		case spv::OpTypeVector:
 		{
-			auto elemStride = (d.InsideMatrix && d.HasRowMajor && d.RowMajor) ? d.MatrixStride / sizeof(float) : 1;
+			auto elemStride = (d.InsideMatrix && d.HasRowMajor && d.RowMajor) ? d.MatrixStride : sizeof(float);
 			for (auto i = 0u; i < type.definition.word(3); i++)
 			{
 				VisitMemoryObjectInner(type.definition.word(2), d, index, offset + elemStride * i, f);
@@ -1095,7 +1095,7 @@
 		}
 		case spv::OpTypeMatrix:
 		{
-			auto columnStride = (d.HasRowMajor && d.RowMajor) ? 1 : d.MatrixStride / sizeof(float);
+			auto columnStride = (d.HasRowMajor && d.RowMajor) ? sizeof(float) : d.MatrixStride;
 			d.InsideMatrix = true;
 			for (auto i = 0u; i < type.definition.word(3); i++)
 			{
@@ -1117,7 +1117,7 @@
 			for (auto i = 0u; i < arraySize; i++)
 			{
 				ASSERT(d.HasArrayStride);
-				VisitMemoryObjectInner<F>(type.definition.word(2), d, index, offset + i * d.ArrayStride / sizeof(float), f);
+				VisitMemoryObjectInner<F>(type.definition.word(2), d, index, offset + i * d.ArrayStride, f);
 			}
 			break;
 		}
@@ -1143,7 +1143,7 @@
 			// Objects without explicit layout are tightly packed.
 			for (auto i = 0u; i < getType(type.element).sizeInComponents; i++)
 			{
-				f(i, i);
+				f(i, i * sizeof(float));
 			}
 		}
 	}
@@ -1172,7 +1172,7 @@
 				auto setLayout = routine->pipelineLayout->getDescriptorSetLayout(d.DescriptorSet);
 				int bindingOffset = static_cast<int>(setLayout->getBindingOffset(d.Binding, arrayIndex));
 
-				Pointer<Byte> bufferInfo = Pointer<Byte>(set.base) + bindingOffset; // VkDescriptorBufferInfo*
+				Pointer<Byte> bufferInfo = set.base + bindingOffset; // VkDescriptorBufferInfo*
 				Pointer<Byte> buffer = *Pointer<Pointer<Byte>>(bufferInfo + OFFSET(VkDescriptorBufferInfo, buffer)); // vk::Buffer*
 				Pointer<Byte> data = *Pointer<Pointer<Byte>>(buffer + vk::Buffer::DataOffset); // void*
 				Int offset = *Pointer<Int>(bufferInfo + OFFSET(VkDescriptorBufferInfo, offset));
@@ -1269,7 +1269,7 @@
 				int memberIndex = GetConstantInt(indexIds[i]);
 				ApplyDecorationsForIdMember(&d, typeId, memberIndex);
 				ASSERT(d.HasOffset);
-				constantOffset += d.Offset / sizeof(float);
+				constantOffset += d.Offset;
 				typeId = type.definition.word(2u + memberIndex);
 				break;
 			}
@@ -1281,11 +1281,11 @@
 				auto & obj = getObject(indexIds[i]);
 				if (obj.kind == Object::Kind::Constant)
 				{
-					constantOffset += d.ArrayStride/sizeof(float) * GetConstantInt(indexIds[i]);
+					constantOffset += d.ArrayStride * GetConstantInt(indexIds[i]);
 				}
 				else
 				{
-					ptr.addOffset(SIMD::Int(d.ArrayStride / sizeof(float)) * routine->getIntermediate(indexIds[i]).Int(0));
+					ptr.addOffset(SIMD::Int(d.ArrayStride) * routine->getIntermediate(indexIds[i]).Int(0));
 				}
 				typeId = type.element;
 				break;
@@ -1295,7 +1295,7 @@
 				// TODO: b/127950082: Check bounds.
 				ASSERT(d.HasMatrixStride);
 				d.InsideMatrix = true;
-				auto columnStride = (d.HasRowMajor && d.RowMajor) ? 1 : d.MatrixStride/sizeof(float);
+				auto columnStride = (d.HasRowMajor && d.RowMajor) ? sizeof(float) : d.MatrixStride;
 				auto & obj = getObject(indexIds[i]);
 				if (obj.kind == Object::Kind::Constant)
 				{
@@ -1310,7 +1310,7 @@
 			}
 			case spv::OpTypeVector:
 			{
-				auto elemStride = (d.InsideMatrix && d.HasRowMajor && d.RowMajor) ? d.MatrixStride / sizeof(float) : 1;
+				auto elemStride = (d.InsideMatrix && d.HasRowMajor && d.RowMajor) ? d.MatrixStride : sizeof(float);
 				auto & obj = getObject(indexIds[i]);
 				if (obj.kind == Object::Kind::Constant)
 				{
@@ -1357,7 +1357,7 @@
 				int offsetIntoStruct = 0;
 				for (auto j = 0; j < memberIndex; j++) {
 					auto memberType = type.definition.word(2u + j);
-					offsetIntoStruct += getType(memberType).sizeInComponents;
+					offsetIntoStruct += getType(memberType).sizeInComponents * sizeof(float);
 				}
 				constantOffset += offsetIntoStruct;
 				typeId = type.definition.word(2u + memberIndex);
@@ -1370,7 +1370,7 @@
 			case spv::OpTypeRuntimeArray:
 			{
 				// TODO: b/127950082: Check bounds.
-				auto stride = getType(type.element).sizeInComponents;
+				auto stride = getType(type.element).sizeInComponents * sizeof(float);
 				auto & obj = getObject(indexIds[i]);
 				if (obj.kind == Object::Kind::Constant)
 				{
@@ -1398,7 +1398,7 @@
 
 	uint32_t SpirvShader::WalkLiteralAccessChain(Type::ID typeId, uint32_t numIndexes, uint32_t const *indexes) const
 	{
-		uint32_t constantOffset = 0;
+		uint32_t componentOffset = 0;
 
 		for (auto i = 0u; i < numIndexes; i++)
 		{
@@ -1413,7 +1413,7 @@
 					auto memberType = type.definition.word(2u + j);
 					offsetIntoStruct += getType(memberType).sizeInComponents;
 				}
-				constantOffset += offsetIntoStruct;
+				componentOffset += offsetIntoStruct;
 				typeId = type.definition.word(2u + memberIndex);
 				break;
 			}
@@ -1424,7 +1424,7 @@
 			{
 				auto elementType = type.definition.word(2);
 				auto stride = getType(elementType).sizeInComponents;
-				constantOffset += stride * indexes[i];
+				componentOffset += stride * indexes[i];
 				typeId = elementType;
 				break;
 			}
@@ -1434,7 +1434,7 @@
 			}
 		}
 
-		return constantOffset;
+		return componentOffset;
 	}
 
 	void SpirvShader::Decorations::Apply(spv::Decoration decoration, uint32_t arg)
@@ -2326,8 +2326,8 @@
 					If(Extract(state->activeLaneMask(), j) != 0)
 					{
 						Int offset = Int(o) + Extract(ptr.offset, j);
-						if (interleavedByLane) { offset = offset * SIMD::Width + j; }
-						load[i] = Insert(load[i], Load(&ptr.base[offset], sizeof(float), atomic, memoryOrder), j);
+						if (interleavedByLane) { offset = offset * SIMD::Width + (j * sizeof(float)); }
+						load[i] = Insert(load[i], Load(Pointer<Float>(&ptr.base[offset]), sizeof(float), atomic, memoryOrder), j);
 					}
 				}
 			});
@@ -2338,18 +2338,19 @@
 			if (interleavedByLane)
 			{
 				// Lane-interleaved data.
-				Pointer<SIMD::Float> src = ptr.base;
-				VisitMemoryObject(pointerId, [&](uint32_t i, uint32_t o)
+				VisitMemoryObject(pointerId, [&](uint32_t i, uint32_t offset)
 				{
-					load[i] = Load(&src[o], sizeof(float), atomic, memoryOrder);  // TODO: optimize alignment
+					Pointer<SIMD::Float> src = &ptr.base[offset * SIMD::Width];
+					load[i] = Load(src, sizeof(float), atomic, memoryOrder);  // TODO: optimize alignment
 				});
 			}
 			else
 			{
 				// Non-interleaved data.
-				VisitMemoryObject(pointerId, [&](uint32_t i, uint32_t o)
+				VisitMemoryObject(pointerId, [&](uint32_t i, uint32_t offset)
 				{
-					load[i] = RValue<SIMD::Float>(Load(&ptr.base[o], sizeof(float), atomic, memoryOrder));  // TODO: optimize alignment
+					Pointer<Float> src = &ptr.base[offset];
+					load[i] = RValue<SIMD::Float>(Load(src, sizeof(float), atomic, memoryOrder));  // TODO: optimize alignment
 				});
 			}
 		}
@@ -2408,8 +2409,8 @@
 						If(Extract(state->activeLaneMask(), j) != 0)
 						{
 							Int offset = Int(o) + Extract(ptr.offset, j);
-							if (interleavedByLane) { offset = offset * SIMD::Width + j; }
-							Store(RValue<Float>(src[i]), &ptr.base[offset], sizeof(float), atomic, memoryOrder);
+							if (interleavedByLane) { offset = offset * SIMD::Width + (j * sizeof(float)); }
+							Store(Float(src[i]), Pointer<Float>(&ptr.base[offset]), sizeof(float), atomic, memoryOrder);
 						}
 					}
 				});
@@ -2418,10 +2419,10 @@
 			{
 				// Constant source data.
 				// No divergent offsets or masked lanes.
-				Pointer<SIMD::Float> dst = ptr.base;
-				VisitMemoryObject(pointerId, [&](uint32_t i, uint32_t o)
+				VisitMemoryObject(pointerId, [&](uint32_t i, uint32_t offset)
 				{
-					Store(RValue<SIMD::Float>(src[i]), &dst[o], sizeof(float), atomic, memoryOrder);  // TODO: optimize alignment
+					Pointer<SIMD::Float> dst = &ptr.base[offset * SIMD::Width];
+					Store(SIMD::Float(src[i]), dst, sizeof(float), atomic, memoryOrder);  // TODO: optimize alignment
 				});
 			}
 		}
@@ -2439,8 +2440,8 @@
 						If(Extract(state->activeLaneMask(), j) != 0)
 						{
 							Int offset = Int(o) + Extract(ptr.offset, j);
-							if (interleavedByLane) { offset = offset * SIMD::Width + j; }
-							Store(Extract(src.Float(i), j), &ptr.base[offset], sizeof(float), atomic, memoryOrder);
+							if (interleavedByLane) { offset = offset * SIMD::Width + (j * sizeof(float)); }
+							Store(Extract(src.Float(i), j), Pointer<Float>(&ptr.base[offset]), sizeof(float), atomic, memoryOrder);
 						}
 					}
 				});
@@ -2451,19 +2452,19 @@
 				if (interleavedByLane)
 				{
 					// Lane-interleaved data.
-					Pointer<SIMD::Float> dst = ptr.base;
-					VisitMemoryObject(pointerId, [&](uint32_t i, uint32_t o)
+					VisitMemoryObject(pointerId, [&](uint32_t i, uint32_t offset)
 					{
-						Store(src.Float(i), &dst[o], sizeof(float), atomic, memoryOrder);  // TODO: optimize alignment
+						Pointer<SIMD::Float> dst = &ptr.base[offset * SIMD::Width];
+						Store(src.Float(i), dst, sizeof(float), atomic, memoryOrder);  // TODO: optimize alignment
 					});
 				}
 				else
 				{
 					// Intermediate source data. Non-interleaved data.
-					Pointer<SIMD::Float> dst = ptr.base;
-					VisitMemoryObject(pointerId, [&](uint32_t i, uint32_t o)
+					VisitMemoryObject(pointerId, [&](uint32_t i, uint32_t offset)
 					{
-						Store<SIMD::Float>(SIMD::Float(src.Float(i)), &dst[o], sizeof(float), atomic, memoryOrder);  // TODO: optimize alignment
+						Pointer<SIMD::Float> dst = &ptr.base[offset];
+						Store(SIMD::Float(src.Float(i)), dst, sizeof(float), atomic, memoryOrder);  // TODO: optimize alignment
 					});
 				}
 			}
@@ -3518,9 +3519,9 @@
 				{
 					If(Extract(state->activeLaneMask(), j) != 0)
 					{
-						Int offset = Int(i) + Extract(ptr.offset, j);
-						if (interleavedByLane) { offset = offset * SIMD::Width + j; }
-						Store(Extract(whole, j), &ptr.base[offset], sizeof(float), false, std::memory_order_relaxed);
+						Int offset = Int(i * sizeof(float)) + Extract(ptr.offset, j);
+						if (interleavedByLane) { offset = offset * SIMD::Width + (j * sizeof(float)); }
+						Store(Extract(whole, j), Pointer<Float>(&ptr.base[offset]), sizeof(float), false, std::memory_order_relaxed);
 					}
 				}
 			}
@@ -3660,12 +3661,11 @@
 				// TODO: Refactor and consolidate with EmitStore.
 				for (int j = 0; j < SIMD::Width; j++)
 				{
-					auto ptrBase = Pointer<Int>(ptr.base);
 					If(Extract(state->activeLaneMask(), j) != 0)
 					{
-						Int offset = Int(i) + Extract(ptr.offset, j);
-						if (interleavedByLane) { offset = offset * SIMD::Width + j; }
-						Store(Extract(exponent, j), &ptrBase[offset], sizeof(uint32_t), false, std::memory_order_relaxed);
+						Int offset = Int(i * sizeof(float)) + Extract(ptr.offset, j);
+						if (interleavedByLane) { offset = offset * SIMD::Width + (j * sizeof(uint32_t)); }
+						Store(Extract(exponent, j), Pointer<Int>(&ptr.base[offset]), sizeof(uint32_t), false, std::memory_order_relaxed);
 					}
 				}
 			}
@@ -4735,8 +4735,7 @@
 		Pointer<Byte> binding = Pointer<Byte>(set + bindingOffset);
 		Pointer<Byte> imageBase = *Pointer<Pointer<Byte>>(binding + OFFSET(vk::StorageImageDescriptor, ptr));
 
-		// TODO: texelOffset is in bytes - remove shift when SIMD::Pointer is offset by bytes.
-		SIMD::Int texelOffset = GetTexelOffset(coordinate, imageType, binding, sizeof(uint32_t)) >> 2;
+		SIMD::Int texelOffset = GetTexelOffset(coordinate, imageType, binding, sizeof(uint32_t));
 
 		state->routine->createPointer(resultId, SIMD::Pointer(imageBase, texelOffset));
 
diff --git a/src/Pipeline/SpirvShader.hpp b/src/Pipeline/SpirvShader.hpp
index 7a0b30e..ec0d448 100644
--- a/src/Pipeline/SpirvShader.hpp
+++ b/src/Pipeline/SpirvShader.hpp
@@ -71,9 +71,9 @@
 			inline void addOffset(Int delta) { offset += delta; uniform = false; }
 
 			// Base address for the pointer, common across all lanes.
-			rr::Pointer<rr::Float> base;
+			rr::Pointer<rr::Byte> base;
 
-			// Per lane offsets from base.
+			// Per lane offsets from base in bytes.
 			// If uniform is true, all offsets are considered zero.
 			Int offset;
 
@@ -624,6 +624,8 @@
 
 		SIMD::Pointer WalkExplicitLayoutAccessChain(Object::ID id, uint32_t numIndexes, uint32_t const *indexIds, SpirvRoutine *routine) const;
 		SIMD::Pointer WalkAccessChain(Object::ID id, uint32_t numIndexes, uint32_t const *indexIds, SpirvRoutine *routine) const;
+
+		// Returns the *component* offset in the literal for the given access chain.
 		uint32_t WalkLiteralAccessChain(Type::ID id, uint32_t numIndexes, uint32_t const *indexes) const;
 
 		// EmitState holds control-flow state for the emit() pass.
diff --git a/src/Reactor/Reactor.hpp b/src/Reactor/Reactor.hpp
index f28da09..f6341e2 100644
--- a/src/Reactor/Reactor.hpp
+++ b/src/Reactor/Reactor.hpp
@@ -2349,11 +2349,30 @@
 	}
 
 	template<typename T>
+	RValue<T> Load(Pointer<T> pointer, unsigned int alignment, bool atomic, std::memory_order memoryOrder)
+	{
+		return Load(RValue<Pointer<T>>(pointer), alignment, atomic, memoryOrder);
+	}
+
+	template<typename T>
 	void Store(RValue<T> value, RValue<Pointer<T>> pointer, unsigned int alignment, bool atomic, std::memory_order memoryOrder)
 	{
 		Nucleus::createStore(value.value, pointer.value, T::getType(), false, alignment, atomic, memoryOrder);
 	}
 
+	template<typename T>
+	void Store(RValue<T> value, Pointer<T> pointer, unsigned int alignment, bool atomic, std::memory_order memoryOrder)
+	{
+		Store(value, RValue<Pointer<T>>(pointer), alignment, atomic, memoryOrder);
+	}
+
+	template<typename T>
+	void Store(T value, Pointer<T> pointer, unsigned int alignment, bool atomic, std::memory_order memoryOrder)
+	{
+		Store(RValue<T>(value), RValue<Pointer<T>>(pointer), alignment, atomic, memoryOrder);
+	}
+
+
 	template<class T, int S = 1>
 	class Array : public LValue<T>
 	{