Support more 10_10_10_2 formats as vertex attributes

Of the 10_10_10_2 formats, vertex input buffers previously supported only
VK_FORMAT_A2B10G10R10_UNORM_PACK32. This CL adds the SNORM, UINT and SINT
variants of that format, as well as all four A2R10G10B10 variants.

This CL also removes the StreamType enum; streams now carry their VkFormat
directly instead.

Tests: dEQP-VK.*r10*
Bug: b/142661203
Change-Id: I996705395cbb493c599e1a460a6368a7e00d5a55
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/40348
Tested-by: Alexis Hétu <sugoi@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/Device/Renderer.cpp b/src/Device/Renderer.cpp
index d94d0ba..a29e3ea 100644
--- a/src/Device/Renderer.cpp
+++ b/src/Device/Renderer.cpp
@@ -1175,12 +1175,13 @@
 	occlusionQuery = nullptr;
 }
 
+// TODO(b/137740918): Optimize instancing to use a single draw call.
 void Renderer::advanceInstanceAttributes(Stream *inputs)
 {
 	for(uint32_t i = 0; i < vk::MAX_VERTEX_INPUT_BINDINGS; i++)
 	{
 		auto &attrib = inputs[i];
-		if(attrib.count && attrib.instanceStride && (attrib.instanceStride < attrib.robustnessSize))
+		if((attrib.format != VK_FORMAT_UNDEFINED) && attrib.instanceStride && (attrib.instanceStride < attrib.robustnessSize))
 		{
 			// Under the casts: attrib.buffer += attrib.instanceStride
 			attrib.buffer = (void const *)((uintptr_t)attrib.buffer + attrib.instanceStride);
diff --git a/src/Device/Stream.hpp b/src/Device/Stream.hpp
index 8e2fd98..e180bc0 100644
--- a/src/Device/Stream.hpp
+++ b/src/Device/Stream.hpp
@@ -15,36 +15,17 @@
 #ifndef sw_Stream_hpp
 #define sw_Stream_hpp
 
-#include "System/Types.hpp"
+#include <Vulkan/VulkanPlatform.h>
 
 namespace sw {
 
-enum StreamType ENUM_UNDERLYING_TYPE_UNSIGNED_INT
-{
-	STREAMTYPE_COLOR,  // 4 normalized unsigned bytes, ZYXW order
-	STREAMTYPE_FLOAT,  // Normalization ignored
-	STREAMTYPE_BYTE,
-	STREAMTYPE_SBYTE,
-	STREAMTYPE_SHORT,
-	STREAMTYPE_USHORT,
-	STREAMTYPE_INT,
-	STREAMTYPE_UINT,
-	STREAMTYPE_HALF,  // Normalization ignored
-	STREAMTYPE_2_10_10_10_INT,
-	STREAMTYPE_2_10_10_10_UINT,
-
-	STREAMTYPE_LAST = STREAMTYPE_2_10_10_10_UINT
-};
-
 struct Stream
 {
 	const void *buffer = nullptr;
 	unsigned int robustnessSize = 0;
 	unsigned int vertexStride = 0;
 	unsigned int instanceStride = 0;
-	StreamType type = STREAMTYPE_FLOAT;
-	unsigned char count = 0;
-	bool normalized = false;
+	VkFormat format = VK_FORMAT_UNDEFINED;
 	unsigned int offset = 0;
 	unsigned int binding = 0;
 };
diff --git a/src/Device/VertexProcessor.cpp b/src/Device/VertexProcessor.cpp
index 3f72121..88359a2 100644
--- a/src/Device/VertexProcessor.cpp
+++ b/src/Device/VertexProcessor.cpp
@@ -44,32 +44,6 @@
 	return hash;
 }
 
-unsigned int VertexProcessor::States::Input::bytesPerAttrib() const
-{
-	switch(type)
-	{
-		case STREAMTYPE_FLOAT:
-		case STREAMTYPE_INT:
-		case STREAMTYPE_UINT:
-			return count * sizeof(uint32_t);
-		case STREAMTYPE_HALF:
-		case STREAMTYPE_SHORT:
-		case STREAMTYPE_USHORT:
-			return count * sizeof(uint16_t);
-		case STREAMTYPE_BYTE:
-		case STREAMTYPE_SBYTE:
-			return count * sizeof(uint8_t);
-		case STREAMTYPE_COLOR:
-		case STREAMTYPE_2_10_10_10_INT:
-		case STREAMTYPE_2_10_10_10_UINT:
-			return sizeof(int);
-		default:
-			UNSUPPORTED("stream.type %d", int(type));
-	}
-
-	return 0;
-}
-
 bool VertexProcessor::State::operator==(const State &state) const
 {
 	if(hash != state.hash)
@@ -109,9 +83,7 @@
 
 	for(int i = 0; i < MAX_INTERFACE_COMPONENTS / 4; i++)
 	{
-		state.input[i].type = context->input[i].type;
-		state.input[i].count = context->input[i].count;
-		state.input[i].normalized = context->input[i].normalized;
+		state.input[i].format = context->input[i].format;
 		// TODO: get rid of attribType -- just keep the VK format all the way through, this fully determines
 		// how to handle the attribute.
 		state.input[i].attribType = context->vertexShader->inputs[i * 4].Type;
diff --git a/src/Device/VertexProcessor.hpp b/src/Device/VertexProcessor.hpp
index 77117d9..1c5b28c 100644
--- a/src/Device/VertexProcessor.hpp
+++ b/src/Device/VertexProcessor.hpp
@@ -68,14 +68,10 @@
 		{
 			operator bool() const  // Returns true if stream contains data
 			{
-				return count != 0;
+				return format != VK_FORMAT_UNDEFINED;
 			}
 
-			unsigned int bytesPerAttrib() const;
-
-			StreamType type : BITS(STREAMTYPE_LAST);
-			unsigned int count : 3;
-			bool normalized : 1;
+			VkFormat format;  // TODO(b/148016460): Could be restricted to VK_FORMAT_END_RANGE
 			unsigned int attribType : BITS(SpirvShader::ATTRIBTYPE_LAST);
 		};
 
diff --git a/src/Pipeline/VertexRoutine.cpp b/src/Pipeline/VertexRoutine.cpp
index 517ac70..99cdbe3 100644
--- a/src/Pipeline/VertexRoutine.cpp
+++ b/src/Pipeline/VertexRoutine.cpp
@@ -177,11 +177,13 @@
 	Pointer<Byte> source2 = buffer + offsets.z;
 	Pointer<Byte> source3 = buffer + offsets.w;
 
+	vk::Format format(stream.format);
+
 	UInt4 zero(0);
 	if(robustBufferAccess)
 	{
 		// TODO(b/141124876): Optimize for wide-vector gather operations.
-		UInt4 limits = offsets + UInt4(stream.bytesPerAttrib());
+		UInt4 limits = offsets + UInt4(format.bytes());
 		Pointer<Byte> zeroSource = As<Pointer<Byte>>(&zero);
 		source0 = IfThenElse(limits.x <= robustnessSize, source0, zeroSource);
 		source1 = IfThenElse(limits.y <= robustnessSize, source1, zeroSource);
@@ -189,19 +191,25 @@
 		source3 = IfThenElse(limits.w <= robustnessSize, source3, zeroSource);
 	}
 
-	bool isNativeFloatAttrib = (stream.attribType == SpirvShader::ATTRIBTYPE_FLOAT) || stream.normalized;
+	int componentCount = format.componentCount();
+	bool normalized = !format.isUnnormalizedInteger();
+	bool isNativeFloatAttrib = (stream.attribType == SpirvShader::ATTRIBTYPE_FLOAT) || normalized;
+	bool bgra = false;
 
-	switch(stream.type)
+	switch(stream.format)
 	{
-		case STREAMTYPE_FLOAT:
+		case VK_FORMAT_R32_SFLOAT:
+		case VK_FORMAT_R32G32_SFLOAT:
+		case VK_FORMAT_R32G32B32_SFLOAT:
+		case VK_FORMAT_R32G32B32A32_SFLOAT:
 		{
-			if(stream.count == 0)
+			if(componentCount == 0)
 			{
 				// Null stream, all default components
 			}
 			else
 			{
-				if(stream.count == 1)
+				if(componentCount == 1)
 				{
 					v.x.x = *Pointer<Float>(source0);
 					v.x.y = *Pointer<Float>(source1);
@@ -215,22 +223,22 @@
 					v.z = *Pointer<Float4>(source2);
 					v.w = *Pointer<Float4>(source3);
 
-					transpose4xN(v.x, v.y, v.z, v.w, stream.count);
+					transpose4xN(v.x, v.y, v.z, v.w, componentCount);
 				}
 
 				switch(stream.attribType)
 				{
 					case SpirvShader::ATTRIBTYPE_INT:
-						if(stream.count >= 1) v.x = As<Float4>(Int4(v.x));
-						if(stream.count >= 2) v.x = As<Float4>(Int4(v.y));
-						if(stream.count >= 3) v.x = As<Float4>(Int4(v.z));
-						if(stream.count >= 4) v.x = As<Float4>(Int4(v.w));
+						if(componentCount >= 1) v.x = As<Float4>(Int4(v.x));
+						if(componentCount >= 2) v.x = As<Float4>(Int4(v.y));
+						if(componentCount >= 3) v.x = As<Float4>(Int4(v.z));
+						if(componentCount >= 4) v.x = As<Float4>(Int4(v.w));
 						break;
 					case SpirvShader::ATTRIBTYPE_UINT:
-						if(stream.count >= 1) v.x = As<Float4>(UInt4(v.x));
-						if(stream.count >= 2) v.x = As<Float4>(UInt4(v.y));
-						if(stream.count >= 3) v.x = As<Float4>(UInt4(v.z));
-						if(stream.count >= 4) v.x = As<Float4>(UInt4(v.w));
+						if(componentCount >= 1) v.x = As<Float4>(UInt4(v.x));
+						if(componentCount >= 2) v.x = As<Float4>(UInt4(v.y));
+						if(componentCount >= 3) v.x = As<Float4>(UInt4(v.z));
+						if(componentCount >= 4) v.x = As<Float4>(UInt4(v.w));
 						break;
 					default:
 						break;
@@ -238,7 +246,16 @@
 			}
 		}
 		break;
-		case STREAMTYPE_BYTE:
+		case VK_FORMAT_B8G8R8A8_UNORM:
+			bgra = true;
+		case VK_FORMAT_R8_UNORM:
+		case VK_FORMAT_R8G8_UNORM:
+		case VK_FORMAT_R8G8B8A8_UNORM:
+		case VK_FORMAT_R8_UINT:
+		case VK_FORMAT_R8G8_UINT:
+		case VK_FORMAT_R8G8B8A8_UINT:
+		case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
+		case VK_FORMAT_A8B8G8R8_UINT_PACK32:
 			if(isNativeFloatAttrib)  // Stream: UByte, Shader attrib: Float
 			{
 				v.x = Float4(*Pointer<Byte4>(source0));
@@ -246,14 +263,14 @@
 				v.z = Float4(*Pointer<Byte4>(source2));
 				v.w = Float4(*Pointer<Byte4>(source3));
 
-				transpose4xN(v.x, v.y, v.z, v.w, stream.count);
+				transpose4xN(v.x, v.y, v.z, v.w, componentCount);
 
-				if(stream.normalized)
+				if(normalized)
 				{
-					if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleByte));
-					if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleByte));
-					if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleByte));
-					if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleByte));
+					if(componentCount >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleByte));
+					if(componentCount >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleByte));
+					if(componentCount >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleByte));
+					if(componentCount >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleByte));
 				}
 			}
 			else  // Stream: UByte, Shader attrib: Int / UInt
@@ -263,10 +280,17 @@
 				v.z = As<Float4>(Int4(*Pointer<Byte4>(source2)));
 				v.w = As<Float4>(Int4(*Pointer<Byte4>(source3)));
 
-				transpose4xN(v.x, v.y, v.z, v.w, stream.count);
+				transpose4xN(v.x, v.y, v.z, v.w, componentCount);
 			}
 			break;
-		case STREAMTYPE_SBYTE:
+		case VK_FORMAT_R8_SNORM:
+		case VK_FORMAT_R8_SINT:
+		case VK_FORMAT_R8G8_SNORM:
+		case VK_FORMAT_R8G8_SINT:
+		case VK_FORMAT_R8G8B8A8_SNORM:
+		case VK_FORMAT_R8G8B8A8_SINT:
+		case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
+		case VK_FORMAT_A8B8G8R8_SINT_PACK32:
 			if(isNativeFloatAttrib)  // Stream: SByte, Shader attrib: Float
 			{
 				v.x = Float4(*Pointer<SByte4>(source0));
@@ -274,14 +298,14 @@
 				v.z = Float4(*Pointer<SByte4>(source2));
 				v.w = Float4(*Pointer<SByte4>(source3));
 
-				transpose4xN(v.x, v.y, v.z, v.w, stream.count);
+				transpose4xN(v.x, v.y, v.z, v.w, componentCount);
 
-				if(stream.normalized)
+				if(normalized)
 				{
-					if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleSByte));
-					if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleSByte));
-					if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleSByte));
-					if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleSByte));
+					if(componentCount >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleSByte));
+					if(componentCount >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleSByte));
+					if(componentCount >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleSByte));
+					if(componentCount >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleSByte));
 				}
 			}
 			else  // Stream: SByte, Shader attrib: Int / UInt
@@ -291,25 +315,15 @@
 				v.z = As<Float4>(Int4(*Pointer<SByte4>(source2)));
 				v.w = As<Float4>(Int4(*Pointer<SByte4>(source3)));
 
-				transpose4xN(v.x, v.y, v.z, v.w, stream.count);
+				transpose4xN(v.x, v.y, v.z, v.w, componentCount);
 			}
 			break;
-		case STREAMTYPE_COLOR:
-		{
-			v.x = Float4(*Pointer<Byte4>(source0)) * *Pointer<Float4>(constants + OFFSET(Constants, unscaleByte));
-			v.y = Float4(*Pointer<Byte4>(source1)) * *Pointer<Float4>(constants + OFFSET(Constants, unscaleByte));
-			v.z = Float4(*Pointer<Byte4>(source2)) * *Pointer<Float4>(constants + OFFSET(Constants, unscaleByte));
-			v.w = Float4(*Pointer<Byte4>(source3)) * *Pointer<Float4>(constants + OFFSET(Constants, unscaleByte));
-
-			transpose4x4(v.x, v.y, v.z, v.w);
-
-			// Swap red and blue
-			Float4 t = v.x;
-			v.x = v.z;
-			v.z = t;
-		}
-		break;
-		case STREAMTYPE_SHORT:
+		case VK_FORMAT_R16_SNORM:
+		case VK_FORMAT_R16_SINT:
+		case VK_FORMAT_R16G16_SNORM:
+		case VK_FORMAT_R16G16_SINT:
+		case VK_FORMAT_R16G16B16A16_SNORM:
+		case VK_FORMAT_R16G16B16A16_SINT:
 			if(isNativeFloatAttrib)  // Stream: Int, Shader attrib: Float
 			{
 				v.x = Float4(*Pointer<Short4>(source0));
@@ -317,14 +331,14 @@
 				v.z = Float4(*Pointer<Short4>(source2));
 				v.w = Float4(*Pointer<Short4>(source3));
 
-				transpose4xN(v.x, v.y, v.z, v.w, stream.count);
+				transpose4xN(v.x, v.y, v.z, v.w, componentCount);
 
-				if(stream.normalized)
+				if(normalized)
 				{
-					if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleShort));
-					if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleShort));
-					if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleShort));
-					if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleShort));
+					if(componentCount >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleShort));
+					if(componentCount >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleShort));
+					if(componentCount >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleShort));
+					if(componentCount >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleShort));
 				}
 			}
 			else  // Stream: Short, Shader attrib: Int/UInt, no type conversion
@@ -334,10 +348,15 @@
 				v.z = As<Float4>(Int4(*Pointer<Short4>(source2)));
 				v.w = As<Float4>(Int4(*Pointer<Short4>(source3)));
 
-				transpose4xN(v.x, v.y, v.z, v.w, stream.count);
+				transpose4xN(v.x, v.y, v.z, v.w, componentCount);
 			}
 			break;
-		case STREAMTYPE_USHORT:
+		case VK_FORMAT_R16_UNORM:
+		case VK_FORMAT_R16_UINT:
+		case VK_FORMAT_R16G16_UNORM:
+		case VK_FORMAT_R16G16_UINT:
+		case VK_FORMAT_R16G16B16A16_UNORM:
+		case VK_FORMAT_R16G16B16A16_UINT:
 			if(isNativeFloatAttrib)  // Stream: Int, Shader attrib: Float
 			{
 				v.x = Float4(*Pointer<UShort4>(source0));
@@ -345,14 +364,14 @@
 				v.z = Float4(*Pointer<UShort4>(source2));
 				v.w = Float4(*Pointer<UShort4>(source3));
 
-				transpose4xN(v.x, v.y, v.z, v.w, stream.count);
+				transpose4xN(v.x, v.y, v.z, v.w, componentCount);
 
-				if(stream.normalized)
+				if(normalized)
 				{
-					if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUShort));
-					if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUShort));
-					if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUShort));
-					if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUShort));
+					if(componentCount >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUShort));
+					if(componentCount >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUShort));
+					if(componentCount >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUShort));
+					if(componentCount >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUShort));
 				}
 			}
 			else  // Stream: UShort, Shader attrib: Int/UInt, no type conversion
@@ -362,10 +381,13 @@
 				v.z = As<Float4>(Int4(*Pointer<UShort4>(source2)));
 				v.w = As<Float4>(Int4(*Pointer<UShort4>(source3)));
 
-				transpose4xN(v.x, v.y, v.z, v.w, stream.count);
+				transpose4xN(v.x, v.y, v.z, v.w, componentCount);
 			}
 			break;
-		case STREAMTYPE_INT:
+		case VK_FORMAT_R32_SINT:
+		case VK_FORMAT_R32G32_SINT:
+		case VK_FORMAT_R32G32B32_SINT:
+		case VK_FORMAT_R32G32B32A32_SINT:
 			if(isNativeFloatAttrib)  // Stream: Int, Shader attrib: Float
 			{
 				v.x = Float4(*Pointer<Int4>(source0));
@@ -373,14 +395,14 @@
 				v.z = Float4(*Pointer<Int4>(source2));
 				v.w = Float4(*Pointer<Int4>(source3));
 
-				transpose4xN(v.x, v.y, v.z, v.w, stream.count);
+				transpose4xN(v.x, v.y, v.z, v.w, componentCount);
 
-				if(stream.normalized)
+				if(normalized)
 				{
-					if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt));
-					if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt));
-					if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt));
-					if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt));
+					if(componentCount >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt));
+					if(componentCount >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt));
+					if(componentCount >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt));
+					if(componentCount >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt));
 				}
 			}
 			else  // Stream: Int, Shader attrib: Int/UInt, no type conversion
@@ -390,10 +412,13 @@
 				v.z = *Pointer<Float4>(source2);
 				v.w = *Pointer<Float4>(source3);
 
-				transpose4xN(v.x, v.y, v.z, v.w, stream.count);
+				transpose4xN(v.x, v.y, v.z, v.w, componentCount);
 			}
 			break;
-		case STREAMTYPE_UINT:
+		case VK_FORMAT_R32_UINT:
+		case VK_FORMAT_R32G32_UINT:
+		case VK_FORMAT_R32G32B32_UINT:
+		case VK_FORMAT_R32G32B32A32_UINT:
 			if(isNativeFloatAttrib)  // Stream: UInt, Shader attrib: Float
 			{
 				v.x = Float4(*Pointer<UInt4>(source0));
@@ -401,14 +426,14 @@
 				v.z = Float4(*Pointer<UInt4>(source2));
 				v.w = Float4(*Pointer<UInt4>(source3));
 
-				transpose4xN(v.x, v.y, v.z, v.w, stream.count);
+				transpose4xN(v.x, v.y, v.z, v.w, componentCount);
 
-				if(stream.normalized)
+				if(normalized)
 				{
-					if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt));
-					if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt));
-					if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt));
-					if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt));
+					if(componentCount >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt));
+					if(componentCount >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt));
+					if(componentCount >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt));
+					if(componentCount >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt));
 				}
 			}
 			else  // Stream: UInt, Shader attrib: Int/UInt, no type conversion
@@ -418,12 +443,14 @@
 				v.z = *Pointer<Float4>(source2);
 				v.w = *Pointer<Float4>(source3);
 
-				transpose4xN(v.x, v.y, v.z, v.w, stream.count);
+				transpose4xN(v.x, v.y, v.z, v.w, componentCount);
 			}
 			break;
-		case STREAMTYPE_HALF:
+		case VK_FORMAT_R16_SFLOAT:
+		case VK_FORMAT_R16G16_SFLOAT:
+		case VK_FORMAT_R16G16B16A16_SFLOAT:
 		{
-			if(stream.count >= 1)
+			if(componentCount >= 1)
 			{
 				UShort x0 = *Pointer<UShort>(source0 + 0);
 				UShort x1 = *Pointer<UShort>(source1 + 0);
@@ -436,7 +463,7 @@
 				v.x.w = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(x3) * 4);
 			}
 
-			if(stream.count >= 2)
+			if(componentCount >= 2)
 			{
 				UShort y0 = *Pointer<UShort>(source0 + 2);
 				UShort y1 = *Pointer<UShort>(source1 + 2);
@@ -449,7 +476,7 @@
 				v.y.w = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(y3) * 4);
 			}
 
-			if(stream.count >= 3)
+			if(componentCount >= 3)
 			{
 				UShort z0 = *Pointer<UShort>(source0 + 4);
 				UShort z1 = *Pointer<UShort>(source1 + 4);
@@ -462,7 +489,7 @@
 				v.z.w = *Pointer<Float>(constants + OFFSET(Constants, half2float) + Int(z3) * 4);
 			}
 
-			if(stream.count >= 4)
+			if(componentCount >= 4)
 			{
 				UShort w0 = *Pointer<UShort>(source0 + 6);
 				UShort w1 = *Pointer<UShort>(source1 + 6);
@@ -476,29 +503,46 @@
 			}
 		}
 		break;
-		case STREAMTYPE_2_10_10_10_INT:
+		case VK_FORMAT_A2R10G10B10_SNORM_PACK32:
+		case VK_FORMAT_A2R10G10B10_SINT_PACK32:
+			bgra = true;
+		case VK_FORMAT_A2B10G10R10_SNORM_PACK32:
+		case VK_FORMAT_A2B10G10R10_SINT_PACK32:
 		{
 			Int4 src;
 			src = Insert(src, *Pointer<Int>(source0), 0);
 			src = Insert(src, *Pointer<Int>(source1), 1);
 			src = Insert(src, *Pointer<Int>(source2), 2);
 			src = Insert(src, *Pointer<Int>(source3), 3);
-
-			v.x = Float4((src << 22) >> 22);
-			v.y = Float4((src << 12) >> 22);
-			v.z = Float4((src << 02) >> 22);
-			v.w = Float4(src >> 30);
-
-			if(stream.normalized)
+			if(isNativeFloatAttrib)  // Stream: Int, Shader attrib: Float
 			{
-				v.x = Max(v.x * Float4(1.0f / 0x1FF), Float4(-1.0f));
-				v.y = Max(v.y * Float4(1.0f / 0x1FF), Float4(-1.0f));
-				v.z = Max(v.z * Float4(1.0f / 0x1FF), Float4(-1.0f));
-				v.w = Max(v.w, Float4(-1.0f));
+				v.x = Float4((src << 22) >> 22);
+				v.y = Float4((src << 12) >> 22);
+				v.z = Float4((src << 02) >> 22);
+				v.w = Float4(src >> 30);
+
+				if(normalized)
+				{
+					v.x = Max(v.x * Float4(1.0f / 0x1FF), Float4(-1.0f));
+					v.y = Max(v.y * Float4(1.0f / 0x1FF), Float4(-1.0f));
+					v.z = Max(v.z * Float4(1.0f / 0x1FF), Float4(-1.0f));
+					v.w = Max(v.w, Float4(-1.0f));
+				}
+			}
+			else  // Stream: Int, Shader attrib: Int/UInt, no type conversion
+			{
+				v.x = As<Float4>((src << 22) >> 22);
+				v.y = As<Float4>((src << 12) >> 22);
+				v.z = As<Float4>((src << 02) >> 22);
+				v.w = As<Float4>(src >> 30);
 			}
 		}
 		break;
-		case STREAMTYPE_2_10_10_10_UINT:
+		case VK_FORMAT_A2R10G10B10_UNORM_PACK32:
+		case VK_FORMAT_A2R10G10B10_UINT_PACK32:
+			bgra = true;
+		case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
+		case VK_FORMAT_A2B10G10R10_UINT_PACK32:
 		{
 			Int4 src;
 			src = Insert(src, *Pointer<Int>(source0), 0);
@@ -506,28 +550,46 @@
 			src = Insert(src, *Pointer<Int>(source2), 2);
 			src = Insert(src, *Pointer<Int>(source3), 3);
 
-			v.x = Float4(src & Int4(0x3FF));
-			v.y = Float4((src >> 10) & Int4(0x3FF));
-			v.z = Float4((src >> 20) & Int4(0x3FF));
-			v.w = Float4((src >> 30) & Int4(0x3));
-
-			if(stream.normalized)
+			if(isNativeFloatAttrib)  // Stream: UInt, Shader attrib: Float
 			{
-				v.x *= Float4(1.0f / 0x3FF);
-				v.y *= Float4(1.0f / 0x3FF);
-				v.z *= Float4(1.0f / 0x3FF);
-				v.w *= Float4(1.0f / 0x3);
+				v.x = Float4(src & Int4(0x3FF));
+				v.y = Float4((src >> 10) & Int4(0x3FF));
+				v.z = Float4((src >> 20) & Int4(0x3FF));
+				v.w = Float4((src >> 30) & Int4(0x3));
+
+				if(normalized)
+				{
+					v.x *= Float4(1.0f / 0x3FF);
+					v.y *= Float4(1.0f / 0x3FF);
+					v.z *= Float4(1.0f / 0x3FF);
+					v.w *= Float4(1.0f / 0x3);
+				}
+			}
+			else  // Stream: UInt, Shader attrib: Int/UInt, no type conversion
+			{
+				v.x = As<Float4>(src & Int4(0x3FF));
+				v.y = As<Float4>((src >> 10) & Int4(0x3FF));
+				v.z = As<Float4>((src >> 20) & Int4(0x3FF));
+				v.w = As<Float4>((src >> 30) & Int4(0x3));
 			}
 		}
 		break;
 		default:
-			UNSUPPORTED("stream.type %d", int(stream.type));
+			UNSUPPORTED("stream.format %d", int(stream.format));
 	}
 
-	if(stream.count < 1) v.x = Float4(0.0f);
-	if(stream.count < 2) v.y = Float4(0.0f);
-	if(stream.count < 3) v.z = Float4(0.0f);
-	if(stream.count < 4) v.w = isNativeFloatAttrib ? As<Float4>(Float4(1.0f)) : As<Float4>(Int4(1));
+	if(bgra)
+	{
+		// Swap red and blue
+		Float4 t = v.x;
+		v.x = v.z;
+		v.z = t;
+	}
+
+	if(componentCount < 1) v.x = Float4(0.0f);
+	if(componentCount < 2) v.y = Float4(0.0f);
+	if(componentCount < 3) v.z = Float4(0.0f);
+	if(componentCount < 4) v.w = isNativeFloatAttrib ? As<Float4>(Float4(1.0f)) : As<Float4>(Int4(1));
 
 	return v;
 }
diff --git a/src/Vulkan/VkCommandBuffer.cpp b/src/Vulkan/VkCommandBuffer.cpp
index e4b02c8..9f38549 100644
--- a/src/Vulkan/VkCommandBuffer.cpp
+++ b/src/Vulkan/VkCommandBuffer.cpp
@@ -1811,7 +1811,7 @@
 	for(uint32_t i = 0; i < MAX_VERTEX_INPUT_BINDINGS; i++)
 	{
 		auto &attrib = context.input[i];
-		if(attrib.count)
+		if(attrib.format != VK_FORMAT_UNDEFINED)
 		{
 			const auto &vertexInput = vertexInputBindings[attrib.binding];
 			VkDeviceSize offset = attrib.offset + vertexInput.offset +
diff --git a/src/Vulkan/VkPhysicalDevice.cpp b/src/Vulkan/VkPhysicalDevice.cpp
index 875f0c1..364c26d 100644
--- a/src/Vulkan/VkPhysicalDevice.cpp
+++ b/src/Vulkan/VkPhysicalDevice.cpp
@@ -689,7 +689,14 @@
 		case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
 		case VK_FORMAT_A8B8G8R8_UINT_PACK32:
 		case VK_FORMAT_A8B8G8R8_SINT_PACK32:
+		case VK_FORMAT_A2R10G10B10_UNORM_PACK32:
+		case VK_FORMAT_A2R10G10B10_SNORM_PACK32:
+		case VK_FORMAT_A2R10G10B10_UINT_PACK32:
+		case VK_FORMAT_A2R10G10B10_SINT_PACK32:
 		case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
+		case VK_FORMAT_A2B10G10R10_SNORM_PACK32:
+		case VK_FORMAT_A2B10G10R10_UINT_PACK32:
+		case VK_FORMAT_A2B10G10R10_SINT_PACK32:
 		case VK_FORMAT_R16_UNORM:
 		case VK_FORMAT_R16_SNORM:
 		case VK_FORMAT_R16_UINT:
diff --git a/src/Vulkan/VkPipeline.cpp b/src/Vulkan/VkPipeline.cpp
index 6134ae4..a009c5e 100644
--- a/src/Vulkan/VkPipeline.cpp
+++ b/src/Vulkan/VkPipeline.cpp
@@ -31,132 +31,6 @@
 
 namespace {
 
-sw::StreamType getStreamType(VkFormat format)
-{
-	switch(format)
-	{
-		case VK_FORMAT_R8_UNORM:
-		case VK_FORMAT_R8G8_UNORM:
-		case VK_FORMAT_R8G8B8A8_UNORM:
-		case VK_FORMAT_R8_UINT:
-		case VK_FORMAT_R8G8_UINT:
-		case VK_FORMAT_R8G8B8A8_UINT:
-		case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
-		case VK_FORMAT_A8B8G8R8_UINT_PACK32:
-			return sw::STREAMTYPE_BYTE;
-		case VK_FORMAT_B8G8R8A8_UNORM:
-			return sw::STREAMTYPE_COLOR;
-		case VK_FORMAT_R8_SNORM:
-		case VK_FORMAT_R8_SINT:
-		case VK_FORMAT_R8G8_SNORM:
-		case VK_FORMAT_R8G8_SINT:
-		case VK_FORMAT_R8G8B8A8_SNORM:
-		case VK_FORMAT_R8G8B8A8_SINT:
-		case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
-		case VK_FORMAT_A8B8G8R8_SINT_PACK32:
-			return sw::STREAMTYPE_SBYTE;
-		case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
-			return sw::STREAMTYPE_2_10_10_10_UINT;
-		case VK_FORMAT_R16_UNORM:
-		case VK_FORMAT_R16_UINT:
-		case VK_FORMAT_R16G16_UNORM:
-		case VK_FORMAT_R16G16_UINT:
-		case VK_FORMAT_R16G16B16A16_UNORM:
-		case VK_FORMAT_R16G16B16A16_UINT:
-			return sw::STREAMTYPE_USHORT;
-		case VK_FORMAT_R16_SNORM:
-		case VK_FORMAT_R16_SINT:
-		case VK_FORMAT_R16G16_SNORM:
-		case VK_FORMAT_R16G16_SINT:
-		case VK_FORMAT_R16G16B16A16_SNORM:
-		case VK_FORMAT_R16G16B16A16_SINT:
-			return sw::STREAMTYPE_SHORT;
-		case VK_FORMAT_R16_SFLOAT:
-		case VK_FORMAT_R16G16_SFLOAT:
-		case VK_FORMAT_R16G16B16A16_SFLOAT:
-			return sw::STREAMTYPE_HALF;
-		case VK_FORMAT_R32_UINT:
-		case VK_FORMAT_R32G32_UINT:
-		case VK_FORMAT_R32G32B32_UINT:
-		case VK_FORMAT_R32G32B32A32_UINT:
-			return sw::STREAMTYPE_UINT;
-		case VK_FORMAT_R32_SINT:
-		case VK_FORMAT_R32G32_SINT:
-		case VK_FORMAT_R32G32B32_SINT:
-		case VK_FORMAT_R32G32B32A32_SINT:
-			return sw::STREAMTYPE_INT;
-		case VK_FORMAT_R32_SFLOAT:
-		case VK_FORMAT_R32G32_SFLOAT:
-		case VK_FORMAT_R32G32B32_SFLOAT:
-		case VK_FORMAT_R32G32B32A32_SFLOAT:
-			return sw::STREAMTYPE_FLOAT;
-		default:
-			UNIMPLEMENTED("format");
-	}
-
-	return sw::STREAMTYPE_BYTE;
-}
-
-unsigned char getNumberOfChannels(VkFormat format)
-{
-	switch(format)
-	{
-		case VK_FORMAT_R8_UNORM:
-		case VK_FORMAT_R8_SNORM:
-		case VK_FORMAT_R8_UINT:
-		case VK_FORMAT_R8_SINT:
-		case VK_FORMAT_R16_UNORM:
-		case VK_FORMAT_R16_SNORM:
-		case VK_FORMAT_R16_UINT:
-		case VK_FORMAT_R16_SINT:
-		case VK_FORMAT_R16_SFLOAT:
-		case VK_FORMAT_R32_UINT:
-		case VK_FORMAT_R32_SINT:
-		case VK_FORMAT_R32_SFLOAT:
-			return 1;
-		case VK_FORMAT_R8G8_UNORM:
-		case VK_FORMAT_R8G8_SNORM:
-		case VK_FORMAT_R8G8_UINT:
-		case VK_FORMAT_R8G8_SINT:
-		case VK_FORMAT_R16G16_UNORM:
-		case VK_FORMAT_R16G16_SNORM:
-		case VK_FORMAT_R16G16_UINT:
-		case VK_FORMAT_R16G16_SINT:
-		case VK_FORMAT_R16G16_SFLOAT:
-		case VK_FORMAT_R32G32_UINT:
-		case VK_FORMAT_R32G32_SINT:
-		case VK_FORMAT_R32G32_SFLOAT:
-			return 2;
-		case VK_FORMAT_R32G32B32_UINT:
-		case VK_FORMAT_R32G32B32_SINT:
-		case VK_FORMAT_R32G32B32_SFLOAT:
-			return 3;
-		case VK_FORMAT_R8G8B8A8_UNORM:
-		case VK_FORMAT_R8G8B8A8_SNORM:
-		case VK_FORMAT_R8G8B8A8_UINT:
-		case VK_FORMAT_R8G8B8A8_SINT:
-		case VK_FORMAT_B8G8R8A8_UNORM:
-		case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
-		case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
-		case VK_FORMAT_A8B8G8R8_UINT_PACK32:
-		case VK_FORMAT_A8B8G8R8_SINT_PACK32:
-		case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
-		case VK_FORMAT_R16G16B16A16_UNORM:
-		case VK_FORMAT_R16G16B16A16_SNORM:
-		case VK_FORMAT_R16G16B16A16_UINT:
-		case VK_FORMAT_R16G16B16A16_SINT:
-		case VK_FORMAT_R16G16B16A16_SFLOAT:
-		case VK_FORMAT_R32G32B32A32_UINT:
-		case VK_FORMAT_R32G32B32A32_SINT:
-		case VK_FORMAT_R32G32B32A32_SFLOAT:
-			return 4;
-		default:
-			UNIMPLEMENTED("format");
-	}
-
-	return 0;
-}
-
 // preprocessSpirv applies and freezes specializations into constants, and inlines all functions.
 std::vector<uint32_t> preprocessSpirv(
     std::vector<uint32_t> const &code,
@@ -333,9 +207,7 @@
 	{
 		auto const &desc = vertexInputState->pVertexAttributeDescriptions[i];
 		sw::Stream &input = context.input[desc.location];
-		input.count = getNumberOfChannels(desc.format);
-		input.type = getStreamType(desc.format);
-		input.normalized = !vk::Format(desc.format).isUnnormalizedInteger();
+		input.format = desc.format;
 		input.offset = desc.offset;
 		input.binding = desc.binding;
 		input.vertexStride = vertexStrides[desc.binding];