Create a generic vec<T, N> class

This prepares for other vector sizes (e.g. byte8) to support member
methods such as broadcast assignment.

Note the partial specialization for N=4, which provides x, y, z, w members.

Bug: b/146224130
Change-Id: I09a341812ddfa14d247e94fd3f9345db951824ed
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/39554
Presubmit-Ready: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Ben Clayton <bclayton@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Tested-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/System/Types.hpp b/src/System/Types.hpp
index 5cc67d3..e70344f 100644
--- a/src/System/Types.hpp
+++ b/src/System/Types.hpp
@@ -48,64 +48,131 @@
 typedef ALIGN(2, uint16_t) word;
 typedef ALIGN(4, uint32_t) dword;
 typedef ALIGN(8, uint64_t) qword;
-typedef ALIGN(16, uint64_t) qword2[2];
-typedef ALIGN(4, uint8_t) byte4[4];
-typedef ALIGN(8, uint8_t) byte8[8];
-typedef ALIGN(16, uint8_t) byte16[16];
-typedef ALIGN(8, uint16_t) word4[4];
-typedef ALIGN(8, uint32_t) dword2[2];
-typedef ALIGN(16, uint32_t) dword4[4];
-typedef ALIGN(16, uint64_t) xword[2];
-
 typedef ALIGN(1, int8_t) sbyte;
-typedef ALIGN(4, int8_t) sbyte4[4];
-typedef ALIGN(8, int8_t) sbyte8[8];
-typedef ALIGN(16, int8_t) sbyte16[16];
-typedef ALIGN(8, short) short4[4];
-typedef ALIGN(8, unsigned short) ushort4[4];
-typedef ALIGN(16, short) short8[8];
-typedef ALIGN(16, unsigned short) ushort8[8];
-typedef ALIGN(8, int) int2[2];
-typedef ALIGN(8, unsigned int) uint2[2];
-typedef ALIGN(16, unsigned int) uint4[4];
 
-typedef ALIGN(8, float) float2[2];
-
-template<typename T>
-struct alignas(sizeof(T) * 4) vec4
+template<typename T, int N>
+struct alignas(sizeof(T)* N) vec
 {
-	T x;
-	T y;
-	T z;
-	T w;
+	vec() = default;
+
+	vec(T replicate)
+	{
+		for(int i = 0; i < N; i++)
+		{
+			v[i] = replicate;
+		}
+	}
+
+	template<typename ... ARGS>
+	constexpr vec(T arg0, ARGS ... args)
+		: v{ arg0, args... }
+	{
+	}
 
 	T &operator[](int i)
 	{
-		return (&x)[i];
+		return v[i];
 	}
 
 	const T &operator[](int i) const
 	{
-		return (&x)[i];
+		return v[i];
 	}
 
-	bool operator!=(const vec4 &rhs)
-	{
-		return x != rhs.x || y != rhs.y || z != rhs.z || w != rhs.w;
-	}
-
-	bool operator==(const vec4 &rhs)
-	{
-		return x == rhs.x && y == rhs.y && z == rhs.z && w == rhs.w;
-	}
+	T v[N];
 };
 
+template<typename T>
+struct alignas(sizeof(T) * 4) vec<T, 4>
+{
+	vec() = default;
+
+	constexpr vec(T replicate)
+		: x(replicate), y(replicate), z(replicate), w(replicate)
+	{
+	}
+
+	constexpr vec(T x, T y, T z, T w)
+		: x(x), y(y), z(z), w(w)
+	{
+	}
+
+	T &operator[](int i)
+	{
+		return v[i];
+	}
+
+	const T &operator[](int i) const
+	{
+		return v[i];
+	}
+
+	union
+	{
+		T v[4];
+
+		struct
+		{
+			T x;
+			T y;
+			T z;
+			T w;
+		};
+	};
+};
+
+template<typename T, int N>
+bool operator==(const vec<T, N>& a, const vec<T, N>& b)
+{
+	for(int i = 0; i < N; i++)
+	{
+		if(a.v[i] != b.v[i])
+		{
+			return false;
+		}
+	}
+
+	return true;
+}
+
+template<typename T, int N>
+bool operator!=(const vec<T, N>& a, const vec<T, N>& b)
+{
+	return !(a == b);
+}
+
+template<typename T> using vec2 = vec<T, 2>;
+template<typename T> using vec4 = vec<T, 4>;
+template<typename T> using vec8 = vec<T, 8>;
+template<typename T> using vec16 = vec<T, 16>;
+
+using int2 = vec2<int>;
+using uint2 = vec2<unsigned int>;
+using float2 = vec2<float>;
+using dword2 = vec2<dword>;
+using qword2 = vec2<qword>;
+
 using int4 = vec4<int>;
+using uint4 = vec4<unsigned int>;
 using float4 = vec4<float>;
+using byte4 = vec4<byte>;
+using sbyte4 = vec4<sbyte>;
+using short4 = vec4<short>;
+using ushort4 = vec4<unsigned short>;
+using word4 = vec4<word>;
+using dword4 = vec4<dword>;
+
+using byte8 = vec8<byte>;
+using sbyte8 = vec8<sbyte>;
+using short8 = vec8<short>;
+using ushort8 = vec8<unsigned short>;
+
+using byte16 = vec16<byte>;
+using sbyte16 = vec16<sbyte>;
 
 inline constexpr float4 vector(float x, float y, float z, float w)
 {
-	return { x, y, z, w };
+	return float4{ x, y, z, w };
 }
 
 inline constexpr float4 replicate(float f)