Implement pack/unpack extended instructions
- GLSLstd450PackSnorm4x8
- GLSLstd450PackUnorm4x8
- GLSLstd450PackSnorm2x16
- GLSLstd450PackUnorm2x16
- GLSLstd450PackHalf2x16
- GLSLstd450UnpackSnorm4x8
- GLSLstd450UnpackUnorm4x8
- GLSLstd450UnpackSnorm2x16
- GLSLstd450UnpackUnorm2x16
- GLSLstd450UnpackHalf2x16
Test: dEQP-VK.glsl.builtin.function.pack_unpack.*
Bug: b/126873455
Change-Id: I0c765bc215d43f894cad91c7ee06b5e3af3f75db
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/28433
Tested-by: Ben Clayton <bclayton@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/Pipeline/SpirvShader.cpp b/src/Pipeline/SpirvShader.cpp
index 51dc530..8a8077a 100644
--- a/src/Pipeline/SpirvShader.cpp
+++ b/src/Pipeline/SpirvShader.cpp
@@ -3090,6 +3090,95 @@
}
break;
}
+ case GLSLstd450PackSnorm4x8:
+ {
+ auto val = GenericValue(this, routine, insn.word(5));
+ dst.move(0, (SIMD::Int(Round(Min(Max(val.Float(0), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
+ SIMD::Int(0xFF)) |
+ ((SIMD::Int(Round(Min(Max(val.Float(1), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
+ SIMD::Int(0xFF)) << 8) |
+ ((SIMD::Int(Round(Min(Max(val.Float(2), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
+ SIMD::Int(0xFF)) << 16) |
+ ((SIMD::Int(Round(Min(Max(val.Float(3), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
+ SIMD::Int(0xFF)) << 24));
+ break;
+ }
+ case GLSLstd450PackUnorm4x8:
+ {
+ auto val = GenericValue(this, routine, insn.word(5));
+ dst.move(0, (SIMD::UInt(Round(Min(Max(val.Float(0), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) |
+ ((SIMD::UInt(Round(Min(Max(val.Float(1), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 8) |
+ ((SIMD::UInt(Round(Min(Max(val.Float(2), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 16) |
+ ((SIMD::UInt(Round(Min(Max(val.Float(3), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 24));
+ break;
+ }
+ case GLSLstd450PackSnorm2x16:
+ {
+ auto val = GenericValue(this, routine, insn.word(5));
+ dst.move(0, (SIMD::Int(Round(Min(Max(val.Float(0), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(32767.0f))) &
+ SIMD::Int(0xFFFF)) |
+ ((SIMD::Int(Round(Min(Max(val.Float(1), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(32767.0f))) &
+ SIMD::Int(0xFFFF)) << 16));
+ break;
+ }
+ case GLSLstd450PackUnorm2x16:
+ {
+ auto val = GenericValue(this, routine, insn.word(5));
+ dst.move(0, (SIMD::UInt(Round(Min(Max(val.Float(0), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(65535.0f))) &
+ SIMD::UInt(0xFFFF)) |
+ ((SIMD::UInt(Round(Min(Max(val.Float(1), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(65535.0f))) &
+ SIMD::UInt(0xFFFF)) << 16));
+ break;
+ }
+ case GLSLstd450PackHalf2x16:
+ {
+ auto val = GenericValue(this, routine, insn.word(5));
+ dst.move(0, FloatToHalfBits(val.UInt(0), false) | FloatToHalfBits(val.UInt(1), true));
+ break;
+ }
+ case GLSLstd450UnpackSnorm4x8:
+ {
+ auto val = GenericValue(this, routine, insn.word(5));
+ dst.move(0, Min(Max(SIMD::Float(((val.Int(0)<<24) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
+ dst.move(1, Min(Max(SIMD::Float(((val.Int(0)<<16) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
+ dst.move(2, Min(Max(SIMD::Float(((val.Int(0)<<8) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
+ dst.move(3, Min(Max(SIMD::Float(((val.Int(0)) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
+ break;
+ }
+ case GLSLstd450UnpackUnorm4x8:
+ {
+ auto val = GenericValue(this, routine, insn.word(5));
+ dst.move(0, SIMD::Float((val.UInt(0) & SIMD::UInt(0xFF))) * SIMD::Float(1.0f / 255.f));
+ dst.move(1, SIMD::Float(((val.UInt(0)>>8) & SIMD::UInt(0xFF))) * SIMD::Float(1.0f / 255.f));
+ dst.move(2, SIMD::Float(((val.UInt(0)>>16) & SIMD::UInt(0xFF))) * SIMD::Float(1.0f / 255.f));
+ dst.move(3, SIMD::Float(((val.UInt(0)>>24) & SIMD::UInt(0xFF))) * SIMD::Float(1.0f / 255.f));
+ break;
+ }
+ case GLSLstd450UnpackSnorm2x16:
+ {
+ auto val = GenericValue(this, routine, insn.word(5));
+ // clamp(f / 32767.0, -1.0, 1.0)
+ dst.move(0, Min(Max(SIMD::Float(As<SIMD::Int>((val.UInt(0) & SIMD::UInt(0x0000FFFF)) << 16)) *
+ SIMD::Float(1.0f / float(0x7FFF0000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
+ dst.move(1, Min(Max(SIMD::Float(As<SIMD::Int>(val.UInt(0) & SIMD::UInt(0xFFFF0000))) * SIMD::Float(1.0f / float(0x7FFF0000)),
+ SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
+ break;
+ }
+ case GLSLstd450UnpackUnorm2x16:
+ {
+ auto val = GenericValue(this, routine, insn.word(5));
+ // f / 65535.0
+ dst.move(0, SIMD::Float((val.UInt(0) & SIMD::UInt(0x0000FFFF)) << 16) * SIMD::Float(1.0f / float(0xFFFF0000)));
+ dst.move(1, SIMD::Float(val.UInt(0) & SIMD::UInt(0xFFFF0000)) * SIMD::Float(1.0f / float(0xFFFF0000)));
+ break;
+ }
+ case GLSLstd450UnpackHalf2x16:
+ {
+ auto val = GenericValue(this, routine, insn.word(5));
+ dst.move(0, HalfToFloatBits(val.UInt(0) & SIMD::UInt(0x0000FFFF)));
+ dst.move(1, HalfToFloatBits((val.UInt(0) & SIMD::UInt(0xFFFF0000)) >> 16));
+ break;
+ }
default:
UNIMPLEMENTED("Unhandled ExtInst %d", extInstIndex);
}
@@ -3124,6 +3213,43 @@
return d;
}
+ SIMD::UInt SpirvShader::FloatToHalfBits(SIMD::UInt floatBits, bool storeInUpperBits) const
+ { // Converts per-lane 32-bit float bit patterns to 16-bit half bit patterns, placed in the upper or lower 16 bits of the result.
+ static const uint32_t mask_sign = 0x80000000u; // float32 sign bit
+ static const uint32_t mask_round = ~0xfffu; // clears the low 12 mantissa bits; as an unsigned value it also equals -0x1000
+ static const uint32_t c_f32infty = 255 << 23; // float32 +infinity bit pattern (exponent all ones, mantissa zero)
+ static const uint32_t c_magic = 15 << 23; // bit pattern of the float 2^(15-127); multiplying by it rebiases the exponent for half
+ static const uint32_t c_nanbit = 0x200; // mantissa bit set in the half result when the input is a NaN
+ static const uint32_t c_infty_as_fp16 = 0x7c00; // half +infinity bit pattern
+ static const uint32_t c_clamp = (31 << 23) - 0x1000; // upper bound for the rescaled value; turns into 0x7c00 after the +0x1000 and >> 13 below
+
+ SIMD::UInt justsign = SIMD::UInt(mask_sign) & floatBits; // sign bit only
+ SIMD::UInt absf = floatBits ^ justsign; // input with the sign bit cleared
+ SIMD::UInt b_isnormal = CmpNLE(SIMD::UInt(c_f32infty), absf); // used as a lane mask below; presumably all ones where !(infinity <= absf), i.e. finite input - TODO confirm CmpNLE semantics
+
+ // Note: ties are not rounded to nearest even as IEEE 754-2008 defines; they are rounded up in magnitude instead (the sign was
+ // stripped above). That's fine for GLSL ES 3.0's needs (see section 2.1.1 Floating-Point Computation).
+ SIMD::UInt joined = ((((As<SIMD::UInt>(Min(As<SIMD::Float>(absf & SIMD::UInt(mask_round)) * As<SIMD::Float>(SIMD::UInt(c_magic)), // finite lanes: truncate, rescale,
+ As<SIMD::Float>(SIMD::UInt(c_clamp))))) - SIMD::UInt(mask_round)) >> 13) & b_isnormal) | // clamp, add 0x1000 (subtracting mask_round) as rounding bias, shift into half layout
+ ((b_isnormal ^ SIMD::UInt(0xFFFFFFFF)) & ((CmpNLE(absf, SIMD::UInt(c_f32infty)) & SIMD::UInt(c_nanbit)) | // remaining lanes: NaN bit when absf is above the infinity pattern,
+ SIMD::UInt(c_infty_as_fp16))); // always combined with the half infinity exponent
+
+ return storeInUpperBits ? ((joined << 16) | justsign) : joined | (justsign >> 16); // sign lands on bit 31 (upper half) or bit 15 (lower half)
+ }
+
+ SIMD::UInt SpirvShader::HalfToFloatBits(SIMD::UInt halfBits) const
+ { // Converts per-lane 16-bit half bit patterns to 32-bit float bit patterns; assumes the half sits in the low 16 bits of halfBits - TODO confirm for all callers.
+ static const uint32_t mask_nosign = 0x7FFF; // half exponent and mantissa bits (everything below the half sign bit)
+ static const uint32_t magic = (254 - 15) << 23; // bit pattern of the float 2^(239-127); multiplying by it rebiases the exponent for float32
+ static const uint32_t was_infnan = 0x7BFF; // largest finite half magnitude; anything above it is infinity or NaN
+ static const uint32_t exp_infnan = 255 << 23; // float32 exponent field with all bits set
+
+ SIMD::UInt expmant = halfBits & SIMD::UInt(mask_nosign);
+ return As<SIMD::UInt>(As<SIMD::Float>(expmant << 13) * As<SIMD::Float>(SIMD::UInt(magic))) | // exponent/mantissa moved to the float32 position, then rescaled
+ ((halfBits ^ SIMD::UInt(expmant)) << 16) | // bits outside mask_nosign (the sign, for a 16-bit input) moved up by 16
+ (CmpNLE(As<SIMD::UInt>(expmant), SIMD::UInt(was_infnan)) & SIMD::UInt(exp_infnan)); // sets the exponent to all ones for infinity/NaN inputs; CmpNLE presumably yields a lane mask - TODO confirm
+ }
+
SpirvShader::EmitResult SpirvShader::EmitAny(InsnIterator insn, EmitState *state) const
{
auto routine = state->routine;
diff --git a/src/Pipeline/SpirvShader.hpp b/src/Pipeline/SpirvShader.hpp
index 35ec8f9..494ea6b 100644
--- a/src/Pipeline/SpirvShader.hpp
+++ b/src/Pipeline/SpirvShader.hpp
@@ -691,6 +691,8 @@
// Helper as we often need to take dot products as part of doing other things.
SIMD::Float Dot(unsigned numComponents, GenericValue const & x, GenericValue const & y) const;
+ SIMD::UInt FloatToHalfBits(SIMD::UInt floatBits, bool storeInUpperBits) const;
+ SIMD::UInt HalfToFloatBits(SIMD::UInt halfBits) const;
};
class SpirvRoutine