third_party/llvm-16.0/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp - SwiftShader - Git at Google

 //===-- X86ShuffleDecodeConstantPool.cpp - X86 shuffle decode -------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
 // Define several functions to decode x86 specific shuffle semantics using
 // constants from the constant pool.
 //
 //===----------------------------------------------------------------------===//

 #include "X86ShuffleDecodeConstantPool.h"
 #include "MCTargetDesc/X86ShuffleDecode.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/IR/Constants.h"

 //===----------------------------------------------------------------------===//
 //  Vector Mask Decoding
 //===----------------------------------------------------------------------===//

 namespace llvm {

 static bool extractConstantMask(const Constant *C, unsigned MaskEltSizeInBits,
                                 APInt &UndefElts,
                                 SmallVectorImpl<uint64_t> &RawMask) {
   // It is not an error for shuffle masks to not be a vector of
   // MaskEltSizeInBits because the constant pool uniques constants by their
   // bit representation.
   // e.g. the following take up the same space in the constant pool:
   //   i128 -170141183420855150465331762880109871104
   //
   //   <2 x i64> <i64 -9223372034707292160, i64 -9223372034707292160>
   //
   //   <4 x i32> <i32 -2147483648, i32 -2147483648,
   //              i32 -2147483648, i32 -2147483648>
   auto *CstTy = dyn_cast<FixedVectorType>(C->getType());
   if (!CstTy)
     return false;

   Type *CstEltTy = CstTy->getElementType();
   if (!CstEltTy->isIntegerTy())
     return false;

   unsigned CstSizeInBits = CstTy->getPrimitiveSizeInBits();
   unsigned CstEltSizeInBits = CstTy->getScalarSizeInBits();
   unsigned NumCstElts = CstTy->getNumElements();

   assert((CstSizeInBits % MaskEltSizeInBits) == 0 &&
          "Unaligned shuffle mask size");

   unsigned NumMaskElts = CstSizeInBits / MaskEltSizeInBits;
   UndefElts = APInt(NumMaskElts, 0);
   RawMask.resize(NumMaskElts, 0);

   // Fast path - if the constants match the mask size then copy direct.
   if (MaskEltSizeInBits == CstEltSizeInBits) {
     assert(NumCstElts == NumMaskElts && "Unaligned shuffle mask size");
     for (unsigned i = 0; i != NumMaskElts; ++i) {
       Constant *COp = C->getAggregateElement(i);
       if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
         return false;

       if (isa<UndefValue>(COp)) {
         UndefElts.setBit(i);
         RawMask[i] = 0;
         continue;
       }

       auto *Elt = cast<ConstantInt>(COp);
       RawMask[i] = Elt->getValue().getZExtValue();
     }
     return true;
   }

   // Extract all the undef/constant element data and pack into single bitsets.
   APInt UndefBits(CstSizeInBits, 0);
   APInt MaskBits(CstSizeInBits, 0);
   for (unsigned i = 0; i != NumCstElts; ++i) {
     Constant *COp = C->getAggregateElement(i);
     if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
       return false;

     unsigned BitOffset = i * CstEltSizeInBits;

     if (isa<UndefValue>(COp)) {
       UndefBits.setBits(BitOffset, BitOffset + CstEltSizeInBits);
       continue;
     }

     MaskBits.insertBits(cast<ConstantInt>(COp)->getValue(), BitOffset);
   }

   // Now extract the undef/constant bit data into the raw shuffle masks.
   for (unsigned i = 0; i != NumMaskElts; ++i) {
     unsigned BitOffset = i * MaskEltSizeInBits;
     APInt EltUndef = UndefBits.extractBits(MaskEltSizeInBits, BitOffset);

     // Only treat the element as UNDEF if all bits are UNDEF, otherwise
     // treat it as zero.
     if (EltUndef.isAllOnes()) {
       UndefElts.setBit(i);
       RawMask[i] = 0;
       continue;
     }

     APInt EltBits = MaskBits.extractBits(MaskEltSizeInBits, BitOffset);
     RawMask[i] = EltBits.getZExtValue();
   }

   return true;
 }

 void DecodePSHUFBMask(const Constant *C, unsigned Width,
                       SmallVectorImpl<int> &ShuffleMask) {
   assert((Width == 128 || Width == 256 || Width == 512) &&
          C->getType()->getPrimitiveSizeInBits() >= Width &&
          "Unexpected vector size.");

   // The shuffle mask requires a byte vector.
   APInt UndefElts;
   SmallVector<uint64_t, 64> RawMask;
   if (!extractConstantMask(C, 8, UndefElts, RawMask))
     return;

   unsigned NumElts = Width / 8;
   assert((NumElts == 16 || NumElts == 32 || NumElts == 64) &&
          "Unexpected number of vector elements.");

   for (unsigned i = 0; i != NumElts; ++i) {
     if (UndefElts[i]) {
       ShuffleMask.push_back(SM_SentinelUndef);
       continue;
     }

     uint64_t Element = RawMask[i];
     // If the high bit (7) of the byte is set, the element is zeroed.
     if (Element & (1 << 7))
       ShuffleMask.push_back(SM_SentinelZero);
     else {
       // For AVX vectors with 32 bytes the base of the shuffle is the 16-byte
       // lane of the vector we're inside.
       unsigned Base = i & ~0xf;

       // Only the least significant 4 bits of the byte are used.
       int Index = Base + (Element & 0xf);
       ShuffleMask.push_back(Index);
     }
   }
 }

 void DecodeVPERMILPMask(const Constant *C, unsigned ElSize, unsigned Width,
                         SmallVectorImpl<int> &ShuffleMask) {
   assert((Width == 128 || Width == 256 || Width == 512) &&
          C->getType()->getPrimitiveSizeInBits() >= Width &&
          "Unexpected vector size.");
   assert((ElSize == 32 || ElSize == 64) && "Unexpected vector element size.");

   // The shuffle mask requires elements the same size as the target.
   APInt UndefElts;
   SmallVector<uint64_t, 16> RawMask;
   if (!extractConstantMask(C, ElSize, UndefElts, RawMask))
     return;

   unsigned NumElts = Width / ElSize;
   unsigned NumEltsPerLane = 128 / ElSize;
   assert((NumElts == 2 || NumElts == 4 || NumElts == 8 || NumElts == 16) &&
          "Unexpected number of vector elements.");

   for (unsigned i = 0; i != NumElts; ++i) {
     if (UndefElts[i]) {
       ShuffleMask.push_back(SM_SentinelUndef);
       continue;
     }

     int Index = i & ~(NumEltsPerLane - 1);
     uint64_t Element = RawMask[i];
     if (ElSize == 64)
       Index += (Element >> 1) & 0x1;
     else
       Index += Element & 0x3;

     ShuffleMask.push_back(Index);
   }
 }

 /// Decode a VPERMIL2PS/VPERMIL2PD selector mask held in the constant \p C.
 ///
 /// Appends Width / ElSize entries to \p ShuffleMask: SM_SentinelUndef for
 /// undef elements, SM_SentinelZero where the M2Z/match-bit combination zeroes
 /// the element, otherwise the selected element index (indices of the second
 /// source are offset by the element count). \p ShuffleMask is left untouched
 /// if the constant cannot be extracted or holds fewer elements than
 /// Width / ElSize.
 ///
 /// \param C           Constant holding the selector mask (128 or 256 bits).
 /// \param M2Z         Two-bit zero-match control taken from the instruction.
 /// \param ElSize      Element size in bits.
 /// \param Width       Vector width in bits.
 /// \param ShuffleMask Output mask that decoded entries are appended to.
 void DecodeVPERMIL2PMask(const Constant *C, unsigned M2Z, unsigned ElSize,
                          unsigned Width, SmallVectorImpl<int> &ShuffleMask) {
   Type *MaskTy = C->getType();
   unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
   (void)MaskTySize;
   assert((MaskTySize == 128 || MaskTySize == 256) && Width >= MaskTySize &&
          "Unexpected vector size.");

   // The shuffle mask requires elements the same size as the target.
   APInt UndefElts;
   SmallVector<uint64_t, 8> RawMask;
   if (!extractConstantMask(C, ElSize, UndefElts, RawMask))
     return;

   unsigned NumElts = Width / ElSize;
   unsigned NumEltsPerLane = 128 / ElSize;
   assert((NumElts == 2 || NumElts == 4 || NumElts == 8) &&
          "Unexpected number of vector elements.");

   // The assertion above only bounds the constant from above
   // (Width >= MaskTySize), so a constant narrower than Width would be indexed
   // past its last element below. Treat that like an undecodable mask.
   if (RawMask.size() < NumElts)
     return;

   for (unsigned i = 0; i != NumElts; ++i) {
     if (UndefElts[i]) {
       ShuffleMask.push_back(SM_SentinelUndef);
       continue;
     }

     // VPERMIL2 Operation.
     // Bits[3] - Match Bit.
     // Bits[2:1] - (Per Lane) PD Shuffle Mask.
     // Bits[2:0] - (Per Lane) PS Shuffle Mask.
     uint64_t Selector = RawMask[i];
     unsigned MatchBit = (Selector >> 3) & 0x1;

     // M2Z[0:1]     MatchBit
     //   0Xb           X        Source selected by Selector index.
     //   10b           0        Source selected by Selector index.
     //   10b           1        Zero.
     //   11b           0        Zero.
     //   11b           1        Source selected by Selector index.
     if ((M2Z & 0x2) != 0u && MatchBit != (M2Z & 0x1)) {
       ShuffleMask.push_back(SM_SentinelZero);
       continue;
     }

     // Start from the first element of the 128-bit lane containing i, then
     // add the in-lane selection.
     int Index = i & ~(NumEltsPerLane - 1);
     if (ElSize == 64)
       Index += (Selector >> 1) & 0x1;
     else
       Index += Selector & 0x3;

     // Selector bit 2 picks the source operand; the second source's elements
     // are numbered after the first source's NumElts elements.
     int Src = (Selector >> 2) & 0x1;
     Index += Src * NumElts;
     ShuffleMask.push_back(Index);
   }
 }

 /// Decode a VPPERM byte-permute mask held in the constant \p C.
 ///
 /// Appends 16 entries to \p ShuffleMask: SM_SentinelUndef for undef bytes,
 /// SM_SentinelZero for the zero-fill permute operation, otherwise the source
 /// byte index (0 - 31). Any other permute operation cannot be expressed as a
 /// shuffle, in which case \p ShuffleMask is cleared and decoding stops.
 /// \p ShuffleMask is left untouched if the constant cannot be extracted or
 /// holds fewer than 16 bytes.
 ///
 /// \param C           Constant holding the byte mask.
 /// \param Width       Vector width in bits; must be 128.
 /// \param ShuffleMask Output mask that decoded entries are appended to.
 void DecodeVPPERMMask(const Constant *C, unsigned Width,
                       SmallVectorImpl<int> &ShuffleMask) {
   Type *MaskTy = C->getType();
   unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
   (void)MaskTySize;
   assert(Width == 128 && Width >= MaskTySize && "Unexpected vector size.");

   // The shuffle mask requires a byte vector.
   APInt UndefElts;
   SmallVector<uint64_t, 16> RawMask;
   if (!extractConstantMask(C, 8, UndefElts, RawMask))
     return;

   unsigned NumElts = Width / 8;
   assert(NumElts == 16 && "Unexpected number of vector elements.");

   // The assertion above only bounds the constant from above
   // (Width >= MaskTySize), so a constant narrower than Width would be indexed
   // past its last byte below. Treat that like an undecodable mask.
   if (RawMask.size() < NumElts)
     return;

   for (unsigned i = 0; i != NumElts; ++i) {
     if (UndefElts[i]) {
       ShuffleMask.push_back(SM_SentinelUndef);
       continue;
     }

     // VPPERM Operation
     // Bits[4:0] - Byte Index (0 - 31)
     // Bits[7:5] - Permute Operation
     //
     // Permute Operation:
     // 0 - Source byte (no logical operation).
     // 1 - Invert source byte.
     // 2 - Bit reverse of source byte.
     // 3 - Bit reverse of inverted source byte.
     // 4 - 00h (zero - fill).
     // 5 - FFh (ones - fill).
     // 6 - Most significant bit of source byte replicated in all bit positions.
     // 7 - Invert most significant bit of source byte and replicate in all bit
     // positions.
     uint64_t Element = RawMask[i];
     uint64_t Index = Element & 0x1F;
     uint64_t PermuteOp = (Element >> 5) & 0x7;

     if (PermuteOp == 4) {
       ShuffleMask.push_back(SM_SentinelZero);
       continue;
     }
     // Only a plain byte copy (op 0) is a shuffle; anything else modifies the
     // byte, so give up and hand back an empty mask.
     if (PermuteOp != 0) {
       ShuffleMask.clear();
       return;
     }
     ShuffleMask.push_back(static_cast<int>(Index));
   }
 }

 } // namespace llvm
	//===-- X86ShuffleDecodeConstantPool.cpp - X86 shuffle decode -------------===//
	//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//
	//===----------------------------------------------------------------------===//
	//
	// Define several functions to decode x86 specific shuffle semantics using
	// constants from the constant pool.
	//
	//===----------------------------------------------------------------------===//

	#include "X86ShuffleDecodeConstantPool.h"
	#include "MCTargetDesc/X86ShuffleDecode.h"
	#include "llvm/ADT/APInt.h"
	#include "llvm/ADT/SmallVector.h"
	#include "llvm/IR/Constants.h"

	//===----------------------------------------------------------------------===//
	// Vector Mask Decoding
	//===----------------------------------------------------------------------===//

	namespace llvm {

	/// Reinterpret the integer vector constant \p C as a sequence of
	/// \p MaskEltSizeInBits wide shuffle-mask elements.
	///
	/// \param C                 Constant to decode.
	/// \param MaskEltSizeInBits Width of one shuffle-mask element; must evenly
	///                          divide the total size of \p C (asserted).
	/// \param UndefElts         Reset to one bit per mask element; a bit is set
	///                          when that mask element is entirely undef.
	/// \param RawMask           Resized to the mask element count and filled
	///                          with the zero-extended element values.
	/// \returns false if \p C is not a fixed vector of integers, or if any of
	///          its elements is neither undef nor a ConstantInt.
	static bool extractConstantMask(const Constant *C, unsigned MaskEltSizeInBits,
	                                APInt &UndefElts,
	                                SmallVectorImpl<uint64_t> &RawMask) {
	  // It is not an error for shuffle masks to not be a vector of
	  // MaskEltSizeInBits because the constant pool uniques constants by their
	  // bit representation.
	  // e.g. the following take up the same space in the constant pool:
	  //   i128 -170141183420855150465331762880109871104
	  //
	  //   <2 x i64> <i64 -9223372034707292160, i64 -9223372034707292160>
	  //
	  //   <4 x i32> <i32 -2147483648, i32 -2147483648,
	  //              i32 -2147483648, i32 -2147483648>
	  auto *CstTy = dyn_cast<FixedVectorType>(C->getType());
	  if (!CstTy)
	    return false;

	  Type *CstEltTy = CstTy->getElementType();
	  if (!CstEltTy->isIntegerTy())
	    return false;

	  unsigned CstSizeInBits = CstTy->getPrimitiveSizeInBits();
	  unsigned CstEltSizeInBits = CstTy->getScalarSizeInBits();
	  unsigned NumCstElts = CstTy->getNumElements();

	  assert((CstSizeInBits % MaskEltSizeInBits) == 0 &&
	         "Unaligned shuffle mask size");

	  unsigned NumMaskElts = CstSizeInBits / MaskEltSizeInBits;
	  UndefElts = APInt(NumMaskElts, 0);
	  RawMask.resize(NumMaskElts, 0);

	  // Fast path - if the constants match the mask size then copy direct.
	  if (MaskEltSizeInBits == CstEltSizeInBits) {
	    assert(NumCstElts == NumMaskElts && "Unaligned shuffle mask size");
	    for (unsigned i = 0; i != NumMaskElts; ++i) {
	      Constant *COp = C->getAggregateElement(i);
	      if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
	        return false;

	      if (isa<UndefValue>(COp)) {
	        UndefElts.setBit(i);
	        RawMask[i] = 0;
	        continue;
	      }

	      auto *Elt = cast<ConstantInt>(COp);
	      RawMask[i] = Elt->getValue().getZExtValue();
	    }
	    return true;
	  }

	  // Extract all the undef/constant element data and pack into single bitsets.
	  APInt UndefBits(CstSizeInBits, 0);
	  APInt MaskBits(CstSizeInBits, 0);
	  for (unsigned i = 0; i != NumCstElts; ++i) {
	    Constant *COp = C->getAggregateElement(i);
	    if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
	      return false;

	    unsigned BitOffset = i * CstEltSizeInBits;

	    if (isa<UndefValue>(COp)) {
	      UndefBits.setBits(BitOffset, BitOffset + CstEltSizeInBits);
	      continue;
	    }

	    MaskBits.insertBits(cast<ConstantInt>(COp)->getValue(), BitOffset);
	  }

	  // Now extract the undef/constant bit data into the raw shuffle masks.
	  for (unsigned i = 0; i != NumMaskElts; ++i) {
	    unsigned BitOffset = i * MaskEltSizeInBits;
	    APInt EltUndef = UndefBits.extractBits(MaskEltSizeInBits, BitOffset);

	    // Only treat the element as UNDEF if all bits are UNDEF, otherwise
	    // treat it as zero.
	    if (EltUndef.isAllOnes()) {
	      UndefElts.setBit(i);
	      RawMask[i] = 0;
	      continue;
	    }

	    APInt EltBits = MaskBits.extractBits(MaskEltSizeInBits, BitOffset);
	    RawMask[i] = EltBits.getZExtValue();
	  }

	  return true;
	}

	/// Decode a PSHUFB byte-shuffle mask held in the constant \p C.
	///
	/// Appends Width / 8 entries to \p ShuffleMask: SM_SentinelUndef for undef
	/// bytes, SM_SentinelZero for bytes whose bit 7 is set, otherwise the source
	/// byte index within the same 16-byte lane. If the constant cannot be
	/// extracted as a byte mask, \p ShuffleMask is left untouched.
	///
	/// \param C           Constant holding the mask; at least \p Width bits.
	/// \param Width       Vector width in bits (128, 256 or 512).
	/// \param ShuffleMask Output mask that decoded entries are appended to.
	void DecodePSHUFBMask(const Constant *C, unsigned Width,
	                      SmallVectorImpl<int> &ShuffleMask) {
	  assert((Width == 128 || Width == 256 || Width == 512) &&
	         C->getType()->getPrimitiveSizeInBits() >= Width &&
	         "Unexpected vector size.");

	  // The shuffle mask requires a byte vector.
	  APInt UndefElts;
	  SmallVector<uint64_t, 64> RawMask;
	  if (!extractConstantMask(C, 8, UndefElts, RawMask))
	    return;

	  unsigned NumElts = Width / 8;
	  assert((NumElts == 16 || NumElts == 32 || NumElts == 64) &&
	         "Unexpected number of vector elements.");

	  for (unsigned i = 0; i != NumElts; ++i) {
	    if (UndefElts[i]) {
	      ShuffleMask.push_back(SM_SentinelUndef);
	      continue;
	    }

	    uint64_t Element = RawMask[i];
	    // If the high bit (7) of the byte is set, the element is zeroed.
	    if (Element & (1 << 7))
	      ShuffleMask.push_back(SM_SentinelZero);
	    else {
	      // For AVX vectors with 32 bytes the base of the shuffle is the 16-byte
	      // lane of the vector we're inside.
	      unsigned Base = i & ~0xf;

	      // Only the least significant 4 bits of the byte are used.
	      int Index = Base + (Element & 0xf);
	      ShuffleMask.push_back(Index);
	    }
	  }
	}

	/// Decode a variable VPERMILPS/VPERMILPD mask held in the constant \p C.
	///
	/// Appends Width / ElSize entries to \p ShuffleMask: SM_SentinelUndef for
	/// undef elements, otherwise the source element index inside the same
	/// 128-bit lane. If the constant cannot be extracted, \p ShuffleMask is left
	/// untouched.
	///
	/// \param C           Constant holding the mask; at least \p Width bits.
	/// \param ElSize      Element size in bits (32 or 64).
	/// \param Width       Vector width in bits (128, 256 or 512).
	/// \param ShuffleMask Output mask that decoded entries are appended to.
	void DecodeVPERMILPMask(const Constant *C, unsigned ElSize, unsigned Width,
	                        SmallVectorImpl<int> &ShuffleMask) {
	  assert((Width == 128 || Width == 256 || Width == 512) &&
	         C->getType()->getPrimitiveSizeInBits() >= Width &&
	         "Unexpected vector size.");
	  assert((ElSize == 32 || ElSize == 64) && "Unexpected vector element size.");

	  // The shuffle mask requires elements the same size as the target.
	  APInt UndefElts;
	  SmallVector<uint64_t, 16> RawMask;
	  if (!extractConstantMask(C, ElSize, UndefElts, RawMask))
	    return;

	  unsigned NumElts = Width / ElSize;
	  unsigned NumEltsPerLane = 128 / ElSize;
	  assert((NumElts == 2 || NumElts == 4 || NumElts == 8 || NumElts == 16) &&
	         "Unexpected number of vector elements.");

	  for (unsigned i = 0; i != NumElts; ++i) {
	    if (UndefElts[i]) {
	      ShuffleMask.push_back(SM_SentinelUndef);
	      continue;
	    }

	    // Start from the first element of the 128-bit lane containing i.
	    int Index = i & ~(NumEltsPerLane - 1);
	    uint64_t Element = RawMask[i];
	    // 64-bit elements select with bit 1; 32-bit elements with bits [1:0].
	    if (ElSize == 64)
	      Index += (Element >> 1) & 0x1;
	    else
	      Index += Element & 0x3;

	    ShuffleMask.push_back(Index);
	  }
	}

	/// Decode a VPERMIL2PS/VPERMIL2PD selector mask held in the constant \p C.
	///
	/// Appends Width / ElSize entries to \p ShuffleMask: SM_SentinelUndef for
	/// undef elements, SM_SentinelZero where the M2Z/match-bit combination
	/// zeroes the element, otherwise the selected element index (indices of the
	/// second source are offset by the element count). \p ShuffleMask is left
	/// untouched if the constant cannot be extracted or holds fewer elements
	/// than Width / ElSize.
	///
	/// \param C           Constant holding the selector mask (128 or 256 bits).
	/// \param M2Z         Two-bit zero-match control taken from the instruction.
	/// \param ElSize      Element size in bits.
	/// \param Width       Vector width in bits.
	/// \param ShuffleMask Output mask that decoded entries are appended to.
	void DecodeVPERMIL2PMask(const Constant *C, unsigned M2Z, unsigned ElSize,
	                         unsigned Width, SmallVectorImpl<int> &ShuffleMask) {
	  Type *MaskTy = C->getType();
	  unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
	  (void)MaskTySize;
	  assert((MaskTySize == 128 || MaskTySize == 256) && Width >= MaskTySize &&
	         "Unexpected vector size.");

	  // The shuffle mask requires elements the same size as the target.
	  APInt UndefElts;
	  SmallVector<uint64_t, 8> RawMask;
	  if (!extractConstantMask(C, ElSize, UndefElts, RawMask))
	    return;

	  unsigned NumElts = Width / ElSize;
	  unsigned NumEltsPerLane = 128 / ElSize;
	  assert((NumElts == 2 || NumElts == 4 || NumElts == 8) &&
	         "Unexpected number of vector elements.");

	  // The assertion above only bounds the constant from above
	  // (Width >= MaskTySize), so a constant narrower than Width would be
	  // indexed past its last element below. Treat that like an undecodable
	  // mask.
	  if (RawMask.size() < NumElts)
	    return;

	  for (unsigned i = 0; i != NumElts; ++i) {
	    if (UndefElts[i]) {
	      ShuffleMask.push_back(SM_SentinelUndef);
	      continue;
	    }

	    // VPERMIL2 Operation.
	    // Bits[3] - Match Bit.
	    // Bits[2:1] - (Per Lane) PD Shuffle Mask.
	    // Bits[2:0] - (Per Lane) PS Shuffle Mask.
	    uint64_t Selector = RawMask[i];
	    unsigned MatchBit = (Selector >> 3) & 0x1;

	    // M2Z[0:1]     MatchBit
	    //   0Xb           X        Source selected by Selector index.
	    //   10b           0        Source selected by Selector index.
	    //   10b           1        Zero.
	    //   11b           0        Zero.
	    //   11b           1        Source selected by Selector index.
	    if ((M2Z & 0x2) != 0u && MatchBit != (M2Z & 0x1)) {
	      ShuffleMask.push_back(SM_SentinelZero);
	      continue;
	    }

	    // Start from the first element of the 128-bit lane containing i, then
	    // add the in-lane selection.
	    int Index = i & ~(NumEltsPerLane - 1);
	    if (ElSize == 64)
	      Index += (Selector >> 1) & 0x1;
	    else
	      Index += Selector & 0x3;

	    // Selector bit 2 picks the source operand; the second source's elements
	    // are numbered after the first source's NumElts elements.
	    int Src = (Selector >> 2) & 0x1;
	    Index += Src * NumElts;
	    ShuffleMask.push_back(Index);
	  }
	}

	/// Decode a VPPERM byte-permute mask held in the constant \p C.
	///
	/// Appends 16 entries to \p ShuffleMask: SM_SentinelUndef for undef bytes,
	/// SM_SentinelZero for the zero-fill permute operation, otherwise the source
	/// byte index (0 - 31). Any other permute operation cannot be expressed as
	/// a shuffle, in which case \p ShuffleMask is cleared and decoding stops.
	/// \p ShuffleMask is left untouched if the constant cannot be extracted or
	/// holds fewer than 16 bytes.
	///
	/// \param C           Constant holding the byte mask.
	/// \param Width       Vector width in bits; must be 128.
	/// \param ShuffleMask Output mask that decoded entries are appended to.
	void DecodeVPPERMMask(const Constant *C, unsigned Width,
	                      SmallVectorImpl<int> &ShuffleMask) {
	  Type *MaskTy = C->getType();
	  unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
	  (void)MaskTySize;
	  assert(Width == 128 && Width >= MaskTySize && "Unexpected vector size.");

	  // The shuffle mask requires a byte vector.
	  APInt UndefElts;
	  SmallVector<uint64_t, 16> RawMask;
	  if (!extractConstantMask(C, 8, UndefElts, RawMask))
	    return;

	  unsigned NumElts = Width / 8;
	  assert(NumElts == 16 && "Unexpected number of vector elements.");

	  // The assertion above only bounds the constant from above
	  // (Width >= MaskTySize), so a constant narrower than Width would be
	  // indexed past its last byte below. Treat that like an undecodable mask.
	  if (RawMask.size() < NumElts)
	    return;

	  for (unsigned i = 0; i != NumElts; ++i) {
	    if (UndefElts[i]) {
	      ShuffleMask.push_back(SM_SentinelUndef);
	      continue;
	    }

	    // VPPERM Operation
	    // Bits[4:0] - Byte Index (0 - 31)
	    // Bits[7:5] - Permute Operation
	    //
	    // Permute Operation:
	    // 0 - Source byte (no logical operation).
	    // 1 - Invert source byte.
	    // 2 - Bit reverse of source byte.
	    // 3 - Bit reverse of inverted source byte.
	    // 4 - 00h (zero - fill).
	    // 5 - FFh (ones - fill).
	    // 6 - Most significant bit of source byte replicated in all bit
	    // positions.
	    // 7 - Invert most significant bit of source byte and replicate in all bit
	    // positions.
	    uint64_t Element = RawMask[i];
	    uint64_t Index = Element & 0x1F;
	    uint64_t PermuteOp = (Element >> 5) & 0x7;

	    if (PermuteOp == 4) {
	      ShuffleMask.push_back(SM_SentinelZero);
	      continue;
	    }
	    // Only a plain byte copy (op 0) is a shuffle; anything else modifies the
	    // byte, so give up and hand back an empty mask.
	    if (PermuteOp != 0) {
	      ShuffleMask.clear();
	      return;
	    }
	    ShuffleMask.push_back(static_cast<int>(Index));
	  }
	}

	} // namespace llvm