| // Copyright (c) 2025 LunarG Inc. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| #include "source/opt/canonicalize_ids_pass.h" |
| |
| #include <algorithm> |
| #include <limits> |
| |
| namespace spvtools { |
| namespace opt { |
| |
| Pass::Status CanonicalizeIdsPass::Process() { |
| // Initialize the new ID map. |
| new_id_.resize(GetBound(), unused_); |
| |
| // Scan the IDs and set to unmapped. |
| ScanIds(); |
| |
| // Create new IDs for types and consts. |
| CanonicalizeTypeAndConst(); |
| |
| // Create new IDs for names. |
| CanonicalizeNames(); |
| |
| // Create new IDs for functions. |
| CanonicalizeFunctions(); |
| |
| // Create new IDs for everything else. |
| CanonicalizeRemainders(); |
| |
| // Apply the new IDs to the module. |
| auto const modified = ApplyMap(); |
| |
| // Update bound in the header. |
| if (modified) { |
| UpdateBound(); |
| } |
| |
| return modified ? Status::SuccessWithChange : Status::SuccessWithoutChange; |
| } |
| |
| void CanonicalizeIdsPass::ScanIds() { |
| get_module()->ForEachInst( |
| [this](Instruction* inst) { |
| // Look for types and constants. |
| if (spvOpcodeGeneratesType(inst->opcode()) || |
| spvOpcodeIsConstant(inst->opcode())) { |
| type_and_const_ids_.push_back(inst->result_id()); |
| SetNewId(inst->result_id(), unmapped_); |
| } |
| // Look for names. |
| else if (inst->opcode() == spv::Op::OpName) { |
| // store name string in map so that we can compute the hash later |
| auto const name = inst->GetOperand(1).AsString(); |
| auto const target = inst->GetSingleWordInOperand(0); |
| name_ids_[name] = target; |
| SetNewId(target, unmapped_); |
| } |
| // Look for function IDs. |
| else if (inst->opcode() == spv::Op::OpFunction) { |
| auto const res_id = inst->result_id(); |
| function_ids_.push_back(res_id); |
| SetNewId(res_id, unmapped_); |
| } |
| // Look for remaining result IDs. |
| else if (inst->HasResultId()) { |
| auto const res_id = inst->result_id(); |
| SetNewId(res_id, unmapped_); |
| } |
| }, |
| true); |
| } |
| |
| void CanonicalizeIdsPass::CanonicalizeTypeAndConst() { |
| // Remap type IDs. |
| static constexpr std::uint32_t soft_type_id_limit = 3011; // small prime. |
| static constexpr std::uint32_t first_mapped_id = 8; // offset into ID space |
| for (auto const id : type_and_const_ids_) { |
| if (!IsOldIdUnmapped(id)) { |
| continue; |
| } |
| |
| // Compute the hash value. |
| auto const hash_value = HashTypeAndConst(id); |
| if (hash_value != unmapped_) { |
| SetNewId(id, hash_value % soft_type_id_limit + first_mapped_id); |
| } |
| } |
| } |
| |
| // Hash types to canonical values. This can return ID collisions (it's a bit |
| // inevitable): it's up to the caller to handle that gracefully. |
| spv::Id CanonicalizeIdsPass::HashTypeAndConst(spv::Id const id) const { |
| spv::Id value = 0; |
| |
| auto const inst = get_def_use_mgr()->GetDef(id); |
| auto const op_code = inst->opcode(); |
| switch (op_code) { |
| case spv::Op::OpTypeVoid: |
| value = 0; |
| break; |
| case spv::Op::OpTypeBool: |
| value = 1; |
| break; |
| case spv::Op::OpTypeInt: { |
| auto const signedness = inst->GetSingleWordOperand(2); |
| value = 3 + signedness; |
| break; |
| } |
| case spv::Op::OpTypeFloat: |
| value = 5; |
| break; |
| case spv::Op::OpTypeVector: { |
| auto const component_type = inst->GetSingleWordOperand(1); |
| auto const component_count = inst->GetSingleWordOperand(2); |
| value = 6 + HashTypeAndConst(component_type) * (component_count - 1); |
| break; |
| } |
| case spv::Op::OpTypeMatrix: { |
| auto const column_type = inst->GetSingleWordOperand(1); |
| auto const column_count = inst->GetSingleWordOperand(2); |
| value = 30 + HashTypeAndConst(column_type) * (column_count - 1); |
| break; |
| } |
| case spv::Op::OpTypeImage: { |
| // TODO: Why isn't the format used to compute the hash value? |
| auto const sampled_type = inst->GetSingleWordOperand(1); |
| auto const dim = inst->GetSingleWordOperand(2); |
| auto const depth = inst->GetSingleWordOperand(3); |
| auto const arrayed = inst->GetSingleWordOperand(4); |
| auto const ms = inst->GetSingleWordOperand(5); |
| auto const sampled = inst->GetSingleWordOperand(6); |
| value = 120 + HashTypeAndConst(sampled_type) + dim + depth * 8 * 16 + |
| arrayed * 4 * 16 + ms * 2 * 16 + sampled * 1 * 16; |
| break; |
| } |
| case spv::Op::OpTypeSampler: |
| value = 500; |
| break; |
| case spv::Op::OpTypeSampledImage: |
| value = 502; |
| break; |
| case spv::Op::OpTypeArray: { |
| auto const element_type = inst->GetSingleWordOperand(1); |
| auto const length = inst->GetSingleWordOperand(2); |
| value = 501 + HashTypeAndConst(element_type) * length; |
| break; |
| } |
| case spv::Op::OpTypeRuntimeArray: { |
| auto const element_type = inst->GetSingleWordOperand(1); |
| value = 5000 + HashTypeAndConst(element_type); |
| break; |
| } |
| case spv::Op::OpTypeStruct: |
| value = 10000; |
| for (uint32_t w = 1; w < inst->NumOperandWords(); ++w) { |
| value += (w + 1) * HashTypeAndConst(inst->GetSingleWordOperand(w)); |
| } |
| break; |
| case spv::Op::OpTypeOpaque: { |
| // TODO: Name is a literal that may have more than one word. |
| auto const name = inst->GetSingleWordOperand(1); |
| value = 6000 + name; |
| break; |
| } |
| case spv::Op::OpTypePointer: { |
| auto const type = inst->GetSingleWordOperand(2); |
| value = 100000 + HashTypeAndConst(type); |
| break; |
| } |
| case spv::Op::OpTypeFunction: |
| value = 200000; |
| for (uint32_t w = 1; w < inst->NumOperandWords(); ++w) { |
| value += (w + 1) * HashTypeAndConst(inst->GetSingleWordOperand(w)); |
| } |
| break; |
| case spv::Op::OpTypeEvent: |
| value = 300000; |
| break; |
| case spv::Op::OpTypeDeviceEvent: |
| value = 300001; |
| break; |
| case spv::Op::OpTypeReserveId: |
| value = 300002; |
| break; |
| case spv::Op::OpTypeQueue: |
| value = 300003; |
| break; |
| case spv::Op::OpTypePipe: |
| value = 300004; |
| break; |
| case spv::Op::OpTypePipeStorage: |
| value = 300005; |
| break; |
| case spv::Op::OpTypeNamedBarrier: |
| value = 300006; |
| break; |
| case spv::Op::OpConstantTrue: |
| value = 300007; |
| break; |
| case spv::Op::OpConstantFalse: |
| value = 300008; |
| break; |
| case spv::Op::OpTypeRayQueryKHR: |
| value = 300009; |
| break; |
| case spv::Op::OpTypeAccelerationStructureKHR: |
| value = 300010; |
| break; |
| // Don't map the following types. |
| // TODO: These types were not remapped in the glslang version of the |
| // remapper. Support should be added as necessary. |
| case spv::Op::OpTypeCooperativeMatrixNV: |
| case spv::Op::OpTypeCooperativeMatrixKHR: |
| case spv::Op::OpTypeCooperativeVectorNV: |
| case spv::Op::OpTypeHitObjectNV: |
| case spv::Op::OpTypeUntypedPointerKHR: |
| case spv::Op::OpTypeNodePayloadArrayAMDX: |
| case spv::Op::OpTypeTensorLayoutNV: |
| case spv::Op::OpTypeTensorViewNV: |
| case spv::Op::OpTypeTensorARM: |
| case spv::Op::OpTypeTaskSequenceINTEL: |
| value = unmapped_; |
| break; |
| case spv::Op::OpConstant: { |
| auto const result_type = inst->GetSingleWordOperand(0); |
| value = 400011 + HashTypeAndConst(result_type); |
| auto const literal = inst->GetOperand(2); |
| for (uint32_t w = 0; w < literal.words.size(); ++w) { |
| value += (w + 3) * literal.words[w]; |
| } |
| break; |
| } |
| case spv::Op::OpConstantComposite: { |
| auto const result_type = inst->GetSingleWordOperand(0); |
| value = 300011 + HashTypeAndConst(result_type); |
| for (uint32_t w = 2; w < inst->NumOperandWords(); ++w) { |
| value += (w + 1) * HashTypeAndConst(inst->GetSingleWordOperand(w)); |
| } |
| break; |
| } |
| case spv::Op::OpConstantNull: { |
| auto const result_type = inst->GetSingleWordOperand(0); |
| value = 500009 + HashTypeAndConst(result_type); |
| break; |
| } |
| case spv::Op::OpConstantSampler: { |
| auto const result_type = inst->GetSingleWordOperand(0); |
| value = 600011 + HashTypeAndConst(result_type); |
| for (uint32_t w = 2; w < inst->NumOperandWords(); ++w) { |
| value += (w + 1) * inst->GetSingleWordOperand(w); |
| } |
| break; |
| } |
| // Don't map the following constants. |
| // TODO: These constants were not remapped in the glslang version of the |
| // remapper. Support should be added as necessary. |
| case spv::Op::OpConstantCompositeReplicateEXT: |
| case spv::Op::OpConstantFunctionPointerINTEL: |
| case spv::Op::OpConstantStringAMDX: |
| case spv::Op::OpSpecConstantTrue: |
| case spv::Op::OpSpecConstantFalse: |
| case spv::Op::OpSpecConstant: |
| case spv::Op::OpSpecConstantComposite: |
| case spv::Op::OpSpecConstantCompositeReplicateEXT: |
| case spv::Op::OpSpecConstantOp: |
| case spv::Op::OpSpecConstantStringAMDX: |
| value = unmapped_; |
| break; |
| // TODO: Add additional types/constants as needed. See |
| // spvOpcodeGeneratesType and spvOpcodeIsConstant. |
| default: |
| context()->consumer()(SPV_MSG_WARNING, "", {0, 0, 0}, |
| "unhandled opcode will not be canonicalized"); |
| break; |
| } |
| |
| return value; |
| } |
| |
| void CanonicalizeIdsPass::CanonicalizeNames() { |
| static constexpr std::uint32_t soft_type_id_limit = 3011; // Small prime. |
| static constexpr std::uint32_t first_mapped_id = |
| 3019; // Offset into ID space. |
| |
| for (auto const& [name, target] : name_ids_) { |
| if (!IsOldIdUnmapped(target)) { |
| continue; |
| } |
| |
| spv::Id hash_value = 1911; |
| for (const char c : name) { |
| hash_value = hash_value * 1009 + c; |
| } |
| |
| if (IsOldIdUnmapped(target)) { |
| SetNewId(target, hash_value % soft_type_id_limit + first_mapped_id); |
| } |
| } |
| } |
| |
| void CanonicalizeIdsPass::CanonicalizeFunctions() { |
| static constexpr std::uint32_t soft_type_id_limit = 19071; // Small prime. |
| static constexpr std::uint32_t first_mapped_id = |
| 6203; // Offset into ID space. |
| // Window size for context-sensitive canonicalization values |
| // Empirical best size from a single data set. TODO: Would be a good tunable. |
| // We essentially perform a little convolution around each instruction, |
| // to capture the flavor of nearby code, to hopefully match to similar |
| // code in other modules. |
| static const int32_t window_size = 2; |
| |
| for (auto const func_id : function_ids_) { |
| // Store the instructions and opcode hash values in vectors so that the |
| // window of instructions can be easily accessed and avoid having to |
| // recompute the hash value repeatedly in overlapping windows. |
| std::vector<Instruction*> insts; |
| std::vector<uint32_t> opcode_hashvals; |
| auto const func = context()->GetFunction(func_id); |
| func->WhileEachInst([&](Instruction* inst) { |
| insts.emplace_back(inst); |
| opcode_hashvals.emplace_back(HashOpCode(inst)); |
| return true; |
| }); |
| |
| // For every instruction in the function, compute the hash value using the |
| // instruction and a small window of surrounding instructions. |
| assert(insts.size() < (size_t)std::numeric_limits<int32_t>::max()); |
| for (int32_t i = 0; i < (int32_t)insts.size(); ++i) { |
| auto const inst = insts[i]; |
| if (!inst->HasResultId()) { |
| continue; |
| } |
| |
| auto const old_id = inst->result_id(); |
| if (!IsOldIdUnmapped(old_id)) { |
| continue; |
| } |
| |
| int32_t const lower_bound = std::max(0, i - window_size); |
| int32_t const upper_bound = |
| std::min((int32_t)insts.size() - 1, i + window_size); |
| spv::Id hash_value = func_id * 17; // Small prime. |
| // Include the hash value of the preceding instructions in the hash but |
| // don't include instructions before the OpFunction. |
| for (int32_t j = i - 1; j >= lower_bound; --j) { |
| auto const local_inst = insts[j]; |
| if (local_inst->opcode() == spv::Op::OpFunction) { |
| break; |
| } |
| |
| hash_value = hash_value * 30103 + |
| opcode_hashvals[j]; // 30103 is a semi-arbitrary prime. |
| } |
| |
| // Include the hash value of the subsequent instructions in the hash but |
| // don't include instructions past OpFunctionEnd. |
| for (int32_t j = i; j <= upper_bound; ++j) { |
| auto const local_inst = insts[j]; |
| if (local_inst->opcode() == spv::Op::OpFunctionEnd) { |
| break; |
| } |
| |
| hash_value = hash_value * 30103 + |
| opcode_hashvals[j]; // 30103 is a semiarbitrary prime. |
| } |
| |
| SetNewId(old_id, hash_value % soft_type_id_limit + first_mapped_id); |
| } |
| } |
| } |
| |
| spv::Id CanonicalizeIdsPass::HashOpCode(Instruction const* const inst) const { |
| auto const op_code = inst->opcode(); |
| std::uint32_t offset = 0; |
| if (op_code == spv::Op::OpExtInst) { |
| // offset is literal instruction |
| offset = inst->GetSingleWordOperand(3); |
| } |
| |
| return (std::uint32_t)op_code * 19 + offset; // 19 is a small prime. |
| } |
| |
| // Assign remaining IDs sequentially from remaining holes in the new ID space. |
| void CanonicalizeIdsPass::CanonicalizeRemainders() { |
| spv::Id next_id = 1; |
| for (uint32_t old_id = 0; old_id < new_id_.size(); ++old_id) { |
| if (IsOldIdUnmapped(old_id)) { |
| next_id = SetNewId(old_id, next_id); |
| } |
| } |
| } |
| |
| bool CanonicalizeIdsPass::ApplyMap() { |
| bool modified = false; |
| context()->module()->ForEachInst( |
| [this, &modified](Instruction* inst) { |
| for (auto operand = inst->begin(); operand != inst->end(); ++operand) { |
| const auto type = operand->type; |
| if (spvIsIdType(type)) { |
| uint32_t& id = operand->words[0]; |
| uint32_t const new_id = GetNewId(id); |
| if (new_id == unused_) { |
| continue; |
| } |
| |
| assert(new_id != unmapped_ && "new_id should not be unmapped_"); |
| |
| if (id != new_id) { |
| modified = true; |
| id = new_id; |
| if (type == SPV_OPERAND_TYPE_RESULT_ID) { |
| inst->SetResultId(new_id); |
| } else if (type == SPV_OPERAND_TYPE_TYPE_ID) { |
| inst->SetResultType(new_id); |
| } |
| } |
| } |
| } |
| }, |
| true); |
| |
| return modified; |
| } |
| |
| spv::Id CanonicalizeIdsPass::GetBound() const { |
| return context()->module()->id_bound(); |
| } |
| |
| void CanonicalizeIdsPass::UpdateBound() { |
| context()->module()->SetIdBound(context()->module()->ComputeIdBound()); |
| |
| context()->ResetFeatureManager(); |
| } |
| |
| // Set a new ID. If the new ID is alreadly claimed, the next consecutive ID |
| // will be claimed, mapped, and returned to the caller. |
| spv::Id CanonicalizeIdsPass::SetNewId(spv::Id const old_id, spv::Id new_id) { |
| assert(old_id < GetBound() && "don't remap an ID that is out of bounds"); |
| |
| if (old_id >= new_id_.size()) { |
| new_id_.resize(old_id + 1, unused_); |
| } |
| |
| if (new_id != unmapped_ && new_id != unused_) { |
| assert(!IsOldIdUnused(old_id) && "don't remap unused IDs"); |
| assert(IsOldIdUnmapped(old_id) && "don't remap already mapped IDs"); |
| |
| new_id = ClaimNewId(new_id); |
| } |
| |
| new_id_[old_id] = new_id; |
| |
| return new_id; |
| } |
| |
| // Helper function for SetNewID. Claim a new ID. If the new ID is already |
| // claimed, the next consecutive ID will be claimed and returned to the caller. |
| spv::Id CanonicalizeIdsPass::ClaimNewId(spv::Id new_id) { |
| // Return the ID if it's not taken. |
| auto iter = claimed_new_ids_.find(new_id); |
| if (iter != claimed_new_ids_.end()) { |
| // Otherwise, search for the next unused ID using our current iterator. |
| // Technically, it's a linear search across the set starting at the |
| // iterator, but it's not as bad as it would appear in practice assuming the |
| // hash values are well distributed. |
| iter = std::adjacent_find(iter, claimed_new_ids_.end(), [](int a, int b) { |
| return a + 1 != b; // Stop at the first non-consecutive pair. |
| }); |
| if (iter != claimed_new_ids_.end()) { |
| new_id = |
| *iter + 1; // We need the next ID after where the search stopped. |
| } else { |
| new_id = *(--iter) + 1; // We reached the end so we use the next ID. |
| } |
| } |
| |
| assert(!IsNewIdClaimed(new_id) && |
| "don't remap to an ID that is already claimed"); |
| iter = claimed_new_ids_.insert(iter, new_id); |
| assert(*iter == new_id); |
| |
| return new_id; |
| } |
| |
| std::string CanonicalizeIdsPass::IdAsString(spv::Id const id) const { |
| if (id == unused_) { |
| return "unused"; |
| } else if (id == unmapped_) { |
| return "unmapped"; |
| } else { |
| return std::to_string(id); |
| } |
| } |
| |
| void CanonicalizeIdsPass::PrintNewIds() const { |
| for (spv::Id id = 0; id < new_id_.size(); ++id) { |
| auto const message = |
| "new id[" + IdAsString(id) + "]: " + IdAsString(new_id_[id]); |
| context()->consumer()(SPV_MSG_INFO, "", {0, 0, 0}, message.c_str()); |
| } |
| } |
| |
| } // namespace opt |
| } // namespace spvtools |