source/comp/markv_decoder.cpp - SwiftShader - Git at Google

 // Copyright (c) 2018 Google LLC
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.

 #include "source/comp/markv_decoder.h"

 #include <cstring>
 #include <iterator>
 #include <numeric>

 #include "source/ext_inst.h"
 #include "source/opcode.h"
 #include "spirv-tools/libspirv.hpp"

 namespace spvtools {
 namespace comp {

 spv_result_t MarkvDecoder::DecodeNonIdWord(uint32_t* word) {
   auto* codec = model_->GetNonIdWordHuffmanCodec(inst_.opcode, operand_index_);

   if (codec) {
     uint64_t decoded_value = 0;
     if (!codec->DecodeFromStream(GetReadBitCallback(), &decoded_value))
       return Diag(SPV_ERROR_INVALID_BINARY)
              << "Failed to decode non-id word with Huffman";

     if (decoded_value != MarkvModel::GetMarkvNoneOfTheAbove()) {
       // The word decoded successfully.
       *word = uint32_t(decoded_value);
       assert(*word == decoded_value);
       return SPV_SUCCESS;
     }

     // Received kMarkvNoneOfTheAbove signal, use fallback decoding.
   }

   const size_t chunk_length =
       model_->GetOperandVariableWidthChunkLength(operand_.type);
   if (chunk_length) {
     if (!reader_.ReadVariableWidthU32(word, chunk_length))
       return Diag(SPV_ERROR_INVALID_BINARY)
              << "Failed to decode non-id word with varint";
   } else {
     if (!reader_.ReadUnencoded(word))
       return Diag(SPV_ERROR_INVALID_BINARY)
              << "Failed to read unencoded non-id word";
   }
   return SPV_SUCCESS;
 }

 spv_result_t MarkvDecoder::DecodeOpcodeAndNumberOfOperands(
     uint32_t* opcode, uint32_t* num_operands) {
   // First try to use the Markov chain codec.
   auto* codec =
       model_->GetOpcodeAndNumOperandsMarkovHuffmanCodec(GetPrevOpcode());
   if (codec) {
     uint64_t decoded_value = 0;
     if (!codec->DecodeFromStream(GetReadBitCallback(), &decoded_value))
       return Diag(SPV_ERROR_INTERNAL)
              << "Failed to decode opcode_and_num_operands, previous opcode is "
              << spvOpcodeString(GetPrevOpcode());

     if (decoded_value != MarkvModel::GetMarkvNoneOfTheAbove()) {
       // The word was successfully decoded.
       *opcode = uint32_t(decoded_value & 0xFFFF);
       *num_operands = uint32_t(decoded_value >> 16);
       return SPV_SUCCESS;
     }

     // Received kMarkvNoneOfTheAbove signal, use fallback decoding.
   }

   // Fallback to base-rate codec.
   codec = model_->GetOpcodeAndNumOperandsMarkovHuffmanCodec(SpvOpNop);
   assert(codec);
   uint64_t decoded_value = 0;
   if (!codec->DecodeFromStream(GetReadBitCallback(), &decoded_value))
     return Diag(SPV_ERROR_INTERNAL)
            << "Failed to decode opcode_and_num_operands with global codec";

   if (decoded_value == MarkvModel::GetMarkvNoneOfTheAbove()) {
     // Received kMarkvNoneOfTheAbove signal, fallback further.
     return SPV_UNSUPPORTED;
   }

   *opcode = uint32_t(decoded_value & 0xFFFF);
   *num_operands = uint32_t(decoded_value >> 16);
   return SPV_SUCCESS;
 }

 spv_result_t MarkvDecoder::DecodeMtfRankHuffman(uint64_t mtf,
                                                 uint32_t fallback_method,
                                                 uint32_t* rank) {
   const auto* codec = GetMtfHuffmanCodec(mtf);
   if (!codec) {
     assert(fallback_method != kMtfNone);
     codec = GetMtfHuffmanCodec(fallback_method);
   }

   if (!codec) return Diag(SPV_ERROR_INTERNAL) << "No codec to decode MTF rank";

   uint32_t decoded_value = 0;
   if (!codec->DecodeFromStream(GetReadBitCallback(), &decoded_value))
     return Diag(SPV_ERROR_INTERNAL) << "Failed to decode MTF rank with Huffman";

   if (decoded_value == kMtfRankEncodedByValueSignal) {
     // Decode by value.
     if (!reader_.ReadVariableWidthU32(rank, model_->mtf_rank_chunk_length()))
       return Diag(SPV_ERROR_INTERNAL)
              << "Failed to decode MTF rank with varint";
     *rank += MarkvCodec::kMtfSmallestRankEncodedByValue;
   } else {
     // Decode using Huffman coding.
     assert(decoded_value < MarkvCodec::kMtfSmallestRankEncodedByValue);
     *rank = decoded_value;
   }
   return SPV_SUCCESS;
 }

 spv_result_t MarkvDecoder::DecodeIdWithDescriptor(uint32_t* id) {
   auto* codec =
       model_->GetIdDescriptorHuffmanCodec(inst_.opcode, operand_index_);

   uint64_t mtf = kMtfNone;
   if (codec) {
     uint64_t decoded_value = 0;
     if (!codec->DecodeFromStream(GetReadBitCallback(), &decoded_value))
       return Diag(SPV_ERROR_INTERNAL)
              << "Failed to decode descriptor with Huffman";

     if (decoded_value != MarkvModel::GetMarkvNoneOfTheAbove()) {
       const uint32_t long_descriptor = uint32_t(decoded_value);
       mtf = GetMtfLongIdDescriptor(long_descriptor);
     }
   }

   if (mtf == kMtfNone) {
     if (model_->id_fallback_strategy() !=
         MarkvModel::IdFallbackStrategy::kShortDescriptor) {
       return SPV_UNSUPPORTED;
     }

     uint64_t decoded_value = 0;
     if (!reader_.ReadBits(&decoded_value, MarkvCodec::kShortDescriptorNumBits))
       return Diag(SPV_ERROR_INTERNAL) << "Failed to read short descriptor";
     const uint32_t short_descriptor = uint32_t(decoded_value);
     if (short_descriptor == 0) {
       // Forward declared id.
       return SPV_UNSUPPORTED;
     }
     mtf = GetMtfShortIdDescriptor(short_descriptor);
   }

   return DecodeExistingId(mtf, id);
 }

 spv_result_t MarkvDecoder::DecodeExistingId(uint64_t mtf, uint32_t* id) {
   assert(multi_mtf_.GetSize(mtf) > 0);
   *id = 0;

   uint32_t rank = 0;

   if (multi_mtf_.GetSize(mtf) == 1) {
     rank = 1;
   } else {
     const spv_result_t result =
         DecodeMtfRankHuffman(mtf, kMtfGenericNonZeroRank, &rank);
     if (result != SPV_SUCCESS) return result;
   }

   assert(rank);
   if (!multi_mtf_.ValueFromRank(mtf, rank, id))
     return Diag(SPV_ERROR_INTERNAL) << "MTF rank is out of bounds";

   return SPV_SUCCESS;
 }

 spv_result_t MarkvDecoder::DecodeRefId(uint32_t* id) {
   {
     const spv_result_t result = DecodeIdWithDescriptor(id);
     if (result != SPV_UNSUPPORTED) return result;
   }

   const bool can_forward_declare = spvOperandCanBeForwardDeclaredFunction(
       SpvOp(inst_.opcode))(operand_index_);
   uint32_t rank = 0;
   *id = 0;

   if (model_->id_fallback_strategy() ==
       MarkvModel::IdFallbackStrategy::kRuleBased) {
     uint64_t mtf = GetRuleBasedMtf();
     if (mtf != kMtfNone && !can_forward_declare) {
       return DecodeExistingId(mtf, id);
     }

     if (mtf == kMtfNone) mtf = kMtfAll;
     {
       const spv_result_t result = DecodeMtfRankHuffman(mtf, kMtfAll, &rank);
       if (result != SPV_SUCCESS) return result;
     }

     if (rank == 0) {
       // This is the first occurrence of a forward declared id.
       *id = GetIdBound();
       SetIdBound(*id + 1);
       multi_mtf_.Insert(kMtfAll, *id);
       multi_mtf_.Insert(kMtfForwardDeclared, *id);
       if (mtf != kMtfAll) multi_mtf_.Insert(mtf, *id);
     } else {
       if (!multi_mtf_.ValueFromRank(mtf, rank, id))
         return Diag(SPV_ERROR_INTERNAL) << "MTF rank out of bounds";
     }
   } else {
     assert(can_forward_declare);

     if (!reader_.ReadVariableWidthU32(&rank, model_->mtf_rank_chunk_length()))
       return Diag(SPV_ERROR_INTERNAL)
              << "Failed to decode MTF rank with varint";

     if (rank == 0) {
       // This is the first occurrence of a forward declared id.
       *id = GetIdBound();
       SetIdBound(*id + 1);
       multi_mtf_.Insert(kMtfForwardDeclared, *id);
     } else {
       if (!multi_mtf_.ValueFromRank(kMtfForwardDeclared, rank, id))
         return Diag(SPV_ERROR_INTERNAL) << "MTF rank out of bounds";
     }
   }
   assert(*id);
   return SPV_SUCCESS;
 }

 spv_result_t MarkvDecoder::DecodeTypeId() {
   if (inst_.opcode == SpvOpFunctionParameter) {
     assert(!remaining_function_parameter_types_.empty());
     inst_.type_id = remaining_function_parameter_types_.front();
     remaining_function_parameter_types_.pop_front();
     return SPV_SUCCESS;
   }

   {
     const spv_result_t result = DecodeIdWithDescriptor(&inst_.type_id);
     if (result != SPV_UNSUPPORTED) return result;
   }

   assert(model_->id_fallback_strategy() ==
          MarkvModel::IdFallbackStrategy::kRuleBased);

   uint64_t mtf = GetRuleBasedMtf();
   assert(!spvOperandCanBeForwardDeclaredFunction(SpvOp(inst_.opcode))(
       operand_index_));

   if (mtf == kMtfNone) {
     mtf = kMtfTypeNonFunction;
     // Function types should have been handled by GetRuleBasedMtf.
     assert(inst_.opcode != SpvOpFunction);
   }

   return DecodeExistingId(mtf, &inst_.type_id);
 }

 spv_result_t MarkvDecoder::DecodeResultId() {
   uint32_t rank = 0;

   const uint64_t num_still_forward_declared =
       multi_mtf_.GetSize(kMtfForwardDeclared);

   if (num_still_forward_declared) {
     // Some ids were forward declared. Check if this id is one of them.
     uint64_t id_was_forward_declared;
     if (!reader_.ReadBits(&id_was_forward_declared, 1))
       return Diag(SPV_ERROR_INVALID_BINARY)
              << "Failed to read id_was_forward_declared flag";

     if (id_was_forward_declared) {
       if (!reader_.ReadVariableWidthU32(&rank, model_->mtf_rank_chunk_length()))
         return Diag(SPV_ERROR_INVALID_BINARY)
                << "Failed to read MTF rank of forward declared id";

       if (rank) {
         // The id was forward declared, recover it from kMtfForwardDeclared.
         if (!multi_mtf_.ValueFromRank(kMtfForwardDeclared, rank,
                                       &inst_.result_id))
           return Diag(SPV_ERROR_INTERNAL)
                  << "Forward declared MTF rank is out of bounds";

         // We can now remove the id from kMtfForwardDeclared.
         if (!multi_mtf_.Remove(kMtfForwardDeclared, inst_.result_id))
           return Diag(SPV_ERROR_INTERNAL)
                  << "Failed to remove id from kMtfForwardDeclared";
       }
     }
   }

   if (inst_.result_id == 0) {
     // The id was not forward declared, issue a new id.
     inst_.result_id = GetIdBound();
     SetIdBound(inst_.result_id + 1);
   }

   if (model_->id_fallback_strategy() ==
       MarkvModel::IdFallbackStrategy::kRuleBased) {
     if (!rank) {
       multi_mtf_.Insert(kMtfAll, inst_.result_id);
     }
   }

   return SPV_SUCCESS;
 }

 spv_result_t MarkvDecoder::DecodeLiteralNumber(
     const spv_parsed_operand_t& operand) {
   if (operand.number_bit_width <= 32) {
     uint32_t word = 0;
     const spv_result_t result = DecodeNonIdWord(&word);
     if (result != SPV_SUCCESS) return result;
     inst_words_.push_back(word);
   } else {
     assert(operand.number_bit_width <= 64);
     uint64_t word = 0;
     if (operand.number_kind == SPV_NUMBER_UNSIGNED_INT) {
       if (!reader_.ReadVariableWidthU64(&word, model_->u64_chunk_length()))
         return Diag(SPV_ERROR_INVALID_BINARY) << "Failed to read literal U64";
     } else if (operand.number_kind == SPV_NUMBER_SIGNED_INT) {
       int64_t val = 0;
       if (!reader_.ReadVariableWidthS64(&val, model_->s64_chunk_length(),
                                         model_->s64_block_exponent()))
         return Diag(SPV_ERROR_INVALID_BINARY) << "Failed to read literal S64";
       std::memcpy(&word, &val, 8);
     } else if (operand.number_kind == SPV_NUMBER_FLOATING) {
       if (!reader_.ReadUnencoded(&word))
         return Diag(SPV_ERROR_INVALID_BINARY) << "Failed to read literal F64";
     } else {
       return Diag(SPV_ERROR_INTERNAL) << "Unsupported bit length";
     }
     inst_words_.push_back(static_cast<uint32_t>(word));
     inst_words_.push_back(static_cast<uint32_t>(word >> 32));
   }
   return SPV_SUCCESS;
 }

 bool MarkvDecoder::ReadToByteBreak(size_t byte_break_if_less_than) {
   const size_t num_bits_to_next_byte =
       GetNumBitsToNextByte(reader_.GetNumReadBits());
   if (num_bits_to_next_byte == 0 ||
       num_bits_to_next_byte > byte_break_if_less_than)
     return true;

   uint64_t bits = 0;
   if (!reader_.ReadBits(&bits, num_bits_to_next_byte)) return false;

   assert(bits == 0);
   if (bits != 0) return false;

   return true;
 }

 spv_result_t MarkvDecoder::DecodeModule(std::vector<uint32_t>* spirv_binary) {
   const bool header_read_success =
       reader_.ReadUnencoded(&header_.magic_number) &&
       reader_.ReadUnencoded(&header_.markv_version) &&
       reader_.ReadUnencoded(&header_.markv_model) &&
       reader_.ReadUnencoded(&header_.markv_length_in_bits) &&
       reader_.ReadUnencoded(&header_.spirv_version) &&
       reader_.ReadUnencoded(&header_.spirv_generator);

   if (!header_read_success)
     return Diag(SPV_ERROR_INVALID_BINARY) << "Unable to read MARK-V header";

   if (header_.markv_length_in_bits == 0)
     return Diag(SPV_ERROR_INVALID_BINARY)
            << "Header markv_length_in_bits field is zero";

   if (header_.magic_number != MarkvCodec::kMarkvMagicNumber)
     return Diag(SPV_ERROR_INVALID_BINARY)
            << "MARK-V binary has incorrect magic number";

   // TODO(atgoo@github.com): Print version strings.
   if (header_.markv_version != MarkvCodec::GetMarkvVersion())
     return Diag(SPV_ERROR_INVALID_BINARY)
            << "MARK-V binary and the codec have different versions";

   const uint32_t model_type = header_.markv_model >> 16;
   const uint32_t model_version = header_.markv_model & 0xFFFF;
   if (model_type != model_->model_type())
     return Diag(SPV_ERROR_INVALID_BINARY)
            << "MARK-V binary and the codec use different MARK-V models";

   if (model_version != model_->model_version())
     return Diag(SPV_ERROR_INVALID_BINARY)
            << "MARK-V binary and the codec use different versions if the same "
            << "MARK-V model";

   spirv_.reserve(header_.markv_length_in_bits / 2);  // Heuristic.
   spirv_.resize(5, 0);
   spirv_[0] = SpvMagicNumber;
   spirv_[1] = header_.spirv_version;
   spirv_[2] = header_.spirv_generator;

   if (logger_) {
     reader_.SetCallback(
         [this](const std::string& str) { logger_->AppendBitSequence(str); });
   }

   while (reader_.GetNumReadBits() < header_.markv_length_in_bits) {
     inst_ = {};
     const spv_result_t decode_result = DecodeInstruction();
     if (decode_result != SPV_SUCCESS) return decode_result;
   }

   if (validator_options_) {
     spv_const_binary_t validation_binary = {spirv_.data(), spirv_.size()};
     const spv_result_t result = spvValidateWithOptions(
         context_, validator_options_, &validation_binary, nullptr);
     if (result != SPV_SUCCESS) return result;
   }

   // Validate the decode binary
   if (reader_.GetNumReadBits() != header_.markv_length_in_bits ||
       !reader_.OnlyZeroesLeft()) {
     return Diag(SPV_ERROR_INVALID_BINARY)
            << "MARK-V binary has wrong stated bit length "
            << reader_.GetNumReadBits() << " " << header_.markv_length_in_bits;
   }

   // Decoding of the module is finished, validation state should have correct
   // id bound.
   spirv_[3] = GetIdBound();

   *spirv_binary = std::move(spirv_);
   return SPV_SUCCESS;
 }

 // TODO(atgoo@github.com): The implementation borrows heavily from
 // Parser::parseOperand.
 // Consider coupling them together in some way once MARK-V codec is more mature.
 // For now it's better to keep the code independent for experimentation
 // purposes.
 spv_result_t MarkvDecoder::DecodeOperand(
     size_t operand_offset, const spv_operand_type_t type,
     spv_operand_pattern_t* expected_operands) {
   const SpvOp opcode = static_cast<SpvOp>(inst_.opcode);

   memset(&operand_, 0, sizeof(operand_));

   assert((operand_offset >> 16) == 0);
   operand_.offset = static_cast<uint16_t>(operand_offset);
   operand_.type = type;

   // Set default values, may be updated later.
   operand_.number_kind = SPV_NUMBER_NONE;
   operand_.number_bit_width = 0;

   const size_t first_word_index = inst_words_.size();

   switch (type) {
     case SPV_OPERAND_TYPE_RESULT_ID: {
       const spv_result_t result = DecodeResultId();
       if (result != SPV_SUCCESS) return result;

       inst_words_.push_back(inst_.result_id);
       SetIdBound(std::max(GetIdBound(), inst_.result_id + 1));
       PromoteIfNeeded(inst_.result_id);
       break;
     }

     case SPV_OPERAND_TYPE_TYPE_ID: {
       const spv_result_t result = DecodeTypeId();
       if (result != SPV_SUCCESS) return result;

       inst_words_.push_back(inst_.type_id);
       SetIdBound(std::max(GetIdBound(), inst_.type_id + 1));
       PromoteIfNeeded(inst_.type_id);
       break;
     }

     case SPV_OPERAND_TYPE_ID:
     case SPV_OPERAND_TYPE_OPTIONAL_ID:
     case SPV_OPERAND_TYPE_SCOPE_ID:
     case SPV_OPERAND_TYPE_MEMORY_SEMANTICS_ID: {
       uint32_t id = 0;
       const spv_result_t result = DecodeRefId(&id);
       if (result != SPV_SUCCESS) return result;

       if (id == 0) return Diag(SPV_ERROR_INVALID_BINARY) << "Decoded id is 0";

       if (type == SPV_OPERAND_TYPE_ID || type == SPV_OPERAND_TYPE_OPTIONAL_ID) {
         operand_.type = SPV_OPERAND_TYPE_ID;

         if (opcode == SpvOpExtInst && operand_.offset == 3) {
           // The current word is the extended instruction set id.
           // Set the extended instruction set type for the current
           // instruction.
           auto ext_inst_type_iter = import_id_to_ext_inst_type_.find(id);
           if (ext_inst_type_iter == import_id_to_ext_inst_type_.end()) {
             return Diag(SPV_ERROR_INVALID_ID)
                    << "OpExtInst set id " << id
                    << " does not reference an OpExtInstImport result Id";
           }
           inst_.ext_inst_type = ext_inst_type_iter->second;
         }
       }

       inst_words_.push_back(id);
       SetIdBound(std::max(GetIdBound(), id + 1));
       PromoteIfNeeded(id);
       break;
     }

     case SPV_OPERAND_TYPE_EXTENSION_INSTRUCTION_NUMBER: {
       uint32_t word = 0;
       const spv_result_t result = DecodeNonIdWord(&word);
       if (result != SPV_SUCCESS) return result;

       inst_words_.push_back(word);

       assert(SpvOpExtInst == opcode);
       assert(inst_.ext_inst_type != SPV_EXT_INST_TYPE_NONE);
       spv_ext_inst_desc ext_inst;
       if (grammar_.lookupExtInst(inst_.ext_inst_type, word, &ext_inst))
         return Diag(SPV_ERROR_INVALID_BINARY)
                << "Invalid extended instruction number: " << word;
       spvPushOperandTypes(ext_inst->operandTypes, expected_operands);
       break;
     }

     case SPV_OPERAND_TYPE_LITERAL_INTEGER:
     case SPV_OPERAND_TYPE_OPTIONAL_LITERAL_INTEGER: {
       // These are regular single-word literal integer operands.
       // Post-parsing validation should check the range of the parsed value.
       operand_.type = SPV_OPERAND_TYPE_LITERAL_INTEGER;
       // It turns out they are always unsigned integers!
       operand_.number_kind = SPV_NUMBER_UNSIGNED_INT;
       operand_.number_bit_width = 32;

       uint32_t word = 0;
       const spv_result_t result = DecodeNonIdWord(&word);
       if (result != SPV_SUCCESS) return result;

       inst_words_.push_back(word);
       break;
     }

     case SPV_OPERAND_TYPE_TYPED_LITERAL_NUMBER:
     case SPV_OPERAND_TYPE_OPTIONAL_TYPED_LITERAL_INTEGER: {
       operand_.type = SPV_OPERAND_TYPE_TYPED_LITERAL_NUMBER;
       if (opcode == SpvOpSwitch) {
         // The literal operands have the same type as the value
         // referenced by the selector Id.
         const uint32_t selector_id = inst_words_.at(1);
         const auto type_id_iter = id_to_type_id_.find(selector_id);
         if (type_id_iter == id_to_type_id_.end() || type_id_iter->second == 0) {
           return Diag(SPV_ERROR_INVALID_BINARY)
                  << "Invalid OpSwitch: selector id " << selector_id
                  << " has no type";
         }
         uint32_t type_id = type_id_iter->second;

         if (selector_id == type_id) {
           // Recall that by convention, a result ID that is a type definition
           // maps to itself.
           return Diag(SPV_ERROR_INVALID_BINARY)
                  << "Invalid OpSwitch: selector id " << selector_id
                  << " is a type, not a value";
         }
         if (auto error = SetNumericTypeInfoForType(&operand_, type_id))
           return error;
         if (operand_.number_kind != SPV_NUMBER_UNSIGNED_INT &&
             operand_.number_kind != SPV_NUMBER_SIGNED_INT) {
           return Diag(SPV_ERROR_INVALID_BINARY)
                  << "Invalid OpSwitch: selector id " << selector_id
                  << " is not a scalar integer";
         }
       } else {
         assert(opcode == SpvOpConstant || opcode == SpvOpSpecConstant);
         // The literal number type is determined by the type Id for the
         // constant.
         assert(inst_.type_id);
         if (auto error = SetNumericTypeInfoForType(&operand_, inst_.type_id))
           return error;
       }

       if (auto error = DecodeLiteralNumber(operand_)) return error;

       break;
     }

     case SPV_OPERAND_TYPE_LITERAL_STRING:
     case SPV_OPERAND_TYPE_OPTIONAL_LITERAL_STRING: {
       operand_.type = SPV_OPERAND_TYPE_LITERAL_STRING;
       std::vector<char> str;
       auto* codec = model_->GetLiteralStringHuffmanCodec(inst_.opcode);

       if (codec) {
         std::string decoded_string;
         const bool huffman_result =
             codec->DecodeFromStream(GetReadBitCallback(), &decoded_string);
         assert(huffman_result);
         if (!huffman_result)
           return Diag(SPV_ERROR_INVALID_BINARY)
                  << "Failed to read literal string";

         if (decoded_string != "kMarkvNoneOfTheAbove") {
           std::copy(decoded_string.begin(), decoded_string.end(),
                     std::back_inserter(str));
           str.push_back('\0');
         }
       }

       // The loop is expected to terminate once we encounter '\0' or exhaust
       // the bit stream.
       if (str.empty()) {
         while (true) {
           char ch = 0;
           if (!reader_.ReadUnencoded(&ch))
             return Diag(SPV_ERROR_INVALID_BINARY)
                    << "Failed to read literal string";

           str.push_back(ch);

           if (ch == '\0') break;
         }
       }

       while (str.size() % 4 != 0) str.push_back('\0');

       inst_words_.resize(inst_words_.size() + str.size() / 4);
       std::memcpy(&inst_words_[first_word_index], str.data(), str.size());

       if (SpvOpExtInstImport == opcode) {
         // Record the extended instruction type for the ID for this import.
         // There is only one string literal argument to OpExtInstImport,
         // so it's sufficient to guard this just on the opcode.
         const spv_ext_inst_type_t ext_inst_type =
             spvExtInstImportTypeGet(str.data());
         if (SPV_EXT_INST_TYPE_NONE == ext_inst_type) {
           return Diag(SPV_ERROR_INVALID_BINARY)
                  << "Invalid extended instruction import '" << str.data()
                  << "'";
         }
         // We must have parsed a valid result ID.  It's a condition
         // of the grammar, and we only accept non-zero result Ids.
         assert(inst_.result_id);
         const bool inserted =
             import_id_to_ext_inst_type_.emplace(inst_.result_id, ext_inst_type)
                 .second;
         (void)inserted;
         assert(inserted);
       }
       break;
     }

     case SPV_OPERAND_TYPE_CAPABILITY:
     case SPV_OPERAND_TYPE_SOURCE_LANGUAGE:
     case SPV_OPERAND_TYPE_EXECUTION_MODEL:
     case SPV_OPERAND_TYPE_ADDRESSING_MODEL:
     case SPV_OPERAND_TYPE_MEMORY_MODEL:
     case SPV_OPERAND_TYPE_EXECUTION_MODE:
     case SPV_OPERAND_TYPE_STORAGE_CLASS:
     case SPV_OPERAND_TYPE_DIMENSIONALITY:
     case SPV_OPERAND_TYPE_SAMPLER_ADDRESSING_MODE:
     case SPV_OPERAND_TYPE_SAMPLER_FILTER_MODE:
     case SPV_OPERAND_TYPE_SAMPLER_IMAGE_FORMAT:
     case SPV_OPERAND_TYPE_FP_ROUNDING_MODE:
     case SPV_OPERAND_TYPE_LINKAGE_TYPE:
     case SPV_OPERAND_TYPE_ACCESS_QUALIFIER:
     case SPV_OPERAND_TYPE_OPTIONAL_ACCESS_QUALIFIER:
     case SPV_OPERAND_TYPE_FUNCTION_PARAMETER_ATTRIBUTE:
     case SPV_OPERAND_TYPE_DECORATION:
     case SPV_OPERAND_TYPE_BUILT_IN:
     case SPV_OPERAND_TYPE_GROUP_OPERATION:
     case SPV_OPERAND_TYPE_KERNEL_ENQ_FLAGS:
     case SPV_OPERAND_TYPE_KERNEL_PROFILING_INFO: {
       // A single word that is a plain enum value.
       uint32_t word = 0;
       const spv_result_t result = DecodeNonIdWord(&word);
       if (result != SPV_SUCCESS) return result;

       inst_words_.push_back(word);

       // Map an optional operand type to its corresponding concrete type.
       if (type == SPV_OPERAND_TYPE_OPTIONAL_ACCESS_QUALIFIER)
         operand_.type = SPV_OPERAND_TYPE_ACCESS_QUALIFIER;

       spv_operand_desc entry;
       if (grammar_.lookupOperand(type, word, &entry)) {
         return Diag(SPV_ERROR_INVALID_BINARY)
                << "Invalid " << spvOperandTypeStr(operand_.type)
                << " operand: " << word;
       }

       // Prepare to accept operands to this operand, if needed.
       spvPushOperandTypes(entry->operandTypes, expected_operands);
       break;
     }

     case SPV_OPERAND_TYPE_FP_FAST_MATH_MODE:
     case SPV_OPERAND_TYPE_FUNCTION_CONTROL:
     case SPV_OPERAND_TYPE_LOOP_CONTROL:
     case SPV_OPERAND_TYPE_IMAGE:
     case SPV_OPERAND_TYPE_OPTIONAL_IMAGE:
     case SPV_OPERAND_TYPE_OPTIONAL_MEMORY_ACCESS:
     case SPV_OPERAND_TYPE_SELECTION_CONTROL: {
       // This operand is a mask.
       uint32_t word = 0;
       const spv_result_t result = DecodeNonIdWord(&word);
       if (result != SPV_SUCCESS) return result;

       inst_words_.push_back(word);

       // Map an optional operand type to its corresponding concrete type.
       if (type == SPV_OPERAND_TYPE_OPTIONAL_IMAGE)
         operand_.type = SPV_OPERAND_TYPE_IMAGE;
       else if (type == SPV_OPERAND_TYPE_OPTIONAL_MEMORY_ACCESS)
         operand_.type = SPV_OPERAND_TYPE_MEMORY_ACCESS;

       // Check validity of set mask bits. Also prepare for operands for those
       // masks if they have any.  To get operand order correct, scan from
       // MSB to LSB since we can only prepend operands to a pattern.
       // The only case in the grammar where you have more than one mask bit
       // having an operand is for image operands.  See SPIR-V 3.14 Image
       // Operands.
       uint32_t remaining_word = word;
       for (uint32_t mask = (1u << 31); remaining_word; mask >>= 1) {
         if (remaining_word & mask) {
           spv_operand_desc entry;
           if (grammar_.lookupOperand(type, mask, &entry)) {
             return Diag(SPV_ERROR_INVALID_BINARY)
                    << "Invalid " << spvOperandTypeStr(operand_.type)
                    << " operand: " << word << " has invalid mask component "
                    << mask;
           }
           remaining_word ^= mask;
           spvPushOperandTypes(entry->operandTypes, expected_operands);
         }
       }
       if (word == 0) {
         // An all-zeroes mask *might* also be valid.
         spv_operand_desc entry;
         if (SPV_SUCCESS == grammar_.lookupOperand(type, 0, &entry)) {
           // Prepare for its operands, if any.
           spvPushOperandTypes(entry->operandTypes, expected_operands);
         }
       }
       break;
     }
     default:
       return Diag(SPV_ERROR_INVALID_BINARY)
              << "Internal error: Unhandled operand type: " << type;
   }

   operand_.num_words = uint16_t(inst_words_.size() - first_word_index);

   assert(spvOperandIsConcrete(operand_.type));

   parsed_operands_.push_back(operand_);

   return SPV_SUCCESS;
 }

 spv_result_t MarkvDecoder::DecodeInstruction() {
   parsed_operands_.clear();
   inst_words_.clear();

   // Opcode/num_words placeholder, the word will be filled in later.
   inst_words_.push_back(0);

   bool num_operands_still_unknown = true;
   {
     uint32_t opcode = 0;
     uint32_t num_operands = 0;

     const spv_result_t opcode_decoding_result =
         DecodeOpcodeAndNumberOfOperands(&opcode, &num_operands);
     if (opcode_decoding_result < 0) return opcode_decoding_result;

     if (opcode_decoding_result == SPV_SUCCESS) {
       inst_.num_operands = static_cast<uint16_t>(num_operands);
       num_operands_still_unknown = false;
     } else {
       if (!reader_.ReadVariableWidthU32(&opcode,
                                         model_->opcode_chunk_length())) {
         return Diag(SPV_ERROR_INVALID_BINARY)
                << "Failed to read opcode of instruction";
       }
     }

     inst_.opcode = static_cast<uint16_t>(opcode);
   }

   const SpvOp opcode = static_cast<SpvOp>(inst_.opcode);

   spv_opcode_desc opcode_desc;
   if (grammar_.lookupOpcode(opcode, &opcode_desc) != SPV_SUCCESS) {
     return Diag(SPV_ERROR_INVALID_BINARY) << "Invalid opcode";
   }

   spv_operand_pattern_t expected_operands;
   expected_operands.reserve(opcode_desc->numTypes);
   for (auto i = 0; i < opcode_desc->numTypes; i++) {
     expected_operands.push_back(
         opcode_desc->operandTypes[opcode_desc->numTypes - i - 1]);
   }

   if (num_operands_still_unknown) {
     if (!OpcodeHasFixedNumberOfOperands(opcode)) {
       if (!reader_.ReadVariableWidthU16(&inst_.num_operands,
                                         model_->num_operands_chunk_length()))
         return Diag(SPV_ERROR_INVALID_BINARY)
                << "Failed to read num_operands of instruction";
     } else {
       inst_.num_operands = static_cast<uint16_t>(expected_operands.size());
     }
   }

   for (operand_index_ = 0;
        operand_index_ < static_cast<size_t>(inst_.num_operands);
        ++operand_index_) {
     assert(!expected_operands.empty());
     const spv_operand_type_t type =
         spvTakeFirstMatchableOperand(&expected_operands);

     const size_t operand_offset = inst_words_.size();

     const spv_result_t decode_result =
         DecodeOperand(operand_offset, type, &expected_operands);

     if (decode_result != SPV_SUCCESS) return decode_result;
   }

   assert(inst_.num_operands == parsed_operands_.size());

   // Only valid while inst_words_ and parsed_operands_ remain unchanged (until
   // next DecodeInstruction call).
   inst_.words = inst_words_.data();
   inst_.operands = parsed_operands_.empty() ? nullptr : parsed_operands_.data();
   inst_.num_words = static_cast<uint16_t>(inst_words_.size());
   inst_words_[0] = spvOpcodeMake(inst_.num_words, SpvOp(inst_.opcode));

   std::copy(inst_words_.begin(), inst_words_.end(), std::back_inserter(spirv_));

   assert(inst_.num_words ==
              std::accumulate(
                  parsed_operands_.begin(), parsed_operands_.end(), 1,
                  [](int num_words, const spv_parsed_operand_t& operand) {
                    return num_words += operand.num_words;
                  }) &&
          "num_words in instruction doesn't correspond to the sum of num_words"
          "in the operands");

   RecordNumberType();
   ProcessCurInstruction();

   if (!ReadToByteBreak(MarkvCodec::kByteBreakAfterInstIfLessThanUntilNextByte))
     return Diag(SPV_ERROR_INVALID_BINARY) << "Failed to read to byte break";

   if (logger_) {
     logger_->NewLine();
     std::stringstream ss;
     ss << spvOpcodeString(opcode) << " ";
     for (size_t index = 1; index < inst_words_.size(); ++index)
       ss << inst_words_[index] << " ";
     logger_->AppendText(ss.str());
     logger_->NewLine();
     logger_->NewLine();
     if (!logger_->DebugInstruction(inst_)) return SPV_REQUESTED_TERMINATION;
   }

   return SPV_SUCCESS;
 }

 spv_result_t MarkvDecoder::SetNumericTypeInfoForType(
     spv_parsed_operand_t* parsed_operand, uint32_t type_id) {
   assert(type_id != 0);
   auto type_info_iter = type_id_to_number_type_info_.find(type_id);
   if (type_info_iter == type_id_to_number_type_info_.end()) {
     return Diag(SPV_ERROR_INVALID_BINARY)
            << "Type Id " << type_id << " is not a type";
   }

   const NumberType& info = type_info_iter->second;
   if (info.type == SPV_NUMBER_NONE) {
     // This is a valid type, but for something other than a scalar number.
     return Diag(SPV_ERROR_INVALID_BINARY)
            << "Type Id " << type_id << " is not a scalar numeric type";
   }

   parsed_operand->number_kind = info.type;
   parsed_operand->number_bit_width = info.bit_width;
   // Round up the word count.
   parsed_operand->num_words = static_cast<uint16_t>((info.bit_width + 31) / 32);
   return SPV_SUCCESS;
 }

 void MarkvDecoder::RecordNumberType() {
   const SpvOp opcode = static_cast<SpvOp>(inst_.opcode);
   if (spvOpcodeGeneratesType(opcode)) {
     NumberType info = {SPV_NUMBER_NONE, 0};
     if (SpvOpTypeInt == opcode) {
       info.bit_width = inst_.words[inst_.operands[1].offset];
       info.type = inst_.words[inst_.operands[2].offset]
                       ? SPV_NUMBER_SIGNED_INT
                       : SPV_NUMBER_UNSIGNED_INT;
     } else if (SpvOpTypeFloat == opcode) {
       info.bit_width = inst_.words[inst_.operands[1].offset];
       info.type = SPV_NUMBER_FLOATING;
     }
     // The *result* Id of a type generating instruction is the type Id.
     type_id_to_number_type_info_[inst_.result_id] = info;
   }
 }

 }  // namespace comp
 }  // namespace spvtools