 // third_party/SPIRV-Tools/source/opt/convert_to_half_pass.cpp - SwiftShader - Git at Google

 // Copyright (c) 2019 The Khronos Group Inc.
 // Copyright (c) 2019 Valve Corporation
 // Copyright (c) 2019 LunarG Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.

 #include "convert_to_half_pass.h"

 #include "source/opt/ir_builder.h"

 namespace {

 // In-operand index at which ProcessImageRef reads and rewrites the Dref id of
 // a depth-reference image instruction. Declared unsigned because it is only
 // ever passed as a uint32_t operand index.
 constexpr uint32_t kImageSampleDrefIdInIdx = 2;

 }  // anonymous namespace

 namespace spvtools {
 namespace opt {

 // Returns true if |inst|'s opcode is listed in target_ops_core_, or if |inst|
 // is an OpExtInst of the GLSL.std.450 import whose extended opcode is listed
 // in target_ops_450_.
 bool ConvertToHalfPass::IsArithmetic(Instruction* inst) {
   if (target_ops_core_.count(inst->opcode()) != 0) return true;
   if (inst->opcode() != SpvOpExtInst) return false;
   // In-operand 0 is the extended instruction set id, in-operand 1 the
   // extended opcode.
   const uint32_t glsl450_id =
       context()->get_feature_mgr()->GetExtInstImportId_GLSLstd450();
   if (inst->GetSingleWordInOperand(0) != glsl450_id) return false;
   return target_ops_450_.count(inst->GetSingleWordInOperand(1)) != 0;
 }

 // Returns true if |inst| has a result type and Pass::IsFloat accepts that
 // type for the given |width|. Instructions without a type yield false.
 bool ConvertToHalfPass::IsFloat(Instruction* inst, uint32_t width) {
   const uint32_t type_id = inst->type_id();
   return type_id != 0 && Pass::IsFloat(type_id, width);
 }

 // Returns true if the result id of |inst| carries an OpDecorate with the
 // RelaxedPrecision decoration.
 bool ConvertToHalfPass::IsDecoratedRelaxed(Instruction* inst) {
   const uint32_t result_id = inst->result_id();
   for (auto dec_inst :
        get_decoration_mgr()->GetDecorationsFor(result_id, false)) {
     if (dec_inst->opcode() != SpvOpDecorate) continue;
     // In-operand 1 of OpDecorate is the decoration kind.
     if (dec_inst->GetSingleWordInOperand(1) == SpvDecorationRelaxedPrecision)
       return true;
   }
   return false;
 }

 // Returns true if |id| has been recorded in relaxed_ids_set_.
 bool ConvertToHalfPass::IsRelaxed(uint32_t id) {
   return relaxed_ids_set_.find(id) != relaxed_ids_set_.end();
 }

 // Records |id| in relaxed_ids_set_.
 void ConvertToHalfPass::AddRelaxed(uint32_t id) {
   relaxed_ids_set_.insert(id);
 }

 // Returns the type manager's registered float scalar type of |width| bits.
 analysis::Type* ConvertToHalfPass::FloatScalarType(uint32_t width) {
   analysis::Float scalar_ty(width);
   auto* type_mgr = context()->get_type_mgr();
   return type_mgr->GetRegisteredType(&scalar_ty);
 }

 // Returns the registered vector type with |v_len| float components of
 // |width| bits each.
 analysis::Type* ConvertToHalfPass::FloatVectorType(uint32_t v_len,
                                                    uint32_t width) {
   analysis::Vector vector_ty(FloatScalarType(width), v_len);
   auto* type_mgr = context()->get_type_mgr();
   return type_mgr->GetRegisteredType(&vector_ty);
 }

 // Returns the registered matrix type with |v_cnt| columns whose column type
 // has the same length as the vector type |vty_id| but |width|-bit floats.
 analysis::Type* ConvertToHalfPass::FloatMatrixType(uint32_t v_cnt,
                                                    uint32_t vty_id,
                                                    uint32_t width) {
   // In-operand 1 of the column vector type is read as its length.
   const uint32_t column_len =
       get_def_use_mgr()->GetDef(vty_id)->GetSingleWordInOperand(1);
   analysis::Matrix matrix_ty(FloatVectorType(column_len, width), v_cnt);
   auto* type_mgr = context()->get_type_mgr();
   return type_mgr->GetRegisteredType(&matrix_ty);
 }

 uint32_t ConvertToHalfPass::EquivFloatTypeId(uint32_t ty_id, uint32_t width) {
   analysis::Type* reg_equiv_ty;
   Instruction* ty_inst = get_def_use_mgr()->GetDef(ty_id);
   if (ty_inst->opcode() == SpvOpTypeMatrix)
     reg_equiv_ty = FloatMatrixType(ty_inst->GetSingleWordInOperand(1),
                                    ty_inst->GetSingleWordInOperand(0), width);
   else if (ty_inst->opcode() == SpvOpTypeVector)
     reg_equiv_ty = FloatVectorType(ty_inst->GetSingleWordInOperand(1), width);
   else  // SpvOpTypeFloat
     reg_equiv_ty = FloatScalarType(width);
   return context()->get_type_mgr()->GetTypeInstruction(reg_equiv_ty);
 }

 void ConvertToHalfPass::GenConvert(uint32_t* val_idp, uint32_t width,
                                    Instruction* inst) {
   Instruction* val_inst = get_def_use_mgr()->GetDef(*val_idp);
   uint32_t ty_id = val_inst->type_id();
   uint32_t nty_id = EquivFloatTypeId(ty_id, width);
   if (nty_id == ty_id) return;
   Instruction* cvt_inst;
   InstructionBuilder builder(
       context(), inst,
       IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);
   if (val_inst->opcode() == SpvOpUndef)
     cvt_inst = builder.AddNullaryOp(nty_id, SpvOpUndef);
   else
     cvt_inst = builder.AddUnaryOp(nty_id, SpvOpFConvert, *val_idp);
   *val_idp = cvt_inst->result_id();
 }

 // Replaces an OpFConvert that produces a matrix with per-column
 // OpCompositeExtract + OpFConvert instructions combined by an
 // OpCompositeConstruct. All uses of |inst| are redirected to the new
 // composite, and |inst| itself is turned into an OpCopyObject typed as the
 // matrix at the source width. Returns true if |inst| was rewritten, false if
 // it is not an OpFConvert with a matrix result type.
 bool ConvertToHalfPass::MatConvertCleanup(Instruction* inst) {
   if (inst->opcode() != SpvOpFConvert) return false;
   const uint32_t mat_ty_id = inst->type_id();
   Instruction* mat_ty = get_def_use_mgr()->GetDef(mat_ty_id);
   if (mat_ty->opcode() != SpvOpTypeMatrix) return false;
   const uint32_t col_ty_id = mat_ty->GetSingleWordInOperand(0);
   const uint32_t col_cnt = mat_ty->GetSingleWordInOperand(1);
   Instruction* col_ty = get_def_use_mgr()->GetDef(col_ty_id);
   Instruction* comp_ty =
       get_def_use_mgr()->GetDef(col_ty->GetSingleWordInOperand(0));
   InstructionBuilder builder(
       context(), inst,
       IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);
   // The source width is 32 when the destination component is 16 bits wide,
   // and 16 otherwise.
   uint32_t src_width = 16;
   if (comp_ty->GetSingleWordInOperand(0) == 16) src_width = 32;
   const uint32_t src_mat_id = inst->GetSingleWordInOperand(0);
   const uint32_t src_col_ty_id = EquivFloatTypeId(col_ty_id, src_width);
   // Extract and convert each column, collecting the converted ids.
   std::vector<Operand> columns;
   for (uint32_t col = 0; col < col_cnt; ++col) {
     Instruction* extract = builder.AddIdLiteralOp(
         src_col_ty_id, SpvOpCompositeExtract, src_mat_id, col);
     Instruction* convert =
         builder.AddUnaryOp(col_ty_id, SpvOpFConvert, extract->result_id());
     columns.push_back({SPV_OPERAND_TYPE_ID, {convert->result_id()}});
   }
   const uint32_t new_mat_id = TakeNextId();
   std::unique_ptr<Instruction> construct(new Instruction(
       context(), SpvOpCompositeConstruct, mat_ty_id, new_mat_id, columns));
   (void)builder.AddInstruction(std::move(construct));
   context()->ReplaceAllUsesWith(inst->result_id(), new_mat_id);
   // Keep the original instruction valid by making it a copy of its operand.
   inst->SetOpcode(SpvOpCopyObject);
   inst->SetResultType(EquivFloatTypeId(mat_ty_id, src_width));
   get_def_use_mgr()->AnalyzeInstUse(inst);
   return true;
 }

 // Asks the decoration manager to remove every OpDecorate RelaxedPrecision
 // from |id| and returns the manager's result.
 bool ConvertToHalfPass::RemoveRelaxedDecoration(uint32_t id) {
   auto is_relaxed_decoration = [](const Instruction& dec) {
     return dec.opcode() == SpvOpDecorate &&
            dec.GetSingleWordInOperand(1u) == SpvDecorationRelaxedPrecision;
   };
   return context()->get_decoration_mgr()->RemoveDecorationsFrom(
       id, is_relaxed_decoration);
 }

 // Rewrites |inst| to operate on float16: every float32-based id operand is
 // routed through GenConvert(…, 16, …), and a float32-based result type is
 // replaced by its float16 equivalent, with the result id recorded in
 // converted_ids_. Returns true if anything was changed.
 bool ConvertToHalfPass::GenHalfArith(Instruction* inst) {
   bool changed = false;
   inst->ForEachInId([inst, &changed, this](uint32_t* operand_idp) {
     Instruction* operand_def = get_def_use_mgr()->GetDef(*operand_idp);
     if (IsFloat(operand_def, 32)) {
       GenConvert(operand_idp, 16, inst);
       changed = true;
     }
   });
   if (IsFloat(inst, 32)) {
     const uint32_t half_ty_id = EquivFloatTypeId(inst->type_id(), 16);
     inst->SetResultType(half_ty_id);
     converted_ids_.insert(inst->result_id());
     changed = true;
   }
   // Operands and/or type changed, so refresh the def-use information.
   if (changed) get_def_use_mgr()->AnalyzeInstUse(inst);
   return changed;
 }

 // Converts the float value operands of the phi |inst| that are |from_width|
 // bits wide to |to_width| bits. Each conversion is placed in the block the
 // value arrives from, before that block's last instruction, or before the
 // merge instruction when one directly precedes it. When |to_width| is 16 the
 // phi's own type becomes the float16 equivalent and its result id is
 // recorded in converted_ids_. Returns true if |inst| was changed.
 bool ConvertToHalfPass::ProcessPhi(Instruction* inst, uint32_t from_width,
                                    uint32_t to_width) {
   uint32_t in_idx = 0;
   uint32_t* value_idp = nullptr;
   bool changed = false;
   inst->ForEachInId([&in_idx, &value_idp, from_width, to_width, &changed,
                      this](uint32_t* idp) {
     // In-operands alternate: even = incoming value, odd = its parent block.
     const bool is_value_operand = (in_idx % 2 == 0);
     ++in_idx;
     if (is_value_operand) {
       value_idp = idp;
       return;
     }
     Instruction* value_def = get_def_use_mgr()->GetDef(*value_idp);
     if (!IsFloat(value_def, from_width)) return;
     BasicBlock* pred = context()->get_instr_block(*idp);
     auto insert_pt = pred->tail();
     if (insert_pt != pred->begin()) {
       auto before_tail = insert_pt;
       --before_tail;
       const auto prev_op = before_tail->opcode();
       if (prev_op == SpvOpSelectionMerge || prev_op == SpvOpLoopMerge)
         insert_pt = before_tail;
     }
     GenConvert(value_idp, to_width, &*insert_pt);
     changed = true;
   });
   if (to_width == 16u) {
     inst->SetResultType(EquivFloatTypeId(inst->type_id(), 16u));
     converted_ids_.insert(inst->result_id());
     changed = true;
   }
   if (changed) get_def_use_mgr()->AnalyzeInstUse(inst);
   return changed;
 }

 // Handles an OpFConvert |inst|.
 //  - If its result is float32-based and relaxed, the result type is changed
 //    to the float16 equivalent and the id is recorded in converted_ids_.
 //  - If operand and result types then match, the opcode is changed to
 //    OpCopyObject to keep the validator happy; simplification and DCE will
 //    clean it up. One way this can happen is if an FConvert generated during
 //    this pass (likely by ProcessPhi) is later encountered here and its
 //    operand has been changed to half.
 // Returns true only if |inst| was actually changed, so an untouched convert
 // does not make the pass report a modification.
 bool ConvertToHalfPass::ProcessConvert(Instruction* inst) {
   bool modified = false;
   if (IsFloat(inst, 32) && IsRelaxed(inst->result_id())) {
     inst->SetResultType(EquivFloatTypeId(inst->type_id(), 16));
     get_def_use_mgr()->AnalyzeInstUse(inst);
     converted_ids_.insert(inst->result_id());
     modified = true;
   }
   const uint32_t val_id = inst->GetSingleWordInOperand(0);
   Instruction* val_inst = get_def_use_mgr()->GetDef(val_id);
   if (inst->type_id() == val_inst->type_id()) {
     inst->SetOpcode(SpvOpCopyObject);
     modified = true;
   }
   return modified;
 }

 // For an image instruction listed in dref_image_ops_, converts its Dref
 // operand back to float32 if that id was converted by this pass. Other image
 // instructions are left alone. Returns true if |inst| was changed.
 bool ConvertToHalfPass::ProcessImageRef(Instruction* inst) {
   if (dref_image_ops_.count(inst->opcode()) == 0) return false;
   uint32_t dref_id = inst->GetSingleWordInOperand(kImageSampleDrefIdInIdx);
   if (converted_ids_.count(dref_id) == 0) return false;
   GenConvert(&dref_id, 32, inst);
   inst->SetInOperand(kImageSampleDrefIdInIdx, {dref_id});
   get_def_use_mgr()->AnalyzeInstUse(inst);
   return true;
 }

 // Handles an instruction that is not itself converted to half: any id
 // operand recorded in converted_ids_ is converted back to float32 in front
 // of |inst|. Phis are delegated to ProcessPhi(inst, 16, 32). Returns true if
 // an operand was replaced.
 bool ConvertToHalfPass::ProcessDefault(Instruction* inst) {
   if (inst->opcode() == SpvOpPhi) return ProcessPhi(inst, 16u, 32u);
   bool changed = false;
   inst->ForEachInId([inst, &changed, this](uint32_t* idp) {
     if (converted_ids_.count(*idp) != 0) {
       const uint32_t id_before = *idp;
       GenConvert(idp, 32, inst);
       // GenConvert leaves the id untouched when no conversion was needed.
       if (*idp != id_before) changed = true;
     }
   });
   if (changed) get_def_use_mgr()->AnalyzeInstUse(inst);
   return changed;
 }

 // Dispatches |inst| to the matching handler: relaxed arithmetic, relaxed
 // phi, OpFConvert, image instruction, or the default handler. Returns the
 // handler's "modified" result.
 bool ConvertToHalfPass::GenHalfInst(Instruction* inst) {
   const auto op = inst->opcode();
   const bool relaxed = IsRelaxed(inst->result_id());
   if (relaxed && IsArithmetic(inst)) return GenHalfArith(inst);
   if (relaxed && op == SpvOpPhi) return ProcessPhi(inst, 32u, 16u);
   if (op == SpvOpFConvert) return ProcessConvert(inst);
   if (image_ops_.count(op) != 0) return ProcessImageRef(inst);
   return ProcessDefault(inst);
 }

 // Tries to add the result of |inst| to the relaxed set. Only float32-based
 // results that are not yet relaxed are considered. The result is added when
 //  - it is decorated RelaxedPrecision, or
 //  - |inst| is one of closure_ops_ and either all of its float32 operands
 //    are relaxed, or all of its users are float32 results that are relaxed.
 // Returns true if the result id was newly added.
 bool ConvertToHalfPass::CloseRelaxInst(Instruction* inst) {
   const uint32_t result_id = inst->result_id();
   if (result_id == 0 || IsRelaxed(result_id) || !IsFloat(inst, 32))
     return false;
   if (IsDecoratedRelaxed(inst)) {
     AddRelaxed(result_id);
     return true;
   }
   if (closure_ops_.count(inst->opcode()) == 0) return false;
   // First criterion: every float32 operand is already relaxed.
   bool operands_relaxed = true;
   inst->ForEachInId([&operands_relaxed, this](uint32_t* idp) {
     Instruction* operand_def = get_def_use_mgr()->GetDef(*idp);
     if (IsFloat(operand_def, 32) && !IsRelaxed(*idp)) operands_relaxed = false;
   });
   if (!operands_relaxed) {
     // Second criterion: every user is a relaxed float32 result.
     bool users_relaxed = true;
     get_def_use_mgr()->ForEachUser(
         inst, [&users_relaxed, this](Instruction* user) {
           const bool user_ok =
               user->result_id() != 0 && IsFloat(user, 32) &&
               (IsDecoratedRelaxed(user) || IsRelaxed(user->result_id()));
           if (!user_ok) users_relaxed = false;
         });
     if (!users_relaxed) return false;
   }
   AddRelaxed(result_id);
   return true;
 }

 // Processes one function in three sweeps over its blocks in reverse
 // post-order:
 //  1. repeat CloseRelaxInst over all instructions until nothing new becomes
 //     relaxed;
 //  2. run GenHalfInst on every instruction;
 //  3. run MatConvertCleanup on every instruction to replace matrix converts.
 // Returns true if sweep 2 or 3 modified the function.
 bool ConvertToHalfPass::ProcessFunction(Function* func) {
   bool grew = false;
   do {
     grew = false;
     cfg()->ForEachBlockInReversePostOrder(
         func->entry().get(), [&grew, this](BasicBlock* bb) {
           for (auto ii = bb->begin(); ii != bb->end(); ++ii) {
             if (CloseRelaxInst(&*ii)) grew = true;
           }
         });
   } while (grew);

   bool modified = false;
   cfg()->ForEachBlockInReversePostOrder(
       func->entry().get(), [&modified, this](BasicBlock* bb) {
         for (auto ii = bb->begin(); ii != bb->end(); ++ii) {
           if (GenHalfInst(&*ii)) modified = true;
         }
       });
   cfg()->ForEachBlockInReversePostOrder(
       func->entry().get(), [&modified, this](BasicBlock* bb) {
         for (auto ii = bb->begin(); ii != bb->end(); ++ii) {
           if (MatConvertCleanup(&*ii)) modified = true;
         }
       });
   return modified;
 }

 // Runs ProcessFunction over the reachable call tree, adds the Float16
 // capability if that changed anything, then removes RelaxedPrecision
 // decorations from every relaxed id and from every module-level type/value
 // that has a result id. Reports whether the module was changed.
 Pass::Status ConvertToHalfPass::ProcessImpl() {
   Pass::ProcessFunction process_fn = [this](Function* fp) {
     return ProcessFunction(fp);
   };
   bool modified = context()->ProcessReachableCallTree(process_fn);
   if (modified) context()->AddCapability(SpvCapabilityFloat16);
   for (auto relaxed_id : relaxed_ids_set_) {
     if (RemoveRelaxedDecoration(relaxed_id)) modified = true;
   }
   for (auto& val : get_module()->types_values()) {
     const uint32_t val_id = val.result_id();
     if (val_id == 0) continue;
     if (RemoveRelaxedDecoration(val_id)) modified = true;
   }
   if (modified) return Status::SuccessWithChange;
   return Status::SuccessWithoutChange;
 }

 // Pass entry point: sets up the opcode tables and id sets, then runs the
 // conversion and returns its status.
 Pass::Status ConvertToHalfPass::Process() {
   Initialize();
   const Pass::Status status = ProcessImpl();
   return status;
 }

 void ConvertToHalfPass::Initialize() {
   target_ops_core_ = {
       SpvOpVectorExtractDynamic,
       SpvOpVectorInsertDynamic,
       SpvOpVectorShuffle,
       SpvOpCompositeConstruct,
       SpvOpCompositeInsert,
       SpvOpCompositeExtract,
       SpvOpCopyObject,
       SpvOpTranspose,
       SpvOpConvertSToF,
       SpvOpConvertUToF,
       // SpvOpFConvert,
       // SpvOpQuantizeToF16,
       SpvOpFNegate,
       SpvOpFAdd,
       SpvOpFSub,
       SpvOpFMul,
       SpvOpFDiv,
       SpvOpFMod,
       SpvOpVectorTimesScalar,
       SpvOpMatrixTimesScalar,
       SpvOpVectorTimesMatrix,
       SpvOpMatrixTimesVector,
       SpvOpMatrixTimesMatrix,
       SpvOpOuterProduct,
       SpvOpDot,
       SpvOpSelect,
       SpvOpFOrdEqual,
       SpvOpFUnordEqual,
       SpvOpFOrdNotEqual,
       SpvOpFUnordNotEqual,
       SpvOpFOrdLessThan,
       SpvOpFUnordLessThan,
       SpvOpFOrdGreaterThan,
       SpvOpFUnordGreaterThan,
       SpvOpFOrdLessThanEqual,
       SpvOpFUnordLessThanEqual,
       SpvOpFOrdGreaterThanEqual,
       SpvOpFUnordGreaterThanEqual,
   };
   target_ops_450_ = {
       GLSLstd450Round, GLSLstd450RoundEven, GLSLstd450Trunc, GLSLstd450FAbs,
       GLSLstd450FSign, GLSLstd450Floor, GLSLstd450Ceil, GLSLstd450Fract,
       GLSLstd450Radians, GLSLstd450Degrees, GLSLstd450Sin, GLSLstd450Cos,
       GLSLstd450Tan, GLSLstd450Asin, GLSLstd450Acos, GLSLstd450Atan,
       GLSLstd450Sinh, GLSLstd450Cosh, GLSLstd450Tanh, GLSLstd450Asinh,
       GLSLstd450Acosh, GLSLstd450Atanh, GLSLstd450Atan2, GLSLstd450Pow,
       GLSLstd450Exp, GLSLstd450Log, GLSLstd450Exp2, GLSLstd450Log2,
       GLSLstd450Sqrt, GLSLstd450InverseSqrt, GLSLstd450Determinant,
       GLSLstd450MatrixInverse,
       // TODO(greg-lunarg): GLSLstd450ModfStruct,
       GLSLstd450FMin, GLSLstd450FMax, GLSLstd450FClamp, GLSLstd450FMix,
       GLSLstd450Step, GLSLstd450SmoothStep, GLSLstd450Fma,
       // TODO(greg-lunarg): GLSLstd450FrexpStruct,
       GLSLstd450Ldexp, GLSLstd450Length, GLSLstd450Distance, GLSLstd450Cross,
       GLSLstd450Normalize, GLSLstd450FaceForward, GLSLstd450Reflect,
       GLSLstd450Refract, GLSLstd450NMin, GLSLstd450NMax, GLSLstd450NClamp};
   image_ops_ = {SpvOpImageSampleImplicitLod,
                 SpvOpImageSampleExplicitLod,
                 SpvOpImageSampleDrefImplicitLod,
                 SpvOpImageSampleDrefExplicitLod,
                 SpvOpImageSampleProjImplicitLod,
                 SpvOpImageSampleProjExplicitLod,
                 SpvOpImageSampleProjDrefImplicitLod,
                 SpvOpImageSampleProjDrefExplicitLod,
                 SpvOpImageFetch,
                 SpvOpImageGather,
                 SpvOpImageDrefGather,
                 SpvOpImageRead,
                 SpvOpImageSparseSampleImplicitLod,
                 SpvOpImageSparseSampleExplicitLod,
                 SpvOpImageSparseSampleDrefImplicitLod,
                 SpvOpImageSparseSampleDrefExplicitLod,
                 SpvOpImageSparseSampleProjImplicitLod,
                 SpvOpImageSparseSampleProjExplicitLod,
                 SpvOpImageSparseSampleProjDrefImplicitLod,
                 SpvOpImageSparseSampleProjDrefExplicitLod,
                 SpvOpImageSparseFetch,
                 SpvOpImageSparseGather,
                 SpvOpImageSparseDrefGather,
                 SpvOpImageSparseTexelsResident,
                 SpvOpImageSparseRead};
   dref_image_ops_ = {
       SpvOpImageSampleDrefImplicitLod,
       SpvOpImageSampleDrefExplicitLod,
       SpvOpImageSampleProjDrefImplicitLod,
       SpvOpImageSampleProjDrefExplicitLod,
       SpvOpImageDrefGather,
       SpvOpImageSparseSampleDrefImplicitLod,
       SpvOpImageSparseSampleDrefExplicitLod,
       SpvOpImageSparseSampleProjDrefImplicitLod,
       SpvOpImageSparseSampleProjDrefExplicitLod,
       SpvOpImageSparseDrefGather,
   };
   closure_ops_ = {
       SpvOpVectorExtractDynamic,
       SpvOpVectorInsertDynamic,
       SpvOpVectorShuffle,
       SpvOpCompositeConstruct,
       SpvOpCompositeInsert,
       SpvOpCompositeExtract,
       SpvOpCopyObject,
       SpvOpTranspose,
       SpvOpPhi,
   };
   relaxed_ids_set_.clear();
   converted_ids_.clear();
 }

 }  // namespace opt
 }  // namespace spvtools
	// Copyright (c) 2019 The Khronos Group Inc.
	// Copyright (c) 2019 Valve Corporation
	// Copyright (c) 2019 LunarG Inc.
	//
	// Licensed under the Apache License, Version 2.0 (the "License");
	// you may not use this file except in compliance with the License.
	// You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing, software
	// distributed under the License is distributed on an "AS IS" BASIS,
	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	// See the License for the specific language governing permissions and
	// limitations under the License.

	#include "convert_to_half_pass.h"

	#include "source/opt/ir_builder.h"

	namespace {

	// In-operand index at which ProcessImageRef reads and rewrites the Dref id of
	// a depth-reference image instruction. Declared unsigned because it is only
	// ever passed as a uint32_t operand index.
	constexpr uint32_t kImageSampleDrefIdInIdx = 2;

	}  // anonymous namespace

	namespace spvtools {
	namespace opt {

	// Returns true if |inst|'s opcode is listed in target_ops_core_, or if |inst|
	// is an OpExtInst of the GLSL.std.450 import whose extended opcode is listed
	// in target_ops_450_.
	bool ConvertToHalfPass::IsArithmetic(Instruction* inst) {
	  if (target_ops_core_.count(inst->opcode()) != 0) return true;
	  if (inst->opcode() != SpvOpExtInst) return false;
	  // In-operand 0 is the extended instruction set id, in-operand 1 the
	  // extended opcode.
	  const uint32_t glsl450_id =
	      context()->get_feature_mgr()->GetExtInstImportId_GLSLstd450();
	  if (inst->GetSingleWordInOperand(0) != glsl450_id) return false;
	  return target_ops_450_.count(inst->GetSingleWordInOperand(1)) != 0;
	}

	// Returns true if |inst| has a result type and Pass::IsFloat accepts that
	// type for the given |width|. Instructions without a type yield false.
	bool ConvertToHalfPass::IsFloat(Instruction* inst, uint32_t width) {
	  const uint32_t type_id = inst->type_id();
	  return type_id != 0 && Pass::IsFloat(type_id, width);
	}

	// Returns true if the result id of |inst| carries an OpDecorate with the
	// RelaxedPrecision decoration.
	bool ConvertToHalfPass::IsDecoratedRelaxed(Instruction* inst) {
	  const uint32_t result_id = inst->result_id();
	  for (auto dec_inst :
	       get_decoration_mgr()->GetDecorationsFor(result_id, false)) {
	    if (dec_inst->opcode() != SpvOpDecorate) continue;
	    // In-operand 1 of OpDecorate is the decoration kind.
	    if (dec_inst->GetSingleWordInOperand(1) == SpvDecorationRelaxedPrecision)
	      return true;
	  }
	  return false;
	}

	// Returns true if |id| has been recorded in relaxed_ids_set_.
	bool ConvertToHalfPass::IsRelaxed(uint32_t id) {
	  return relaxed_ids_set_.find(id) != relaxed_ids_set_.end();
	}

	// Records |id| in relaxed_ids_set_.
	void ConvertToHalfPass::AddRelaxed(uint32_t id) {
	  relaxed_ids_set_.insert(id);
	}

	// Returns the type manager's registered float scalar type of |width| bits.
	analysis::Type* ConvertToHalfPass::FloatScalarType(uint32_t width) {
	  analysis::Float scalar_ty(width);
	  auto* type_mgr = context()->get_type_mgr();
	  return type_mgr->GetRegisteredType(&scalar_ty);
	}

	// Returns the registered vector type with |v_len| float components of
	// |width| bits each.
	analysis::Type* ConvertToHalfPass::FloatVectorType(uint32_t v_len,
	                                                   uint32_t width) {
	  analysis::Vector vector_ty(FloatScalarType(width), v_len);
	  auto* type_mgr = context()->get_type_mgr();
	  return type_mgr->GetRegisteredType(&vector_ty);
	}

	// Returns the registered matrix type with |v_cnt| columns whose column type
	// has the same length as the vector type |vty_id| but |width|-bit floats.
	analysis::Type* ConvertToHalfPass::FloatMatrixType(uint32_t v_cnt,
	                                                   uint32_t vty_id,
	                                                   uint32_t width) {
	  // In-operand 1 of the column vector type is read as its length.
	  const uint32_t column_len =
	      get_def_use_mgr()->GetDef(vty_id)->GetSingleWordInOperand(1);
	  analysis::Matrix matrix_ty(FloatVectorType(column_len, width), v_cnt);
	  auto* type_mgr = context()->get_type_mgr();
	  return type_mgr->GetRegisteredType(&matrix_ty);
	}

	uint32_t ConvertToHalfPass::EquivFloatTypeId(uint32_t ty_id, uint32_t width) {
	analysis::Type* reg_equiv_ty;
	Instruction* ty_inst = get_def_use_mgr()->GetDef(ty_id);
	if (ty_inst->opcode() == SpvOpTypeMatrix)
	reg_equiv_ty = FloatMatrixType(ty_inst->GetSingleWordInOperand(1),
	ty_inst->GetSingleWordInOperand(0), width);
	else if (ty_inst->opcode() == SpvOpTypeVector)
	reg_equiv_ty = FloatVectorType(ty_inst->GetSingleWordInOperand(1), width);
	else // SpvOpTypeFloat
	reg_equiv_ty = FloatScalarType(width);
	return context()->get_type_mgr()->GetTypeInstruction(reg_equiv_ty);
	}

	void ConvertToHalfPass::GenConvert(uint32_t* val_idp, uint32_t width,
	Instruction* inst) {
	Instruction* val_inst = get_def_use_mgr()->GetDef(*val_idp);
	uint32_t ty_id = val_inst->type_id();
	uint32_t nty_id = EquivFloatTypeId(ty_id, width);
	if (nty_id == ty_id) return;
	Instruction* cvt_inst;
	InstructionBuilder builder(
	context(), inst,
	IRContext::kAnalysisDefUse \| IRContext::kAnalysisInstrToBlockMapping);
	if (val_inst->opcode() == SpvOpUndef)
	cvt_inst = builder.AddNullaryOp(nty_id, SpvOpUndef);
	else
	cvt_inst = builder.AddUnaryOp(nty_id, SpvOpFConvert, *val_idp);
	*val_idp = cvt_inst->result_id();
	}

	// Replaces an OpFConvert that produces a matrix with per-column
	// OpCompositeExtract + OpFConvert instructions combined by an
	// OpCompositeConstruct. All uses of |inst| are redirected to the new
	// composite, and |inst| itself is turned into an OpCopyObject typed as the
	// matrix at the source width. Returns true if |inst| was rewritten, false if
	// it is not an OpFConvert with a matrix result type.
	bool ConvertToHalfPass::MatConvertCleanup(Instruction* inst) {
	  if (inst->opcode() != SpvOpFConvert) return false;
	  const uint32_t mat_ty_id = inst->type_id();
	  Instruction* mat_ty = get_def_use_mgr()->GetDef(mat_ty_id);
	  if (mat_ty->opcode() != SpvOpTypeMatrix) return false;
	  const uint32_t col_ty_id = mat_ty->GetSingleWordInOperand(0);
	  const uint32_t col_cnt = mat_ty->GetSingleWordInOperand(1);
	  Instruction* col_ty = get_def_use_mgr()->GetDef(col_ty_id);
	  Instruction* comp_ty =
	      get_def_use_mgr()->GetDef(col_ty->GetSingleWordInOperand(0));
	  InstructionBuilder builder(
	      context(), inst,
	      IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);
	  // The source width is 32 when the destination component is 16 bits wide,
	  // and 16 otherwise.
	  uint32_t src_width = 16;
	  if (comp_ty->GetSingleWordInOperand(0) == 16) src_width = 32;
	  const uint32_t src_mat_id = inst->GetSingleWordInOperand(0);
	  const uint32_t src_col_ty_id = EquivFloatTypeId(col_ty_id, src_width);
	  // Extract and convert each column, collecting the converted ids.
	  std::vector<Operand> columns;
	  for (uint32_t col = 0; col < col_cnt; ++col) {
	    Instruction* extract = builder.AddIdLiteralOp(
	        src_col_ty_id, SpvOpCompositeExtract, src_mat_id, col);
	    Instruction* convert =
	        builder.AddUnaryOp(col_ty_id, SpvOpFConvert, extract->result_id());
	    columns.push_back({SPV_OPERAND_TYPE_ID, {convert->result_id()}});
	  }
	  const uint32_t new_mat_id = TakeNextId();
	  std::unique_ptr<Instruction> construct(new Instruction(
	      context(), SpvOpCompositeConstruct, mat_ty_id, new_mat_id, columns));
	  (void)builder.AddInstruction(std::move(construct));
	  context()->ReplaceAllUsesWith(inst->result_id(), new_mat_id);
	  // Keep the original instruction valid by making it a copy of its operand.
	  inst->SetOpcode(SpvOpCopyObject);
	  inst->SetResultType(EquivFloatTypeId(mat_ty_id, src_width));
	  get_def_use_mgr()->AnalyzeInstUse(inst);
	  return true;
	}

	// Asks the decoration manager to remove every OpDecorate RelaxedPrecision
	// from |id| and returns the manager's result.
	bool ConvertToHalfPass::RemoveRelaxedDecoration(uint32_t id) {
	  auto is_relaxed_decoration = [](const Instruction& dec) {
	    return dec.opcode() == SpvOpDecorate &&
	           dec.GetSingleWordInOperand(1u) == SpvDecorationRelaxedPrecision;
	  };
	  return context()->get_decoration_mgr()->RemoveDecorationsFrom(
	      id, is_relaxed_decoration);
	}

	// Rewrites |inst| to operate on float16: every float32-based id operand is
	// routed through GenConvert(…, 16, …), and a float32-based result type is
	// replaced by its float16 equivalent, with the result id recorded in
	// converted_ids_. Returns true if anything was changed.
	bool ConvertToHalfPass::GenHalfArith(Instruction* inst) {
	  bool changed = false;
	  inst->ForEachInId([inst, &changed, this](uint32_t* operand_idp) {
	    Instruction* operand_def = get_def_use_mgr()->GetDef(*operand_idp);
	    if (IsFloat(operand_def, 32)) {
	      GenConvert(operand_idp, 16, inst);
	      changed = true;
	    }
	  });
	  if (IsFloat(inst, 32)) {
	    const uint32_t half_ty_id = EquivFloatTypeId(inst->type_id(), 16);
	    inst->SetResultType(half_ty_id);
	    converted_ids_.insert(inst->result_id());
	    changed = true;
	  }
	  // Operands and/or type changed, so refresh the def-use information.
	  if (changed) get_def_use_mgr()->AnalyzeInstUse(inst);
	  return changed;
	}

	// Converts the float value operands of the phi |inst| that are |from_width|
	// bits wide to |to_width| bits. Each conversion is placed in the block the
	// value arrives from, before that block's last instruction, or before the
	// merge instruction when one directly precedes it. When |to_width| is 16 the
	// phi's own type becomes the float16 equivalent and its result id is
	// recorded in converted_ids_. Returns true if |inst| was changed.
	bool ConvertToHalfPass::ProcessPhi(Instruction* inst, uint32_t from_width,
	                                   uint32_t to_width) {
	  uint32_t in_idx = 0;
	  uint32_t* value_idp = nullptr;
	  bool changed = false;
	  inst->ForEachInId([&in_idx, &value_idp, from_width, to_width, &changed,
	                     this](uint32_t* idp) {
	    // In-operands alternate: even = incoming value, odd = its parent block.
	    const bool is_value_operand = (in_idx % 2 == 0);
	    ++in_idx;
	    if (is_value_operand) {
	      value_idp = idp;
	      return;
	    }
	    Instruction* value_def = get_def_use_mgr()->GetDef(*value_idp);
	    if (!IsFloat(value_def, from_width)) return;
	    BasicBlock* pred = context()->get_instr_block(*idp);
	    auto insert_pt = pred->tail();
	    if (insert_pt != pred->begin()) {
	      auto before_tail = insert_pt;
	      --before_tail;
	      const auto prev_op = before_tail->opcode();
	      if (prev_op == SpvOpSelectionMerge || prev_op == SpvOpLoopMerge)
	        insert_pt = before_tail;
	    }
	    GenConvert(value_idp, to_width, &*insert_pt);
	    changed = true;
	  });
	  if (to_width == 16u) {
	    inst->SetResultType(EquivFloatTypeId(inst->type_id(), 16u));
	    converted_ids_.insert(inst->result_id());
	    changed = true;
	  }
	  if (changed) get_def_use_mgr()->AnalyzeInstUse(inst);
	  return changed;
	}

	// Handles an OpFConvert |inst|.
	//  - If its result is float32-based and relaxed, the result type is changed
	//    to the float16 equivalent and the id is recorded in converted_ids_.
	//  - If operand and result types then match, the opcode is changed to
	//    OpCopyObject to keep the validator happy; simplification and DCE will
	//    clean it up. One way this can happen is if an FConvert generated during
	//    this pass (likely by ProcessPhi) is later encountered here and its
	//    operand has been changed to half.
	// Returns true only if |inst| was actually changed, so an untouched convert
	// does not make the pass report a modification.
	bool ConvertToHalfPass::ProcessConvert(Instruction* inst) {
	  bool modified = false;
	  if (IsFloat(inst, 32) && IsRelaxed(inst->result_id())) {
	    inst->SetResultType(EquivFloatTypeId(inst->type_id(), 16));
	    get_def_use_mgr()->AnalyzeInstUse(inst);
	    converted_ids_.insert(inst->result_id());
	    modified = true;
	  }
	  const uint32_t val_id = inst->GetSingleWordInOperand(0);
	  Instruction* val_inst = get_def_use_mgr()->GetDef(val_id);
	  if (inst->type_id() == val_inst->type_id()) {
	    inst->SetOpcode(SpvOpCopyObject);
	    modified = true;
	  }
	  return modified;
	}

	// For an image instruction listed in dref_image_ops_, converts its Dref
	// operand back to float32 if that id was converted by this pass. Other image
	// instructions are left alone. Returns true if |inst| was changed.
	bool ConvertToHalfPass::ProcessImageRef(Instruction* inst) {
	  if (dref_image_ops_.count(inst->opcode()) == 0) return false;
	  uint32_t dref_id = inst->GetSingleWordInOperand(kImageSampleDrefIdInIdx);
	  if (converted_ids_.count(dref_id) == 0) return false;
	  GenConvert(&dref_id, 32, inst);
	  inst->SetInOperand(kImageSampleDrefIdInIdx, {dref_id});
	  get_def_use_mgr()->AnalyzeInstUse(inst);
	  return true;
	}

	// Handles an instruction that is not itself converted to half: any id
	// operand recorded in converted_ids_ is converted back to float32 in front
	// of |inst|. Phis are delegated to ProcessPhi(inst, 16, 32). Returns true if
	// an operand was replaced.
	bool ConvertToHalfPass::ProcessDefault(Instruction* inst) {
	  if (inst->opcode() == SpvOpPhi) return ProcessPhi(inst, 16u, 32u);
	  bool changed = false;
	  inst->ForEachInId([inst, &changed, this](uint32_t* idp) {
	    if (converted_ids_.count(*idp) != 0) {
	      const uint32_t id_before = *idp;
	      GenConvert(idp, 32, inst);
	      // GenConvert leaves the id untouched when no conversion was needed.
	      if (*idp != id_before) changed = true;
	    }
	  });
	  if (changed) get_def_use_mgr()->AnalyzeInstUse(inst);
	  return changed;
	}

	// Dispatches |inst| to the matching handler: relaxed arithmetic, relaxed
	// phi, OpFConvert, image instruction, or the default handler. Returns the
	// handler's "modified" result.
	bool ConvertToHalfPass::GenHalfInst(Instruction* inst) {
	  const auto op = inst->opcode();
	  const bool relaxed = IsRelaxed(inst->result_id());
	  if (relaxed && IsArithmetic(inst)) return GenHalfArith(inst);
	  if (relaxed && op == SpvOpPhi) return ProcessPhi(inst, 32u, 16u);
	  if (op == SpvOpFConvert) return ProcessConvert(inst);
	  if (image_ops_.count(op) != 0) return ProcessImageRef(inst);
	  return ProcessDefault(inst);
	}

	// Tries to add the result of |inst| to the relaxed set. Only float32-based
	// results that are not yet relaxed are considered. The result is added when
	//  - it is decorated RelaxedPrecision, or
	//  - |inst| is one of closure_ops_ and either all of its float32 operands
	//    are relaxed, or all of its users are float32 results that are relaxed.
	// Returns true if the result id was newly added.
	bool ConvertToHalfPass::CloseRelaxInst(Instruction* inst) {
	  const uint32_t result_id = inst->result_id();
	  if (result_id == 0 || IsRelaxed(result_id) || !IsFloat(inst, 32))
	    return false;
	  if (IsDecoratedRelaxed(inst)) {
	    AddRelaxed(result_id);
	    return true;
	  }
	  if (closure_ops_.count(inst->opcode()) == 0) return false;
	  // First criterion: every float32 operand is already relaxed.
	  bool operands_relaxed = true;
	  inst->ForEachInId([&operands_relaxed, this](uint32_t* idp) {
	    Instruction* operand_def = get_def_use_mgr()->GetDef(*idp);
	    if (IsFloat(operand_def, 32) && !IsRelaxed(*idp)) operands_relaxed = false;
	  });
	  if (!operands_relaxed) {
	    // Second criterion: every user is a relaxed float32 result.
	    bool users_relaxed = true;
	    get_def_use_mgr()->ForEachUser(
	        inst, [&users_relaxed, this](Instruction* user) {
	          const bool user_ok =
	              user->result_id() != 0 && IsFloat(user, 32) &&
	              (IsDecoratedRelaxed(user) || IsRelaxed(user->result_id()));
	          if (!user_ok) users_relaxed = false;
	        });
	    if (!users_relaxed) return false;
	  }
	  AddRelaxed(result_id);
	  return true;
	}

	// Processes one function in three sweeps over its blocks in reverse
	// post-order:
	//  1. repeat CloseRelaxInst over all instructions until nothing new becomes
	//     relaxed;
	//  2. run GenHalfInst on every instruction;
	//  3. run MatConvertCleanup on every instruction to replace matrix converts.
	// Returns true if sweep 2 or 3 modified the function.
	bool ConvertToHalfPass::ProcessFunction(Function* func) {
	  bool grew = false;
	  do {
	    grew = false;
	    cfg()->ForEachBlockInReversePostOrder(
	        func->entry().get(), [&grew, this](BasicBlock* bb) {
	          for (auto ii = bb->begin(); ii != bb->end(); ++ii) {
	            if (CloseRelaxInst(&*ii)) grew = true;
	          }
	        });
	  } while (grew);

	  bool modified = false;
	  cfg()->ForEachBlockInReversePostOrder(
	      func->entry().get(), [&modified, this](BasicBlock* bb) {
	        for (auto ii = bb->begin(); ii != bb->end(); ++ii) {
	          if (GenHalfInst(&*ii)) modified = true;
	        }
	      });
	  cfg()->ForEachBlockInReversePostOrder(
	      func->entry().get(), [&modified, this](BasicBlock* bb) {
	        for (auto ii = bb->begin(); ii != bb->end(); ++ii) {
	          if (MatConvertCleanup(&*ii)) modified = true;
	        }
	      });
	  return modified;
	}

	// Runs ProcessFunction over the reachable call tree, adds the Float16
	// capability if that changed anything, then removes RelaxedPrecision
	// decorations from every relaxed id and from every module-level type/value
	// that has a result id. Reports whether the module was changed.
	Pass::Status ConvertToHalfPass::ProcessImpl() {
	  Pass::ProcessFunction process_fn = [this](Function* fp) {
	    return ProcessFunction(fp);
	  };
	  bool modified = context()->ProcessReachableCallTree(process_fn);
	  if (modified) context()->AddCapability(SpvCapabilityFloat16);
	  for (auto relaxed_id : relaxed_ids_set_) {
	    if (RemoveRelaxedDecoration(relaxed_id)) modified = true;
	  }
	  for (auto& val : get_module()->types_values()) {
	    const uint32_t val_id = val.result_id();
	    if (val_id == 0) continue;
	    if (RemoveRelaxedDecoration(val_id)) modified = true;
	  }
	  if (modified) return Status::SuccessWithChange;
	  return Status::SuccessWithoutChange;
	}

	// Pass entry point: sets up the opcode tables and id sets, then runs the
	// conversion and returns its status.
	Pass::Status ConvertToHalfPass::Process() {
	  Initialize();
	  const Pass::Status status = ProcessImpl();
	  return status;
	}

	void ConvertToHalfPass::Initialize() {
	target_ops_core_ = {
	SpvOpVectorExtractDynamic,
	SpvOpVectorInsertDynamic,
	SpvOpVectorShuffle,
	SpvOpCompositeConstruct,
	SpvOpCompositeInsert,
	SpvOpCompositeExtract,
	SpvOpCopyObject,
	SpvOpTranspose,
	SpvOpConvertSToF,
	SpvOpConvertUToF,
	// SpvOpFConvert,
	// SpvOpQuantizeToF16,
	SpvOpFNegate,
	SpvOpFAdd,
	SpvOpFSub,
	SpvOpFMul,
	SpvOpFDiv,
	SpvOpFMod,
	SpvOpVectorTimesScalar,
	SpvOpMatrixTimesScalar,
	SpvOpVectorTimesMatrix,
	SpvOpMatrixTimesVector,
	SpvOpMatrixTimesMatrix,
	SpvOpOuterProduct,
	SpvOpDot,
	SpvOpSelect,
	SpvOpFOrdEqual,
	SpvOpFUnordEqual,
	SpvOpFOrdNotEqual,
	SpvOpFUnordNotEqual,
	SpvOpFOrdLessThan,
	SpvOpFUnordLessThan,
	SpvOpFOrdGreaterThan,
	SpvOpFUnordGreaterThan,
	SpvOpFOrdLessThanEqual,
	SpvOpFUnordLessThanEqual,
	SpvOpFOrdGreaterThanEqual,
	SpvOpFUnordGreaterThanEqual,
	};
	target_ops_450_ = {
	GLSLstd450Round, GLSLstd450RoundEven, GLSLstd450Trunc, GLSLstd450FAbs,
	GLSLstd450FSign, GLSLstd450Floor, GLSLstd450Ceil, GLSLstd450Fract,
	GLSLstd450Radians, GLSLstd450Degrees, GLSLstd450Sin, GLSLstd450Cos,
	GLSLstd450Tan, GLSLstd450Asin, GLSLstd450Acos, GLSLstd450Atan,
	GLSLstd450Sinh, GLSLstd450Cosh, GLSLstd450Tanh, GLSLstd450Asinh,
	GLSLstd450Acosh, GLSLstd450Atanh, GLSLstd450Atan2, GLSLstd450Pow,
	GLSLstd450Exp, GLSLstd450Log, GLSLstd450Exp2, GLSLstd450Log2,
	GLSLstd450Sqrt, GLSLstd450InverseSqrt, GLSLstd450Determinant,
	GLSLstd450MatrixInverse,
	// TODO(greg-lunarg): GLSLstd450ModfStruct,
	GLSLstd450FMin, GLSLstd450FMax, GLSLstd450FClamp, GLSLstd450FMix,
	GLSLstd450Step, GLSLstd450SmoothStep, GLSLstd450Fma,
	// TODO(greg-lunarg): GLSLstd450FrexpStruct,
	GLSLstd450Ldexp, GLSLstd450Length, GLSLstd450Distance, GLSLstd450Cross,
	GLSLstd450Normalize, GLSLstd450FaceForward, GLSLstd450Reflect,
	GLSLstd450Refract, GLSLstd450NMin, GLSLstd450NMax, GLSLstd450NClamp};
	image_ops_ = {SpvOpImageSampleImplicitLod,
	SpvOpImageSampleExplicitLod,
	SpvOpImageSampleDrefImplicitLod,
	SpvOpImageSampleDrefExplicitLod,
	SpvOpImageSampleProjImplicitLod,
	SpvOpImageSampleProjExplicitLod,
	SpvOpImageSampleProjDrefImplicitLod,
	SpvOpImageSampleProjDrefExplicitLod,
	SpvOpImageFetch,
	SpvOpImageGather,
	SpvOpImageDrefGather,
	SpvOpImageRead,
	SpvOpImageSparseSampleImplicitLod,
	SpvOpImageSparseSampleExplicitLod,
	SpvOpImageSparseSampleDrefImplicitLod,
	SpvOpImageSparseSampleDrefExplicitLod,
	SpvOpImageSparseSampleProjImplicitLod,
	SpvOpImageSparseSampleProjExplicitLod,
	SpvOpImageSparseSampleProjDrefImplicitLod,
	SpvOpImageSparseSampleProjDrefExplicitLod,
	SpvOpImageSparseFetch,
	SpvOpImageSparseGather,
	SpvOpImageSparseDrefGather,
	SpvOpImageSparseTexelsResident,
	SpvOpImageSparseRead};
	dref_image_ops_ = {
	SpvOpImageSampleDrefImplicitLod,
	SpvOpImageSampleDrefExplicitLod,
	SpvOpImageSampleProjDrefImplicitLod,
	SpvOpImageSampleProjDrefExplicitLod,
	SpvOpImageDrefGather,
	SpvOpImageSparseSampleDrefImplicitLod,
	SpvOpImageSparseSampleDrefExplicitLod,
	SpvOpImageSparseSampleProjDrefImplicitLod,
	SpvOpImageSparseSampleProjDrefExplicitLod,
	SpvOpImageSparseDrefGather,
	};
	closure_ops_ = {
	SpvOpVectorExtractDynamic,
	SpvOpVectorInsertDynamic,
	SpvOpVectorShuffle,
	SpvOpCompositeConstruct,
	SpvOpCompositeInsert,
	SpvOpCompositeExtract,
	SpvOpCopyObject,
	SpvOpTranspose,
	SpvOpPhi,
	};
	relaxed_ids_set_.clear();
	converted_ids_.clear();
	}

	} // namespace opt
	} // namespace spvtools