Changes TargetX8632 to inherit from TargetX86Base<TargetX8632>.

Previously, TargetX8632 was defined as

class TargetX8632 : public TargetLowering;

and its create method would do

TargetX8632 *TargetX8632::create(Cfg *Func) {
  return TargetX86Base<TargetX8632>::create(Func);
}

TargetX86Base<M> was defined as

template <class M> class TargetX86Base : public M;

which meant TargetX8632 had no way to access methods defined in
TargetX86Base<M>. This was not a problem before, but with the X8664
backend around the corner it became obvious that the concrete
TargetX86 targets (e.g., X8632, X8664SysV, X8664Win) would need
access to some methods in TargetX86Base (e.g., _mov, _fld, _fstp,
etc.).

This CL changes the class hierarchy to something like

TargetLowering <-- TargetX86Base<X8632> <-- X8632
               <-- TargetX86Base<X8664SysV> <-- X8664SysV (TODO)
               <-- TargetX86Base<X8664Win> <-- X8664Win (TODO)
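
In code, the new shape is roughly (a minimal sketch; the real classes
carry much more than shown here):

template <class Machine> class TargetX86Base : public TargetLowering {
  // Shared x86 lowering code; reaches the concrete target through
  // static_cast<Machine *>(this).
};

class TargetX8632 final : public TargetX86Base<TargetX8632> {
  // X8632-specific pieces, e.g., createNaClReadTPSrcOperand().
};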

One problem with this new design is that TargetX86Base<M> needs to be
able to invoke methods in the actual backends. For example, each
backend will have its own way of lowering llvm.nacl.read.tp. This
creates a chicken-and-egg problem that is solved with (you guessed it)
template machinery (some would call it voodoo).

In this CL, as a proof of concept, we introduce the

   TargetX86Base::dispatchToConcrete

template method. It is a very simple method: it downcasts "this" from
the template base class (TargetX86Base<TargetX8632>) to the actual
(concrete) class (TargetX8632), and then invokes the requested method.
It uses perfect forwarding to pass arguments to the method being
invoked, and returns whatever that method returns.
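
In essence (condensed from the implementation in the diff below; the
real code splits this into a non-void and a void overload via
std::enable_if):

template <typename Ret, typename... Args>
Ret dispatchToConcrete(Ret (Machine::*Method)(Args...), Args &&... args) {
  return (static_cast<Machine *>(this)->*Method)(std::forward<Args>(args)...);
}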

The proof of concept in this CL uses dispatchToConcrete to invoke
createNaClReadTPSrcOperand on the concrete target class. In a way,
dispatchToConcrete is a poor man's virtual method call, without the
overhead of an actual virtual call.
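
Concretely, the shared NaClReadTP lowering now obtains its source
operand with

  Operand *Src = dispatchToConcrete(&Machine::createNaClReadTPSrcOperand);

instead of building the GS-segment memory operand inline (see the
diff below).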

BUG= https://code.google.com/p/nativeclient/issues/detail?id=4077
R=jvoung@chromium.org, stichnot@chromium.org

Review URL: https://codereview.chromium.org/1217443024.
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index 9e857a8..dfa042a 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -81,10 +81,6 @@
 
 } // end of namespace X86Internal
 
-TargetX8632 *TargetX8632::create(Cfg *Func) {
-  return X86Internal::TargetX86Base<TargetX8632>::create(Func);
-}
-
 TargetDataX8632::TargetDataX8632(GlobalContext *Ctx)
     : TargetDataLowering(Ctx) {}
 
diff --git a/src/IceTargetLoweringX8632.h b/src/IceTargetLoweringX8632.h
index f1d8bb3..1f1d2cb 100644
--- a/src/IceTargetLoweringX8632.h
+++ b/src/IceTargetLoweringX8632.h
@@ -23,10 +23,12 @@
 #include "IceRegistersX8632.h"
 #include "IceTargetLowering.h"
 #include "IceTargetLoweringX8632Traits.h"
+#include "IceTargetLoweringX86Base.h"
 
 namespace Ice {
 
-class TargetX8632 : public TargetLowering {
+class TargetX8632 final
+    : public ::Ice::X86Internal::TargetX86Base<TargetX8632> {
   TargetX8632() = delete;
   TargetX8632(const TargetX8632 &) = delete;
   TargetX8632 &operator=(const TargetX8632 &) = delete;
@@ -34,13 +36,20 @@
 public:
   using X86InstructionSet = X8632::Traits::InstructionSet;
 
-  static TargetX8632 *create(Cfg *Func);
-  virtual X8632::Traits::Address
-  stackVarToAsmOperand(const Variable *Var) const = 0;
-  virtual X86InstructionSet getInstructionSet() const = 0;
+  static TargetX8632 *create(Cfg *Func) { return new TargetX8632(Func); }
 
 protected:
-  explicit TargetX8632(Cfg *Func) : TargetLowering(Func) {}
+  Operand *createNaClReadTPSrcOperand() {
+    Constant *Zero = Ctx->getConstantZero(IceType_i32);
+    return Traits::X86OperandMem::create(Func, IceType_i32, nullptr, Zero,
+                                         nullptr, 0,
+                                         Traits::X86OperandMem::SegReg_GS);
+  }
+
+private:
+  friend class ::Ice::X86Internal::TargetX86Base<TargetX8632>;
+
+  explicit TargetX8632(Cfg *Func) : TargetX86Base(Func) {}
 };
 
 class TargetDataX8632 final : public TargetDataLowering {
diff --git a/src/IceTargetLoweringX8632Traits.h b/src/IceTargetLoweringX8632Traits.h
index ae9abe1..ca15ea5 100644
--- a/src/IceTargetLoweringX8632Traits.h
+++ b/src/IceTargetLoweringX8632Traits.h
@@ -37,6 +37,7 @@
 
 template <class Machine> struct Insts;
 template <class Machine> struct MachineTraits;
+template <class Machine> class TargetX86Base;
 
 template <> struct MachineTraits<TargetX8632> {
   //----------------------------------------------------------------------------
@@ -518,7 +519,7 @@
   //----------------------------------------------------------------------------
   using Insts = ::Ice::X86Internal::Insts<TargetX8632>;
 
-  using TargetLowering = TargetX8632;
+  using TargetLowering = ::Ice::X86Internal::TargetX86Base<TargetX8632>;
   using Assembler = X8632::AssemblerX8632;
 
   /// X86Operand extends the Operand hierarchy.  Its subclasses are
diff --git a/src/IceTargetLoweringX86Base.h b/src/IceTargetLoweringX86Base.h
index a417ff7..58de721 100644
--- a/src/IceTargetLoweringX86Base.h
+++ b/src/IceTargetLoweringX86Base.h
@@ -24,6 +24,7 @@
 
 #include <type_traits>
 #include <unordered_map>
+#include <utility>
 
 namespace Ice {
 namespace X86Internal {
@@ -32,76 +33,30 @@
 
 template <class Machine> struct MachineTraits {};
 
-template <class Machine> class TargetX86Base : public Machine {
-  static_assert(std::is_base_of<::Ice::TargetLowering, Machine>::value,
-                "Machine template parameter must be a TargetLowering.");
-
+/// TargetX86Base is a template for all X86 targets, and it relies on the
+/// curiously recurring template pattern (CRTP) for generating code,
+/// delegating target-specific lowerings (e.g., call, ret, and intrinsics)
+/// to the actual backends. Backends are expected to implement the
+/// following methods (which should be accessible from TargetX86Base):
+///
+/// Operand *createNaClReadTPSrcOperand()
+///
+/// Note: Ideally, we should be able to
+///
+///   static_assert(std::is_base_of<TargetX86Base<Machine>, Machine>::value);
+///
+/// but that does not work: the compiler does not know that Machine inherits
+/// from TargetX86Base at this point in translation.
+template <class Machine> class TargetX86Base : public TargetLowering {
   TargetX86Base() = delete;
   TargetX86Base(const TargetX86Base &) = delete;
   TargetX86Base &operator=(const TargetX86Base &) = delete;
 
-protected:
-  using Machine::H_bitcast_16xi1_i16;
-  using Machine::H_bitcast_8xi1_i8;
-  using Machine::H_bitcast_i16_16xi1;
-  using Machine::H_bitcast_i8_8xi1;
-  using Machine::H_call_ctpop_i32;
-  using Machine::H_call_ctpop_i64;
-  using Machine::H_call_longjmp;
-  using Machine::H_call_memcpy;
-  using Machine::H_call_memmove;
-  using Machine::H_call_memset;
-  using Machine::H_call_read_tp;
-  using Machine::H_call_setjmp;
-  using Machine::H_fptosi_f32_i64;
-  using Machine::H_fptosi_f64_i64;
-  using Machine::H_fptoui_4xi32_f32;
-  using Machine::H_fptoui_f32_i32;
-  using Machine::H_fptoui_f32_i64;
-  using Machine::H_fptoui_f64_i32;
-  using Machine::H_fptoui_f64_i64;
-  using Machine::H_frem_f32;
-  using Machine::H_frem_f64;
-  using Machine::H_sdiv_i64;
-  using Machine::H_sitofp_i64_f32;
-  using Machine::H_sitofp_i64_f64;
-  using Machine::H_srem_i64;
-  using Machine::H_udiv_i64;
-  using Machine::H_uitofp_4xi32_4xf32;
-  using Machine::H_uitofp_i32_f32;
-  using Machine::H_uitofp_i32_f64;
-  using Machine::H_uitofp_i64_f32;
-  using Machine::H_uitofp_i64_f64;
-  using Machine::H_urem_i64;
-
-  using Machine::alignStackSpillAreas;
-  using Machine::assignVarStackSlots;
-  using Machine::inferTwoAddress;
-  using Machine::makeHelperCall;
-  using Machine::getVarStackSlotParams;
-
 public:
   using Traits = MachineTraits<Machine>;
   using BoolFolding = ::Ice::X86Internal::BoolFolding<Traits>;
 
-  using Machine::RegSet_All;
-  using Machine::RegSet_CalleeSave;
-  using Machine::RegSet_CallerSave;
-  using Machine::RegSet_FramePointer;
-  using Machine::RegSet_None;
-  using Machine::RegSet_StackPointer;
-  using Machine::Context;
-  using Machine::Ctx;
-  using Machine::Func;
-  using RegSetMask = typename Machine::RegSetMask;
-
-  using Machine::_bundle_lock;
-  using Machine::_bundle_unlock;
-  using Machine::_set_dest_nonkillable;
-  using Machine::getContext;
-  using Machine::getStackAdjustment;
-  using Machine::regAlloc;
-  using Machine::resetStackAdjustment;
+  ~TargetX86Base() override = default;
 
   static TargetX86Base *create(Cfg *Func) { return new TargetX86Base(Func); }
 
@@ -156,10 +111,9 @@
   Operand *hiOperand(Operand *Operand);
   void finishArgumentLowering(Variable *Arg, Variable *FramePtr,
                               size_t BasicFrameOffset, size_t &InArgsSizeBytes);
-  typename Traits::Address
-  stackVarToAsmOperand(const Variable *Var) const final;
+  typename Traits::Address stackVarToAsmOperand(const Variable *Var) const;
 
-  typename Traits::InstructionSet getInstructionSet() const final {
+  typename Traits::InstructionSet getInstructionSet() const {
     return InstructionSet;
   }
   Operand *legalizeUndef(Operand *From, int32_t RegNum = Variable::NoRegister);
@@ -628,7 +582,28 @@
   bool RandomizationPoolingPaused = false;
 
 private:
-  ~TargetX86Base() override {}
+  /// dispatchToConcrete is the template voodoo that allows TargetX86Base to
+  /// invoke methods in Machine (which inherits from TargetX86Base) without
+  /// having to rely on virtual method calls. There are two overloads, one for
+  /// non-void types, and one for void types. We need this because, for non-void
+  /// types, we need to return the method result, whereas for void, we don't.
+  /// While it is true that the code compiles without the void "version", there
+  /// used to be a time when compilers would reject such code.
+  ///
+  /// This machinery is far from perfect. Note that, in particular, the
+  /// arguments provided to dispatchToConcrete() need to match the arguments for
+  /// Method **exactly** (i.e., no argument promotion is performed.)
+  template <typename Ret, typename... Args>
+  typename std::enable_if<!std::is_void<Ret>::value, Ret>::type
+  dispatchToConcrete(Ret (Machine::*Method)(Args...), Args &&... args) {
+    return (static_cast<Machine *>(this)->*Method)(std::forward<Args>(args)...);
+  }
+
+  template <typename... Args>
+  void dispatchToConcrete(void (Machine::*Method)(Args...), Args &&... args) {
+    (static_cast<Machine *>(this)->*Method)(std::forward<Args>(args)...);
+  }
+
   BoolFolding FoldingInfo;
 };
 } // end of namespace X86Internal
diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h
index 5ceef36..fc0a8e2 100644
--- a/src/IceTargetLoweringX86BaseImpl.h
+++ b/src/IceTargetLoweringX86BaseImpl.h
@@ -267,7 +267,7 @@
 
 template <class Machine>
 TargetX86Base<Machine>::TargetX86Base(Cfg *Func)
-    : Machine(Func) {
+    : TargetLowering(Func) {
   static_assert(
       (Traits::InstructionSet::End - Traits::InstructionSet::Begin) ==
           (TargetInstructionSet::X86InstructionSet_End -
@@ -455,7 +455,7 @@
   }
 }
 
-bool canRMW(const InstArithmetic *Arith) {
+inline bool canRMW(const InstArithmetic *Arith) {
   Type Ty = Arith->getDest()->getType();
   // X86 vector instructions write to a register and have no RMW option.
   if (isVectorType(Ty))
@@ -579,7 +579,7 @@
               Store->dump(Func);
               Str << "\n";
             }
-            Variable *Beacon = Func->template makeVariable(IceType_i32);
+            Variable *Beacon = Func->makeVariable(IceType_i32);
             Beacon->setWeight(0);
             Store->setRmwBeacon(Beacon);
             InstFakeDef *BeaconDef = InstFakeDef::create(Func, Beacon);
@@ -596,7 +596,7 @@
 
 // Converts a ConstantInteger32 operand into its constant value, or
 // MemoryOrderInvalid if the operand is not a ConstantInteger32.
-uint64_t getConstantMemoryOrder(Operand *Opnd) {
+inline uint64_t getConstantMemoryOrder(Operand *Opnd) {
   if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd))
     return Integer->getValue();
   return Intrinsics::MemoryOrderInvalid;
@@ -607,8 +607,8 @@
 /// true as long as the load dest matches exactly one of the binary
 /// instruction's src operands.  Replaces Src0 or Src1 with LoadSrc if
 /// the answer is true.
-bool canFoldLoadIntoBinaryInst(Operand *LoadSrc, Variable *LoadDest,
-                               Operand *&Src0, Operand *&Src1) {
+inline bool canFoldLoadIntoBinaryInst(Operand *LoadSrc, Variable *LoadDest,
+                                      Operand *&Src0, Operand *&Src1) {
   if (Src0 == LoadDest && Src1 != LoadDest) {
     Src0 = LoadSrc;
     return true;
@@ -727,7 +727,7 @@
   assert(RegNum < PhysicalRegisters[Ty].size());
   Variable *Reg = PhysicalRegisters[Ty][RegNum];
   if (Reg == nullptr) {
-    Reg = Func->template makeVariable(Ty);
+    Reg = Func->makeVariable(Ty);
     Reg->setRegNum(RegNum);
     PhysicalRegisters[Ty][RegNum] = Reg;
     // Specially mark esp as an "argument" so that it is considered
@@ -800,7 +800,7 @@
     // to the assigned location of Arg.
     int32_t RegNum = Traits::RegisterSet::Reg_xmm0 + NumXmmArgs;
     ++NumXmmArgs;
-    Variable *RegisterArg = Func->template makeVariable(Ty);
+    Variable *RegisterArg = Func->makeVariable(Ty);
     if (BuildDefs::dump())
       RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
     RegisterArg->setRegNum(RegNum);
@@ -1115,8 +1115,8 @@
   // jmp *t
   // bundle_unlock
   // FakeUse <original_ret_operand>
-  const SizeT BundleSize =
-      1 << Func->template getAssembler<>()->getBundleAlignLog2Bytes();
+  const SizeT BundleSize = 1
+                           << Func->getAssembler<>()->getBundleAlignLog2Bytes();
   Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);
   _pop(T_ecx);
   _bundle_lock();
@@ -1148,8 +1148,8 @@
     return;
   }
   assert(Hi == nullptr);
-  Lo = Func->template makeVariable(IceType_i32);
-  Hi = Func->template makeVariable(IceType_i32);
+  Lo = Func->makeVariable(IceType_i32);
+  Hi = Func->makeVariable(IceType_i32);
   if (BuildDefs::dump()) {
     Lo->setName(Func, Var->getName(Func) + "__lo");
     Hi->setName(Func, Var->getName(Func) + "__hi");
@@ -2241,7 +2241,7 @@
       _mov(CallTargetVar, CallTarget);
       _bundle_lock(InstBundleLock::Opt_AlignToEnd);
       const SizeT BundleSize =
-          1 << Func->template getAssembler<>()->getBundleAlignLog2Bytes();
+          1 << Func->getAssembler<>()->getBundleAlignLog2Bytes();
       _and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1)));
       CallTarget = CallTargetVar;
     }
@@ -2670,7 +2670,7 @@
       // TODO: Should be able to force a spill setup by calling legalize() with
       // Legal_Mem and not Legal_Reg or Legal_Imm.
       typename Traits::SpillVariable *SpillVar =
-          Func->template makeVariable<typename Traits::SpillVariable>(SrcType);
+          Func->makeVariable<typename Traits::SpillVariable>(SrcType);
       SpillVar->setLinkedTo(Dest);
       Variable *Spill = SpillVar;
       Spill->setWeight(RegWeight::Zero);
@@ -2690,8 +2690,7 @@
       Operand *SpillLo, *SpillHi;
       if (auto *Src0Var = llvm::dyn_cast<Variable>(Src0RM)) {
         typename Traits::SpillVariable *SpillVar =
-            Func->template makeVariable<typename Traits::SpillVariable>(
-                IceType_f64);
+            Func->makeVariable<typename Traits::SpillVariable>(IceType_f64);
         SpillVar->setLinkedTo(Src0Var);
         Variable *Spill = SpillVar;
         Spill->setWeight(RegWeight::Zero);
@@ -2719,7 +2718,7 @@
       Src0 = legalize(Src0);
       assert(Src0->getType() == IceType_i64);
       if (llvm::isa<typename Traits::X86OperandMem>(Src0)) {
-        Variable *T = Func->template makeVariable(Dest->getType());
+        Variable *T = Func->makeVariable(Dest->getType());
         _movq(T, Src0);
         _movq(Dest, T);
         break;
@@ -2732,8 +2731,7 @@
       //   hi(s.f64) = t_hi.i32
       //   a.f64 = s.f64
       typename Traits::SpillVariable *SpillVar =
-          Func->template makeVariable<typename Traits::SpillVariable>(
-              IceType_f64);
+          Func->makeVariable<typename Traits::SpillVariable>(IceType_f64);
       SpillVar->setLinkedTo(Dest);
       Variable *Spill = SpillVar;
       Spill->setWeight(RegWeight::Zero);
@@ -2756,7 +2754,7 @@
     case IceType_v8i1: {
       assert(Src0->getType() == IceType_i8);
       InstCall *Call = makeHelperCall(H_bitcast_i8_8xi1, Dest, 1);
-      Variable *Src0AsI32 = Func->template makeVariable(stackSlotType());
+      Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
       // Arguments to functions are required to be at least 32 bits wide.
       lowerCast(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0));
       Call->addArg(Src0AsI32);
@@ -2765,7 +2763,7 @@
     case IceType_v16i1: {
       assert(Src0->getType() == IceType_i16);
       InstCall *Call = makeHelperCall(H_bitcast_i16_16xi1, Dest, 1);
-      Variable *Src0AsI32 = Func->template makeVariable(stackSlotType());
+      Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
       // Arguments to functions are required to be at least 32 bits wide.
       lowerCast(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0));
       Call->addArg(Src0AsI32);
@@ -2836,7 +2834,7 @@
     //
     // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when
     // support for legalizing to mem is implemented.
-    Variable *Slot = Func->template makeVariable(Ty);
+    Variable *Slot = Func->makeVariable(Ty);
     Slot->setWeight(RegWeight::Zero);
     _movp(Slot, legalizeToReg(SourceVectNotLegalized));
 
@@ -3001,8 +2999,8 @@
         NewTy = IceType_v16i8;
         break;
       }
-      Variable *NewSrc0 = Func->template makeVariable(NewTy);
-      Variable *NewSrc1 = Func->template makeVariable(NewTy);
+      Variable *NewSrc0 = Func->makeVariable(NewTy);
+      Variable *NewSrc1 = Func->makeVariable(NewTy);
       lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc0, Src0));
       lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc1, Src1));
       Src0 = NewSrc0;
@@ -3144,7 +3142,7 @@
   if (ElementTy == IceType_i1) {
     // Expand the element to the appropriate size for it to be inserted
     // in the vector.
-    Variable *Expanded = Func->template makeVariable(InVectorElementTy);
+    Variable *Expanded = Func->makeVariable(InVectorElementTy);
     InstCast *Cast = InstCast::create(Func, InstCast::Zext, Expanded,
                                       ElementToInsertNotLegalized);
     lowerCast(Cast);
@@ -3235,7 +3233,7 @@
     //
     // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when
     // support for legalizing to mem is implemented.
-    Variable *Slot = Func->template makeVariable(Ty);
+    Variable *Slot = Func->makeVariable(Ty);
     Slot->setWeight(RegWeight::Zero);
     _movp(Slot, legalizeToReg(SourceVectNotLegalized));
 
@@ -3528,7 +3526,7 @@
     // PNaCl ABI requires arguments to be at least 32 bits wide.
     Operand *ValOp = Instr->getArg(1);
     assert(ValOp->getType() == IceType_i8);
-    Variable *ValExt = Func->template makeVariable(stackSlotType());
+    Variable *ValExt = Func->makeVariable(stackSlotType());
     lowerCast(InstCast::create(Func, InstCast::Zext, ValExt, ValOp));
     InstCall *Call = makeHelperCall(H_call_memset, nullptr, 3);
     Call->addArg(Instr->getArg(0));
@@ -3539,10 +3537,7 @@
   }
   case Intrinsics::NaClReadTP: {
     if (Ctx->getFlags().getUseSandboxing()) {
-      Constant *Zero = Ctx->getConstantZero(IceType_i32);
-      Operand *Src = Traits::X86OperandMem::create(
-          Func, IceType_i32, nullptr, Zero, nullptr, 0,
-          Traits::X86OperandMem::SegReg_GS);
+      Operand *Src = dispatchToConcrete(&Machine::createNaClReadTPSrcOperand);
       Variable *Dest = Instr->getDest();
       Variable *T = nullptr;
       _mov(T, Src);
@@ -3975,7 +3970,7 @@
   _mov(DestHi, Ctx->getConstantZero(IceType_i32));
 }
 
-bool isAdd(const Inst *Inst) {
+inline bool isAdd(const Inst *Inst) {
   if (const InstArithmetic *Arith =
           llvm::dyn_cast_or_null<const InstArithmetic>(Inst)) {
     return (Arith->getOp() == InstArithmetic::Add);
@@ -3983,9 +3978,9 @@
   return false;
 }
 
-void dumpAddressOpt(const Cfg *Func, const Variable *Base,
-                    const Variable *Index, uint16_t Shift, int32_t Offset,
-                    const Inst *Reason) {
+inline void dumpAddressOpt(const Cfg *Func, const Variable *Base,
+                           const Variable *Index, uint16_t Shift,
+                           int32_t Offset, const Inst *Reason) {
   if (!BuildDefs::dump())
     return;
   if (!Func->isVerbose(IceV_AddrOpt))
@@ -4007,8 +4002,8 @@
   Str << ", Shift=" << Shift << ", Offset=" << Offset << "\n";
 }
 
-bool matchTransitiveAssign(const VariablesMetadata *VMetadata, Variable *&Var,
-                           const Inst *&Reason) {
+inline bool matchTransitiveAssign(const VariablesMetadata *VMetadata,
+                                  Variable *&Var, const Inst *&Reason) {
   // Var originates from Var=SrcVar ==>
   //   set Var:=SrcVar
   if (Var == nullptr)
@@ -4032,9 +4027,9 @@
   return false;
 }
 
-bool matchCombinedBaseIndex(const VariablesMetadata *VMetadata, Variable *&Base,
-                            Variable *&Index, uint16_t &Shift,
-                            const Inst *&Reason) {
+inline bool matchCombinedBaseIndex(const VariablesMetadata *VMetadata,
+                                   Variable *&Base, Variable *&Index,
+                                   uint16_t &Shift, const Inst *&Reason) {
   // Index==nullptr && Base is Base=Var1+Var2 ==>
   //   set Base=Var1, Index=Var2, Shift=0
   if (Base == nullptr)
@@ -4067,8 +4062,9 @@
   return false;
 }
 
-bool matchShiftedIndex(const VariablesMetadata *VMetadata, Variable *&Index,
-                       uint16_t &Shift, const Inst *&Reason) {
+inline bool matchShiftedIndex(const VariablesMetadata *VMetadata,
+                              Variable *&Index, uint16_t &Shift,
+                              const Inst *&Reason) {
   // Index is Index=Var*Const && log2(Const)+Shift<=3 ==>
   //   Index=Var, Shift+=log2(Const)
   if (Index == nullptr)
@@ -4117,8 +4113,8 @@
   return false;
 }
 
-bool matchOffsetBase(const VariablesMetadata *VMetadata, Variable *&Base,
-                     int32_t &Offset, const Inst *&Reason) {
+inline bool matchOffsetBase(const VariablesMetadata *VMetadata, Variable *&Base,
+                            int32_t &Offset, const Inst *&Reason) {
   // Base is Base=Var+Const || Base is Base=Const+Var ==>
   //   set Base=Var, Offset+=Const
   // Base is Base=Var-Const ==>
@@ -4158,8 +4154,9 @@
   return false;
 }
 
-void computeAddressOpt(Cfg *Func, const Inst *Instr, Variable *&Base,
-                       Variable *&Index, uint16_t &Shift, int32_t &Offset) {
+inline void computeAddressOpt(Cfg *Func, const Inst *Instr, Variable *&Base,
+                              Variable *&Index, uint16_t &Shift,
+                              int32_t &Offset) {
   Func->resetCurrentNode();
   if (Func->isVerbose(IceV_AddrOpt)) {
     OstreamLocker L(Func->getContext());
@@ -4348,7 +4345,7 @@
     // Sign extend the condition operand if applicable.
     if (SrcTy == IceType_v4f32) {
       // The sext operation takes only integer arguments.
-      Variable *T3 = Func->template makeVariable(IceType_v4i32);
+      Variable *T3 = Func->makeVariable(IceType_v4i32);
       lowerCast(InstCast::create(Func, InstCast::Sext, T3, Condition));
       _movp(T, T3);
     } else if (typeElementType(SrcTy) != IceType_i1) {
@@ -4766,17 +4763,17 @@
     Constant *Index = Ctx->getConstantInt32(I);
 
     // Extract the next two inputs.
-    Variable *Op0 = Func->template makeVariable(ElementTy);
+    Variable *Op0 = Func->makeVariable(ElementTy);
     lowerExtractElement(InstExtractElement::create(Func, Op0, Src0, Index));
-    Variable *Op1 = Func->template makeVariable(ElementTy);
+    Variable *Op1 = Func->makeVariable(ElementTy);
     lowerExtractElement(InstExtractElement::create(Func, Op1, Src1, Index));
 
     // Perform the arithmetic as a scalar operation.
-    Variable *Res = Func->template makeVariable(ElementTy);
+    Variable *Res = Func->makeVariable(ElementTy);
     lowerArithmetic(InstArithmetic::create(Func, Kind, Res, Op0, Op1));
 
     // Insert the result into position.
-    Variable *DestT = Func->template makeVariable(Ty);
+    Variable *DestT = Func->makeVariable(Ty);
     lowerInsertElement(InstInsertElement::create(Func, DestT, T, Res, Index));
     T = DestT;
   }
@@ -4914,7 +4911,7 @@
       this, Context.getNode(), Func);
 }
 
-bool isMemoryOperand(const Operand *Opnd) {
+inline bool isMemoryOperand(const Operand *Opnd) {
   if (const auto Var = llvm::dyn_cast<Variable>(Opnd))
     return !Var->hasReg();
   // We treat vector undef values the same as a memory operand,
@@ -5023,7 +5020,7 @@
         // TODO(stichnot): Opportunity for register randomization.
         RegNum = RegsForType.find_first();
         Preg = getPhysicalRegister(RegNum, Dest->getType());
-        SpillLoc = Func->template makeVariable(Dest->getType());
+        SpillLoc = Func->makeVariable(Dest->getType());
         // Create a fake def of the physical register to avoid
         // liveness inconsistency problems during late-stage liveness
         // analysis (e.g. asm-verbose mode).
@@ -5365,7 +5362,7 @@
 Variable *TargetX86Base<Machine>::makeReg(Type Type, int32_t RegNum) {
   // There aren't any 64-bit integer registers for x86-32.
   assert(Type != IceType_i64);
-  Variable *Reg = Func->template makeVariable(Type);
+  Variable *Reg = Func->makeVariable(Type);
   if (RegNum == Variable::NoRegister)
     Reg->setWeightInfinite();
   else