Use three-address form of imul. Previously we did not take advantage of the three-address versions of the imul instruction. With this change we are able to avoid some copies before imuls. BUG= R=stichnot@chromium.org Review URL: https://codereview.chromium.org/1365433004 .

commit: e11f878a3569f9e316c478d2e5595e0b000d0720 [log] [tgz]
author: David Sehr <sehr@chromium.org> Tue Oct 06 10:26:57 2015 -0700
committer: David Sehr <sehr@chromium.org> Tue Oct 06 10:26:57 2015 -0700
tree: a96a3f995e3ec6672f01b9b4b9b6d07f120314be
parent: 578f1161ad7a3ef98c90d88e5734940523d1f8e0 [diff]
diff --git a/src/IceAssemblerX86Base.h b/src/IceAssemblerX86Base.h
index 7c4be6f..f9de5a5 100644
--- a/src/IceAssemblerX86Base.h
+++ b/src/IceAssemblerX86Base.h

@@ -751,6 +751,11 @@
   void imul(Type Ty, typename Traits::GPRRegister reg);
   void imul(Type Ty, const typename Traits::Address &address);
 
+  void imul(Type Ty, typename Traits::GPRRegister dst,
+            typename Traits::GPRRegister src, const Immediate &imm);
+  void imul(Type Ty, typename Traits::GPRRegister dst,
+            const typename Traits::Address &address, const Immediate &imm);
+
   void mul(Type Ty, typename Traits::GPRRegister reg);
   void mul(Type Ty, const typename Traits::Address &address);
 

diff --git a/src/IceAssemblerX86BaseImpl.h b/src/IceAssemblerX86BaseImpl.h
index b1013d6..9bdc449 100644
--- a/src/IceAssemblerX86BaseImpl.h
+++ b/src/IceAssemblerX86BaseImpl.h

@@ -2581,6 +2581,46 @@
 }
 
 template <class Machine>
+void AssemblerX86Base<Machine>::imul(Type Ty, typename Traits::GPRRegister dst,
+                                     typename Traits::GPRRegister src,
+                                     const Immediate &imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  assert(Ty == IceType_i16 || Ty == IceType_i32);
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride();
+  emitRexRB(Ty, dst, src);
+  if (imm.is_int8()) {
+    emitUint8(0x6B);
+    emitRegisterOperand(gprEncoding(dst), gprEncoding(src));
+    emitUint8(imm.value() & 0xFF);
+  } else {
+    emitUint8(0x69);
+    emitRegisterOperand(gprEncoding(dst), gprEncoding(src));
+    emitImmediate(Ty, imm);
+  }
+}
+
+template <class Machine>
+void AssemblerX86Base<Machine>::imul(Type Ty, typename Traits::GPRRegister dst,
+                                     const typename Traits::Address &address,
+                                     const Immediate &imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+  assert(Ty == IceType_i16 || Ty == IceType_i32);
+  if (Ty == IceType_i16)
+    emitOperandSizeOverride();
+  emitRex(Ty, address, dst);
+  if (imm.is_int8()) {
+    emitUint8(0x6B);
+    emitOperand(gprEncoding(dst), address);
+    emitUint8(imm.value() & 0xFF);
+  } else {
+    emitUint8(0x69);
+    emitOperand(gprEncoding(dst), address);
+    emitImmediate(Ty, imm);
+  }
+}
+
+template <class Machine>
 void AssemblerX86Base<Machine>::mul(Type Ty, typename Traits::GPRRegister reg) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   if (Ty == IceType_i16)

diff --git a/src/IceInstX86Base.h b/src/IceInstX86Base.h
index 9b0b5fc..36b34ac 100644
--- a/src/IceInstX86Base.h
+++ b/src/IceInstX86Base.h

@@ -68,6 +68,7 @@
     Icmp,
     Idiv,
     Imul,
+    ImulImm,
     Insertps,
     Jmp,
     Label,
@@ -1622,6 +1623,25 @@
 };
 
 template <class Machine>
+class InstX86ImulImm
+    : public InstX86BaseThreeAddressop<Machine, InstX86Base<Machine>::ImulImm> {
+public:
+  static InstX86ImulImm *create(Cfg *Func, Variable *Dest, Operand *Source0,
+                                Operand *Source1) {
+    return new (Func->allocate<InstX86ImulImm>())
+        InstX86ImulImm(Func, Dest, Source0, Source1);
+  }
+
+  void emit(const Cfg *Func) const override;
+  void emitIAS(const Cfg *Func) const override;
+
+private:
+  InstX86ImulImm(Cfg *Func, Variable *Dest, Operand *Source0, Operand *Source1)
+      : InstX86BaseThreeAddressop<Machine, InstX86Base<Machine>::ImulImm>(
+            Func, Dest, Source0, Source1) {}
+};
+
+template <class Machine>
 class InstX86Mulps
     : public InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::Mulps, true> {
 public:
@@ -2790,6 +2810,7 @@
   using XorRMW = InstX86XorRMW<Machine>;
   using Pxor = InstX86Pxor<Machine>;
   using Imul = InstX86Imul<Machine>;
+  using ImulImm = InstX86ImulImm<Machine>;
   using Mulps = InstX86Mulps<Machine>;
   using Mulss = InstX86Mulss<Machine>;
   using Pmull = InstX86Pmull<Machine>;
@@ -2897,6 +2918,7 @@
   template <> const char *InstX86XorRMW<Machine>::Base::Opcode = "xor";        \
   template <> const char *InstX86Pxor<Machine>::Base::Opcode = "pxor";         \
   template <> const char *InstX86Imul<Machine>::Base::Opcode = "imul";         \
+  template <> const char *InstX86ImulImm<Machine>::Base::Opcode = "imul";      \
   template <> const char *InstX86Mulps<Machine>::Base::Opcode = "mulps";       \
   template <> const char *InstX86Mulss<Machine>::Base::Opcode = "mulss";       \
   template <> const char *InstX86Pmull<Machine>::Base::Opcode = "pmull";       \

diff --git a/src/IceInstX86BaseImpl.h b/src/IceInstX86BaseImpl.h
index c6ebd3a..f2e7ff2 100644
--- a/src/IceInstX86BaseImpl.h
+++ b/src/IceInstX86BaseImpl.h

@@ -1330,8 +1330,8 @@
         &InstX86Base<Machine>::Traits::Assembler::imul};
     emitIASOpTyGPR<Machine>(Func, Ty, this->getSrc(1), Emitter);
   } else {
-    // We only use imul as a two-address instruction even though there is a 3
-    // operand version when one of the operands is a constant.
+    // The two-address version is used when multiplying by a non-constant
+    // or doing an 8-bit multiply.
     assert(Var == this->getSrc(0));
     static const typename InstX86Base<
         Machine>::Traits::Assembler::GPREmitterRegOp Emitter = {
@@ -1343,6 +1343,43 @@
 }
 
 template <class Machine>
+void InstX86ImulImm<Machine>::emit(const Cfg *Func) const {
+  if (!BuildDefs::dump())
+    return;
+  Ostream &Str = Func->getContext()->getStrEmit();
+  assert(this->getSrcSize() == 2);
+  Variable *Dest = this->getDest();
+  assert(Dest->getType() == IceType_i16 || Dest->getType() == IceType_i32);
+  assert(llvm::isa<Constant>(this->getSrc(1)));
+  Str << "\timul" << this->getWidthString(Dest->getType()) << "\t";
+  this->getSrc(1)->emit(Func);
+  Str << ", ";
+  this->getSrc(0)->emit(Func);
+  Str << ", ";
+  Dest->emit(Func);
+}
+
+template <class Machine>
+void InstX86ImulImm<Machine>::emitIAS(const Cfg *Func) const {
+  assert(this->getSrcSize() == 2);
+  const Variable *Dest = this->getDest();
+  Type Ty = Dest->getType();
+  assert(llvm::isa<Constant>(this->getSrc(1)));
+  static const typename InstX86Base<Machine>::Traits::Assembler::
+      template ThreeOpImmEmitter<
+          typename InstX86Base<Machine>::Traits::RegisterSet::GPRRegister,
+          typename InstX86Base<Machine>::Traits::RegisterSet::GPRRegister>
+          Emitter = {&InstX86Base<Machine>::Traits::Assembler::imul,
+                     &InstX86Base<Machine>::Traits::Assembler::imul};
+  emitIASThreeOpImmOps<
+      Machine, typename InstX86Base<Machine>::Traits::RegisterSet::GPRRegister,
+      typename InstX86Base<Machine>::Traits::RegisterSet::GPRRegister,
+      InstX86Base<Machine>::Traits::RegisterSet::getEncodedGPR,
+      InstX86Base<Machine>::Traits::RegisterSet::getEncodedGPR>(
+      Func, Ty, Dest, this->getSrc(0), this->getSrc(1), Emitter);
+}
+
+template <class Machine>
 void InstX86Insertps<Machine>::emitIAS(const Cfg *Func) const {
   assert(this->getSrcSize() == 3);
   assert(static_cast<typename InstX86Base<Machine>::Traits::TargetLowering *>(

diff --git a/src/IceTargetLoweringX86Base.h b/src/IceTargetLoweringX86Base.h
index 4875e65..1df99e5 100644
--- a/src/IceTargetLoweringX86Base.h
+++ b/src/IceTargetLoweringX86Base.h

@@ -428,6 +428,9 @@
   void _imul(Variable *Dest, Operand *Src0) {
     Context.insert(Traits::Insts::Imul::create(Func, Dest, Src0));
   }
+  void _imul_imm(Variable *Dest, Operand *Src0, Constant *Imm) {
+    Context.insert(Traits::Insts::ImulImm::create(Func, Dest, Src0, Imm));
+  }
   void _insertps(Variable *Dest, Operand *Src0, Operand *Src1) {
     Context.insert(Traits::Insts::Insertps::create(Func, Dest, Src0, Src1));
   }

diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h
index 677bc98..00c2870 100644
--- a/src/IceTargetLoweringX86BaseImpl.h
+++ b/src/IceTargetLoweringX86BaseImpl.h

@@ -1618,11 +1618,17 @@
     if (isByteSizedArithType(Dest->getType())) {
       _mov(T, Src0, Traits::RegisterSet::Reg_eax);
       Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
+      _imul(T, Src0 == Src1 ? T : Src1);
+      _mov(Dest, T);
+    } else if (auto *ImmConst = llvm::dyn_cast<ConstantInteger32>(Src1)) {
+      T = makeReg(Dest->getType());
+      _imul_imm(T, Src0, ImmConst);
+      _mov(Dest, T);
     } else {
       _mov(T, Src0);
+      _imul(T, Src0 == Src1 ? T : Src1);
+      _mov(Dest, T);
     }
-    _imul(T, Src0 == Src1 ? T : Src1);
-    _mov(Dest, T);
     break;
   case InstArithmetic::Shl:
     _mov(T, Src0);

diff --git a/tests_lit/assembler/x86/immediate_encodings.ll b/tests_lit/assembler/x86/immediate_encodings.ll
index 0c96720..e16ade3 100644
--- a/tests_lit/assembler/x86/immediate_encodings.ll
+++ b/tests_lit/assembler/x86/immediate_encodings.ll

@@ -250,6 +250,88 @@
 ; CHECK-LABEL: testMul32Imm16Neg
 ; CHECK: 69 c0 01 ff ff ff  imul eax,eax,0xffffff01
 
+define i32 @testMul32Imm32ThreeAddress(i32 %a) {
+entry:
+  %mul = mul i32 232, %a
+  %add = add i32 %mul, %a
+  ret i32 %add
+}
+; CHECK-LABEL: testMul32Imm32ThreeAddress
+; CHECK: 69 c8 e8 00 00 00  imul ecx,eax,0xe8
+
+define i32 @testMul32Mem32Imm32ThreeAddress(i32 %addr_arg) {
+entry:
+  %__1 = inttoptr i32 %addr_arg to i32*
+  %a = load i32, i32* %__1, align 1
+  %mul = mul i32 232, %a
+  ret i32 %mul
+}
+; CHECK-LABEL: testMul32Mem32Imm32ThreeAddress
+; CHECK: 69 00 e8 00 00 00  imul eax,DWORD PTR [eax],0xe8
+
+define i32 @testMul32Imm8ThreeAddress(i32 %a) {
+entry:
+  %mul = mul i32 127, %a
+  %add = add i32 %mul, %a
+  ret i32 %add
+}
+; CHECK-LABEL: testMul32Imm8ThreeAddress
+; CHECK: 6b c8 7f imul ecx,eax,0x7f
+
+define i32 @testMul32Mem32Imm8ThreeAddress(i32 %addr_arg) {
+entry:
+  %__1 = inttoptr i32 %addr_arg to i32*
+  %a = load i32, i32* %__1, align 1
+  %mul = mul i32 127, %a
+  ret i32 %mul
+}
+; CHECK-LABEL: testMul32Mem32Imm8ThreeAddress
+; CHECK: 6b 00 7f imul eax,DWORD PTR [eax],0x7f
+
+define i32 @testMul16Imm16ThreeAddress(i32 %a) {
+entry:
+  %arg_i16 = trunc i32 %a to i16
+  %mul = mul i16 232, %arg_i16
+  %add = add i16 %mul, %arg_i16
+  %result = zext i16 %add to i32
+  ret i32 %result
+}
+; CHECK-LABEL: testMul16Imm16ThreeAddress
+; CHECK: 66 69 c8 e8 00 imul cx,ax,0xe8
+
+define i32 @testMul16Mem16Imm16ThreeAddress(i32 %addr_arg) {
+entry:
+  %__1 = inttoptr i32 %addr_arg to i16*
+  %a = load i16, i16* %__1, align 1
+  %mul = mul i16 232, %a
+  %result = zext i16 %mul to i32
+  ret i32 %result
+}
+; CHECK-LABEL: testMul16Mem16Imm16ThreeAddress
+; CHECK: 66 69 00 e8 00 imul ax,WORD PTR [eax],0xe8
+
+define i32 @testMul16Imm8ThreeAddress(i32 %a) {
+entry:
+  %arg_i16 = trunc i32 %a to i16
+  %mul = mul i16 127, %arg_i16
+  %add = add i16 %mul, %arg_i16
+  %result = zext i16 %add to i32
+  ret i32 %result
+}
+; CHECK-LABEL: testMul16Imm8ThreeAddress
+; CHECK: 66 6b c8 7f imul cx,ax,0x7f
+
+define i32 @testMul16Mem16Imm8ThreeAddress(i32 %addr_arg) {
+entry:
+  %__1 = inttoptr i32 %addr_arg to i16*
+  %a = load i16, i16* %__1, align 1
+  %mul = mul i16 127, %a
+  %result = zext i16 %mul to i32
+  ret i32 %result
+}
+; CHECK-LABEL: testMul16Mem16Imm8ThreeAddress
+; CHECK: 66 6b 00 7f imul ax,WORD PTR [eax],0x7f
+
 ; The GPR shift instructions either allow an 8-bit immediate or
 ; have a special encoding for "1".
 define internal i32 @testShl16Imm8(i32 %arg) {
commit	e11f878a3569f9e316c478d2e5595e0b000d0720	[log] [tgz]
author	David Sehr <sehr@chromium.org>	Tue Oct 06 10:26:57 2015 -0700
committer	David Sehr <sehr@chromium.org>	Tue Oct 06 10:26:57 2015 -0700
tree	a96a3f995e3ec6672f01b9b4b9b6d07f120314be
parent	578f1161ad7a3ef98c90d88e5734940523d1f8e0 [diff]