emitIAS for store and indirect calls.

We can't do direct calls via the .long sym hack, since that is
normally for an absolute relocation, but calls are expecting
relative relocations (except for reg/mem forms).

Nop-out the InstFake emitIAS methods. Remove the generic dispatcher
that redirects emitIAS() to emit(), since only branches and labels
are left.

BUG=none
R=stichnot@chromium.org

Review URL: https://codereview.chromium.org/647193003
diff --git a/src/IceInst.h b/src/IceInst.h
index c34e6c9..bbd817b 100644
--- a/src/IceInst.h
+++ b/src/IceInst.h
@@ -730,7 +730,7 @@
     return new (Func->allocateInst<InstFakeDef>()) InstFakeDef(Func, Dest, Src);
   }
   void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override { emit(Func); }
+  void emitIAS(const Cfg * /* Func */) const override {}
   void dump(const Cfg *Func) const override;
   static bool classof(const Inst *Inst) { return Inst->getKind() == FakeDef; }
 
@@ -753,7 +753,7 @@
     return new (Func->allocateInst<InstFakeUse>()) InstFakeUse(Func, Src);
   }
   void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override { emit(Func); }
+  void emitIAS(const Cfg * /* Func */) const override {}
   void dump(const Cfg *Func) const override;
   static bool classof(const Inst *Inst) { return Inst->getKind() == FakeUse; }
 
@@ -783,7 +783,7 @@
   }
   const Inst *getLinked() const { return Linked; }
   void emit(const Cfg *Func) const override;
-  void emitIAS(const Cfg *Func) const override { emit(Func); }
+  void emitIAS(const Cfg * /* Func */) const override {}
   void dump(const Cfg *Func) const override;
   static bool classof(const Inst *Inst) { return Inst->getKind() == FakeKill; }
 
@@ -811,7 +811,6 @@
       : Inst(Func, Kind, MaxSrcs, Dest) {
     assert(Kind >= Target);
   }
-  void emitIAS(const Cfg *Func) const override { emit(Func); }
   ~InstTarget() override {}
 };
 
diff --git a/src/IceInstX8632.cpp b/src/IceInstX8632.cpp
index e41fe82..dbacd58 100644
--- a/src/IceInstX8632.cpp
+++ b/src/IceInstX8632.cpp
@@ -447,6 +447,52 @@
   Func->getTarget()->resetStackAdjustment();
 }
 
+void InstX8632Call::emitIAS(const Cfg *Func) const {
+  x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
+  intptr_t StartPosition = Asm->GetPosition();
+  Operand *Target = getCallTarget();
+  bool NeedsFallback = false;
+  if (const auto Var = llvm::dyn_cast<Variable>(Target)) {
+    if (Var->hasReg()) {
+      Asm->call(RegX8632::getEncodedGPR(Var->getRegNum()));
+    } else {
+      Asm->call(static_cast<TargetX8632 *>(Func->getTarget())
+                    ->stackVarToAsmOperand(Var));
+    }
+  } else if (const auto Mem = llvm::dyn_cast<OperandX8632Mem>(Target)) {
+    assert(Mem->getSegmentRegister() == OperandX8632Mem::DefaultSegment);
+    Asm->call(Mem->toAsmAddress(Asm));
+  } else if (const auto CR = llvm::dyn_cast<ConstantRelocatable>(Target)) {
+    assert(CR->getOffset() == 0 && "We only support calling a function");
+    Asm->call(CR);
+    NeedsFallback = true;
+  } else if (const auto Imm = llvm::dyn_cast<ConstantInteger32>(Target)) {
+    // NaCl trampoline calls refer to an address within the sandbox directly.
+    // This is usually only needed for non-IRT builds and otherwise not
+    // very portable or stable. For this, we would use the 0xE8 opcode
+    // (relative version of call) and there should be a PC32 reloc too.
+    // The PC32 reloc will have symbol index 0, and the absolute address
+    // would be encoded as an offset relative to the next instruction.
+    // TODO(jvoung): Do we need to support this?
+    (void)Imm;
+    llvm_unreachable("Unexpected call to absolute address");
+  } else {
+    llvm_unreachable("Unexpected operand type");
+  }
+  if (NeedsFallback) {
+    // TODO(jvoung): The ".long sym" hack doesn't work, since we need
+    // a pc-rel relocation and not an absolute relocation.
+    //
+    // Still, we have at least filled the assembler buffer so that the
+    // instruction sizes/positions are correct for jumps.
+    // For now, fall back to the regular .s emission, after filling the buffer.
+    emit(Func);
+  } else {
+    emitIASBytes(Func, Asm, StartPosition);
+  }
+  Func->getTarget()->resetStackAdjustment();
+}
+
 void InstX8632Call::dump(const Cfg *Func) const {
   Ostream &Str = Func->getContext()->getStrDump();
   if (getDest()) {
@@ -539,8 +585,7 @@
         x86::DisplacementRelocation::create(Asm, FK_Abs_4, Reloc);
     (Asm->*(Emitter.GPRImm))(Ty, VarReg, x86::Immediate(Fixup));
   } else if (const auto Split = llvm::dyn_cast<VariableSplit>(Src)) {
-    x86::Address SrcAddr = Split->toAsmAddress(Func);
-    (Asm->*(Emitter.GPRAddr))(Ty, VarReg, SrcAddr);
+    (Asm->*(Emitter.GPRAddr))(Ty, VarReg, Split->toAsmAddress(Func));
   } else {
     llvm_unreachable("Unexpected operand type");
   }
@@ -570,6 +615,25 @@
   emitIASBytes(Func, Asm, StartPosition);
 }
 
+void emitIASAsAddrOpTyGPR(const Cfg *Func, Type Ty, const Operand *Op0,
+                          const Operand *Op1,
+                          const x86::AssemblerX86::GPREmitterAddrOp &Emitter) {
+  if (const auto Op0Var = llvm::dyn_cast<Variable>(Op0)) {
+    assert(!Op0Var->hasReg());
+    x86::Address StackAddr(static_cast<TargetX8632 *>(Func->getTarget())
+                               ->stackVarToAsmOperand(Op0Var));
+    emitIASAddrOpTyGPR(Func, Ty, StackAddr, Op1, Emitter);
+  } else if (const auto Op0Mem = llvm::dyn_cast<OperandX8632Mem>(Op0)) {
+    x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
+    Op0Mem->emitSegmentOverride(Asm);
+    emitIASAddrOpTyGPR(Func, Ty, Op0Mem->toAsmAddress(Asm), Op1, Emitter);
+  } else if (const auto Split = llvm::dyn_cast<VariableSplit>(Op0)) {
+    emitIASAddrOpTyGPR(Func, Ty, Split->toAsmAddress(Func), Op1, Emitter);
+  } else {
+    llvm_unreachable("Unexpected operand type");
+  }
+}
+
 void emitIASGPRShift(const Cfg *Func, Type Ty, const Variable *Var,
                      const Operand *Src,
                      const x86::AssemblerX86::GPREmitterShiftOp &Emitter) {
@@ -1662,20 +1726,13 @@
   static const x86::AssemblerX86::GPREmitterAddrOp AddrEmitter = {
     &x86::AssemblerX86::cmp, &x86::AssemblerX86::cmp
   };
-  if (const Variable *SrcVar0 = llvm::dyn_cast<Variable>(Src0)) {
+  if (const auto SrcVar0 = llvm::dyn_cast<Variable>(Src0)) {
     if (SrcVar0->hasReg()) {
       emitIASRegOpTyGPR(Func, Ty, SrcVar0, Src1, RegEmitter);
-    } else {
-      x86::Address StackAddr(static_cast<TargetX8632 *>(Func->getTarget())
-                                 ->stackVarToAsmOperand(SrcVar0));
-      emitIASAddrOpTyGPR(Func, Ty, StackAddr, Src1, AddrEmitter);
+      return;
     }
-  } else if (const OperandX8632Mem *SrcMem0 =
-                 llvm::dyn_cast<OperandX8632Mem>(Src0)) {
-    x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-    SrcMem0->emitSegmentOverride(Asm);
-    emitIASAddrOpTyGPR(Func, Ty, SrcMem0->toAsmAddress(Asm), Src1, AddrEmitter);
   }
+  emitIASAsAddrOpTyGPR(Func, Ty, Src0, Src1, AddrEmitter);
 }
 
 void InstX8632Icmp::dump(const Cfg *Func) const {
@@ -1754,22 +1811,14 @@
   static const x86::AssemblerX86::GPREmitterAddrOp AddrEmitter = {
     &x86::AssemblerX86::test, &x86::AssemblerX86::test
   };
-  if (const Variable *SrcVar0 = llvm::dyn_cast<Variable>(Src0)) {
+  if (const auto SrcVar0 = llvm::dyn_cast<Variable>(Src0)) {
     if (SrcVar0->hasReg()) {
       emitIASRegOpTyGPR(Func, Ty, SrcVar0, Src1, RegEmitter);
-    } else {
-      llvm_unreachable("Nothing actually generates this so it's untested");
-      x86::Address StackAddr(static_cast<TargetX8632 *>(Func->getTarget())
-                                 ->stackVarToAsmOperand(SrcVar0));
-      emitIASAddrOpTyGPR(Func, Ty, StackAddr, Src1, AddrEmitter);
+      return;
     }
-  } else if (const OperandX8632Mem *SrcMem0 =
-                 llvm::dyn_cast<OperandX8632Mem>(Src0)) {
-    llvm_unreachable("Nothing actually generates this so it's untested");
-    x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
-    SrcMem0->emitSegmentOverride(Asm);
-    emitIASAddrOpTyGPR(Func, Ty, SrcMem0->toAsmAddress(Asm), Src1, AddrEmitter);
   }
+  llvm_unreachable("Nothing actually generates this so it's untested");
+  emitIASAsAddrOpTyGPR(Func, Ty, Src0, Src1, AddrEmitter);
 }
 
 void InstX8632Test::dump(const Cfg *Func) const {
@@ -1807,6 +1856,38 @@
   Str << "\n";
 }
 
+void InstX8632Store::emitIAS(const Cfg *Func) const {
+  assert(getSrcSize() == 2);
+  const Operand *Dest = getSrc(1);
+  const Operand *Src = getSrc(0);
+  Type DestTy = Dest->getType();
+  if (isScalarFloatingType(DestTy)) {
+    // Src must be a register, since Dest is a Mem operand of some kind.
+    const Variable *SrcVar = llvm::cast<Variable>(Src);
+    assert(SrcVar->hasReg());
+    RegX8632::XmmRegister SrcReg = RegX8632::getEncodedXmm(SrcVar->getRegNum());
+    x86::AssemblerX86 *Asm = Func->getAssembler<x86::AssemblerX86>();
+    intptr_t StartPosition = Asm->GetPosition();
+    if (const auto DestVar = llvm::dyn_cast<Variable>(Dest)) {
+      assert(!DestVar->hasReg());
+      x86::Address StackAddr(static_cast<TargetX8632 *>(Func->getTarget())
+                                 ->stackVarToAsmOperand(DestVar));
+      Asm->movss(DestTy, StackAddr, SrcReg);
+    } else {
+      const auto DestMem = llvm::cast<OperandX8632Mem>(Dest);
+      assert(DestMem->getSegmentRegister() == OperandX8632Mem::DefaultSegment);
+      Asm->movss(DestTy, DestMem->toAsmAddress(Asm), SrcReg);
+    }
+    emitIASBytes(Func, Asm, StartPosition);
+    return;
+  } else {
+    assert(isScalarIntegerType(DestTy));
+    static const x86::AssemblerX86::GPREmitterAddrOp GPRAddrEmitter = {
+        &x86::AssemblerX86::mov, &x86::AssemblerX86::mov};
+    emitIASAsAddrOpTyGPR(Func, DestTy, Dest, Src, GPRAddrEmitter);
+  }
+}
+
 void InstX8632Store::dump(const Cfg *Func) const {
   Ostream &Str = Func->getContext()->getStrDump();
   Str << "mov." << getSrc(0)->getType() << " ";
diff --git a/src/IceInstX8632.h b/src/IceInstX8632.h
index 8fa4bdb..8235d1f 100644
--- a/src/IceInstX8632.h
+++ b/src/IceInstX8632.h
@@ -322,6 +322,8 @@
   uint32_t getEmitInstCount() const override { return 0; }
   IceString getName(const Cfg *Func) const;
   void emit(const Cfg *Func) const override;
+  // TODO(jvoung): Fill this in.
+  void emitIAS(const Cfg *Func) const override { emit(Func); }
   void dump(const Cfg *Func) const override;
 
 private:
@@ -383,6 +385,8 @@
     return Sum;
   }
   void emit(const Cfg *Func) const override;
+  // TODO(jvoung): Fill this in.
+  void emitIAS(const Cfg *Func) const override { emit(Func); }
   void dump(const Cfg *Func) const override;
   static bool classof(const Inst *Inst) { return isClassof(Inst, Br); }
 
@@ -429,6 +433,7 @@
   }
   Operand *getCallTarget() const { return getSrc(0); }
   void emit(const Cfg *Func) const override;
+  void emitIAS(const Cfg *Func) const override;
   void dump(const Cfg *Func) const override;
   static bool classof(const Inst *Inst) { return isClassof(Inst, Call); }
 
@@ -1274,6 +1279,7 @@
         InstX8632Store(Func, Value, Mem);
   }
   void emit(const Cfg *Func) const override;
+  void emitIAS(const Cfg *Func) const override;
   void dump(const Cfg *Func) const override;
   static bool classof(const Inst *Inst) { return isClassof(Inst, Store); }
 
diff --git a/src/assembler_ia32.cpp b/src/assembler_ia32.cpp
index a253b8d..9042cf1 100644
--- a/src/assembler_ia32.cpp
+++ b/src/assembler_ia32.cpp
@@ -75,13 +75,6 @@
   EmitOperand(2, address);
 }
 
-void AssemblerX86::call(Label *label) {
-  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
-  EmitUint8(0xE8);
-  static const int kSize = 5;
-  EmitLabel(label, kSize);
-}
-
 void AssemblerX86::call(const ConstantRelocatable *label) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   intptr_t call_start = buffer_.GetPosition();
diff --git a/src/assembler_ia32.h b/src/assembler_ia32.h
index e33838d..6ad8fae 100644
--- a/src/assembler_ia32.h
+++ b/src/assembler_ia32.h
@@ -461,7 +461,6 @@
    */
   void call(GPRRegister reg);
   void call(const Address &address);
-  void call(Label *label);
   void call(const ConstantRelocatable *label);
 
   static const intptr_t kCallExternalLabelSize = 5;
diff --git a/tests_lit/llvm2ice_tests/64bit.pnacl.ll b/tests_lit/llvm2ice_tests/64bit.pnacl.ll
index 0ba403c..0fd5f4f 100644
--- a/tests_lit/llvm2ice_tests/64bit.pnacl.ll
+++ b/tests_lit/llvm2ice_tests/64bit.pnacl.ll
@@ -99,9 +99,8 @@
 ; CHECK:      mov     dword ptr [esp + 4]
 ; CHECK-NEXT: mov     dword ptr [esp]
 ; CHECK-NEXT: mov     dword ptr [esp + 8], 123
-; Bundle padding added by -triple=i686-none-nacl.
-; CHECK-NEXT: nop
-; CHECK-NEXT: mov     dword ptr [esp + 16], 3735928559
+; Bundle padding might be added (so not using -NEXT).
+; CHECK:      mov     dword ptr [esp + 16], 3735928559
 ; CHECK-NEXT: mov     dword ptr [esp + 12], 305419896
 ; CHECK-NEXT: call    -4
 ; CALLTARGETS: call ignore64BitArgNoInline
@@ -111,9 +110,8 @@
 ; OPTM1:      mov     dword ptr [esp + 4]
 ; OPTM1-NEXT: mov     dword ptr [esp]
 ; OPTM1-NEXT: mov     dword ptr [esp + 8], 123
-; Bundle padding added by -triple=i686-none-nacl.
-; OPTM1-NEXT: nop
-; OPTM1-NEXT: mov     dword ptr [esp + 16], 3735928559
+; Bundle padding might be added (so not using -NEXT).
+; OPTM1:      mov     dword ptr [esp + 16], 3735928559
 ; OPTM1-NEXT: mov     dword ptr [esp + 12], 305419896
 ; OPTM1-NEXT: call    -4
 
diff --git a/tests_lit/llvm2ice_tests/8bit.pnacl.ll b/tests_lit/llvm2ice_tests/8bit.pnacl.ll
index 6e98f0e..38c2441 100644
--- a/tests_lit/llvm2ice_tests/8bit.pnacl.ll
+++ b/tests_lit/llvm2ice_tests/8bit.pnacl.ll
@@ -276,6 +276,20 @@
 ; CHECK-LABEL: icmp8BitMemSwapped
 ; CHECK: cmp {{[abcd]l|byte ptr}}
 
+define internal i32 @selectI8Var(i32 %a, i32 %b) {
+entry:
+  %a_8 = trunc i32 %a to i8
+  %b_8 = trunc i32 %b to i8
+  %cmp = icmp slt i8 %a_8, %b_8
+  %ret = select i1 %cmp, i8 %a_8, i8 %b_8
+  %ret_ext = zext i8 %ret to i32
+  ret i32 %ret_ext
+}
+; CHECK-LABEL: selectI8Var
+; CHECK: cmp
+; CHECK: jl
+; CHECK: mov {{[a-d]l}}
+
 define internal i32 @testPhi8(i32 %arg, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9, i32 %arg10) {
 entry:
   %trunc = trunc i32 %arg to i8
@@ -315,5 +329,46 @@
 ; CHECK-DAG: mov {{.*}}, byte ptr
 ; CHECK-DAG: mov byte ptr {{.*}}
 
+@global8 = internal global [1 x i8] c"\01", align 4
+
+define i32 @load_i8(i32 %addr_arg) {
+entry:
+  %addr = inttoptr i32 %addr_arg to i8*
+  %ret = load i8* %addr, align 1
+  %ret_ext = zext i8 %ret to i32
+  ret i32 %ret_ext
+}
+; CHECK-LABEL: load_i8
+; CHECK: mov {{[a-d]l}}, byte ptr
+
+define i32 @load_i8_global(i32 %addr_arg) {
+entry:
+  %addr = bitcast [1 x i8]* @global8 to i8*
+  %ret = load i8* %addr, align 1
+  %ret_ext = zext i8 %ret to i32
+  ret i32 %ret_ext
+}
+; CHECK-LABEL: load_i8_global
+; CHECK: mov {{[a-d]l}}, byte ptr
+
+define void @store_i8(i32 %addr_arg, i32 %val) {
+entry:
+  %val_trunc = trunc i32 %val to i8
+  %addr = inttoptr i32 %addr_arg to i8*
+  store i8 %val_trunc, i8* %addr, align 1
+  ret void
+}
+; CHECK-LABEL: store_i8
+; CHECK: mov byte ptr {{.*}}, {{[a-d]l}}
+
+define void @store_i8_const(i32 %addr_arg) {
+entry:
+  %addr = inttoptr i32 %addr_arg to i8*
+  store i8 123, i8* %addr, align 1
+  ret void
+}
+; CHECK-LABEL: store_i8_const
+; CHECK: mov byte ptr {{.*}}, 123
+
 ; ERRORS-NOT: ICE translation error
 ; DUMP-NOT: SZ
diff --git a/tests_lit/llvm2ice_tests/callindirect.pnacl.ll b/tests_lit/llvm2ice_tests/callindirect.pnacl.ll
index 2e6c7c1..f70b23d 100644
--- a/tests_lit/llvm2ice_tests/callindirect.pnacl.ll
+++ b/tests_lit/llvm2ice_tests/callindirect.pnacl.ll
@@ -26,16 +26,56 @@
   call void %__1()
   ret void
 }
+; CHECK-LABEL: CallIndirect
 ; CHECK: call [[REGISTER:[a-z]+]]
 ; CHECK: call [[REGISTER]]
 ; CHECK: call [[REGISTER]]
 ; CHECK: call [[REGISTER]]
 ; CHECK: call [[REGISTER]]
 ;
+; OPTM1-LABEL: CallIndirect
 ; OPTM1: call [[TARGET:.+]]
 ; OPTM1: call [[TARGET]]
 ; OPTM1: call [[TARGET]]
 ; OPTM1: call [[TARGET]]
 ; OPTM1: call [[TARGET]]
 
+@fp_v = internal global [4 x i8] zeroinitializer, align 4
+
+define internal void @CallIndirectGlobal() {
+entry:
+  %fp_ptr_i32 = bitcast [4 x i8]* @fp_v to i32*
+  %fp_ptr = load i32* %fp_ptr_i32, align 1
+  %fp = inttoptr i32 %fp_ptr to void ()*
+  call void %fp()
+  call void %fp()
+  call void %fp()
+  call void %fp()
+  ret void
+}
+; CHECK-LABEL: CallIndirectGlobal
+; CHECK: call [[REGISTER:[a-z]+]]
+; CHECK: call [[REGISTER]]
+; CHECK: call [[REGISTER]]
+; CHECK: call [[REGISTER]]
+;
+; OPTM1-LABEL: CallIndirectGlobal
+; OPTM1: call [[TARGET:.+]]
+; OPTM1: call [[TARGET]]
+; OPTM1: call [[TARGET]]
+; OPTM1: call [[TARGET]]
+
+; Calling an absolute address is used for non-IRT PNaCl pexes to directly
+; access syscall trampolines. Do we need to support this?
+; define internal void @CallIndirectConst() {
+; entry:
+;   %__1 = inttoptr i32 66496 to void ()*
+;   call void %__1()
+;   call void %__1()
+;   call void %__1()
+;   call void %__1()
+;   call void %__1()
+;   ret void
+; }
+
 ; ERRORS-NOT: ICE translation error