Add NOP to ARM IR lowering.
This change allows pnacl-sz to randomly insert NOPs into the generated
ARM32 code, as is already done for x86.
BUG=None
R=eholk@chromium.org, stichnot@chromium.org
Review URL: https://codereview.chromium.org/1670413002 .
diff --git a/src/IceInstARM32.cpp b/src/IceInstARM32.cpp
index 920a933..b1d052c 100644
--- a/src/IceInstARM32.cpp
+++ b/src/IceInstARM32.cpp
@@ -1221,6 +1221,12 @@
emitUsingTextFixup(Func);
}
+InstARM32Dmb::InstARM32Dmb(Cfg *Func) // Takes no operands; only the owning Cfg.
+ : InstARM32Pred(Func, InstARM32::Dmb, 0, nullptr, CondARM32::AL) {} // Kind Dmb, predicate fixed to AL.
+
+InstARM32Nop::InstARM32Nop(Cfg *Func) // Takes no operands; only the owning Cfg.
+ : InstARM32Pred(Func, InstARM32::Nop, 0, nullptr, CondARM32::AL) {} // Kind Nop, predicate fixed to AL.
+
InstARM32Vcmp::InstARM32Vcmp(Cfg *Func, Variable *Src0, Operand *Src1,
CondARM32::Cond Predicate)
: InstARM32Pred(Func, InstARM32::Vcmp, 2, nullptr, Predicate) {
@@ -1240,9 +1246,6 @@
addSource(Src);
}
-InstARM32Dmb::InstARM32Dmb(Cfg *Func)
- : InstARM32Pred(Func, InstARM32::Dmb, 0, nullptr, CondARM32::AL) {}
-
// ======================== Dump routines ======================== //
// Two-addr ops
@@ -2447,6 +2450,26 @@
"sy";
}
+void InstARM32Nop::emit(const Cfg *Func) const { // Writes a tab followed by "nop" to the emit stream.
+ if (!BuildDefs::dump()) // Nothing is emitted unless BuildDefs::dump() is true.
+ return;
+ assert(getSrcSize() == 0); // A nop is expected to have no source operands.
+ Func->getContext()->getStrEmit() << "\t"
+ << "nop";
+}
+
+void InstARM32Nop::emitIAS(const Cfg *Func) const { // Forwards to the ARM32 assembler's nop().
+ assert(getSrcSize() == 0); // A nop is expected to have no source operands.
+ Func->getAssembler<ARM32::AssemblerARM32>()->nop();
+}
+
+void InstARM32Nop::dump(const Cfg *Func) const { // Writes "nop" (no leading tab) to the dump stream.
+ if (!BuildDefs::dump()) // Nothing is written unless BuildDefs::dump() is true.
+ return;
+ assert(getSrcSize() == 0); // A nop is expected to have no source operands.
+ Func->getContext()->getStrDump() << "nop";
+}
+
void OperandARM32Mem::emit(const Cfg *Func) const {
if (!BuildDefs::dump())
return;
diff --git a/src/IceInstARM32.h b/src/IceInstARM32.h
index 504123b..4d8f02b 100644
--- a/src/IceInstARM32.h
+++ b/src/IceInstARM32.h
@@ -397,6 +397,7 @@
Ldrex,
Lsl,
Lsr,
+ Nop,
Mla,
Mls,
Mov,
@@ -1494,6 +1495,24 @@
explicit InstARM32Dmb(Cfg *Func);
};
+class InstARM32Nop final : public InstARM32Pred { // Operand-free ARM32 "nop" instruction.
+ InstARM32Nop() = delete; // No default construction; a Cfg is required.
+ InstARM32Nop(const InstARM32Nop &) = delete; // Not copy-constructible.
+ InstARM32Nop &operator=(const InstARM32Nop &) = delete; // Not copy-assignable.
+
+public:
+ static InstARM32Nop *create(Cfg *Func) { // Factory: the only public way to build an instance.
+ return new (Func->allocate<InstARM32Nop>()) InstARM32Nop(Func); // Placement-new into storage obtained from Func.
+ }
+ void emit(const Cfg *Func) const override; // Textual assembly output.
+ void emitIAS(const Cfg *Func) const override; // Output through the ARM32 assembler object.
+ void dump(const Cfg *Func) const override; // Output to the dump stream.
+ static bool classof(const Inst *Instr) { return isClassof(Instr, Nop); } // Kind check against Nop.
+
+private:
+ explicit InstARM32Nop(Cfg *Func); // Private so callers must go through create().
+};
+
// Declare partial template specializations of emit() methods that already have
// default implementations. Without this, there is the possibility of ODR
// violations and link errors.
diff --git a/src/IceTargetLoweringARM32.cpp b/src/IceTargetLoweringARM32.cpp
index 099ceb2..9238541 100644
--- a/src/IceTargetLoweringARM32.cpp
+++ b/src/IceTargetLoweringARM32.cpp
@@ -5360,7 +5360,7 @@
RandomNumberGenerator &RNG) {
RandomNumberGeneratorWrapper RNGW(RNG);
if (RNGW.getTrueWithProbability(Probability)) {
- UnimplementedError(Func->getContext()->getFlags());
+ _nop();
}
}
diff --git a/src/IceTargetLoweringARM32.h b/src/IceTargetLoweringARM32.h
index b35649c..97a944c 100644
--- a/src/IceTargetLoweringARM32.h
+++ b/src/IceTargetLoweringARM32.h
@@ -418,6 +418,8 @@
}
}
+ void _nop() { Context.insert<InstARM32Nop>(); } // Inserts an InstARM32Nop through Context.
+
// Generates a vmov instruction to extract the given index from a vector
// register.
void _extractelement(Variable *Dest, Variable *Src0, uint32_t Index,
diff --git a/tests_lit/llvm2ice_tests/nop-insertion.ll b/tests_lit/llvm2ice_tests/nop-insertion.ll
index bfefdcc..edacff9 100644
--- a/tests_lit/llvm2ice_tests/nop-insertion.ll
+++ b/tests_lit/llvm2ice_tests/nop-insertion.ll
@@ -17,6 +17,10 @@
; RUN: %p2i -i %s --filetype=asm --sandbox -a -sz-seed=1 -nop-insertion \
; RUN: -nop-insertion-percentage=50 -max-nops-per-instruction=1 \
; RUN: | FileCheck %s --check-prefix=SANDBOX50
+; RUN: %p2i -i %s --filetype=asm --sandbox --target=arm32 -a -sz-seed=1 \
+; RUN: -nop-insertion -nop-insertion-percentage=110 \
+; RUN: -max-nops-per-instruction=2 \
+; RUN: | FileCheck %s --check-prefix=ARM110P2
define internal <4 x i32> @mul_v4i32(<4 x i32> %a, <4 x i32> %b) {
@@ -137,4 +141,69 @@
; SANDBOX50: jmp *%ecx
; SANDBOX50: .bundle_unlock
+; ARM110P2: mul_v4i32:
+; ARM110P2-NEXT: .Lmul_v4i32$entry:
+; ARM110P2-NEXT: .bundle_lock
+; ARM110P2-NEXT: sub sp, sp, #48
+; ARM110P2-NEXT: bic sp, sp, #3221225472
+; ARM110P2-NEXT: .bundle_unlock
+; ARM110P2-NEXT: nop
+; ARM110P2-NEXT: nop
+; ARM110P2-NEXT: .bundle_lock
+; ARM110P2-NEXT: add ip, sp, #32
+; ARM110P2-NEXT: bic ip, ip, #3221225472
+; ARM110P2-NEXT: vst1.32 q0, [ip]
+; ARM110P2-NEXT: .bundle_unlock
+; ARM110P2-NEXT: nop
+; ARM110P2-NEXT: nop
+; ARM110P2-NEXT: # [sp, #32] = def.pseudo
+; ARM110P2-NEXT: .bundle_lock
+; ARM110P2-NEXT: add ip, sp, #16
+; ARM110P2-NEXT: bic ip, ip, #3221225472
+; ARM110P2-NEXT: vst1.32 q1, [ip]
+; ARM110P2-NEXT: .bundle_unlock
+; ARM110P2-NEXT: nop
+; ARM110P2-NEXT: nop
+; ARM110P2-NEXT: # [sp, #16] = def.pseudo
+; ARM110P2-NEXT: .bundle_lock
+; ARM110P2-NEXT: add ip, sp, #32
+; ARM110P2-NEXT: bic ip, ip, #3221225472
+; ARM110P2-NEXT: vld1.32 q0, [ip]
+; ARM110P2-NEXT: .bundle_unlock
+; ARM110P2-NEXT: nop
+; ARM110P2-NEXT: nop
+; ARM110P2-NEXT: .bundle_lock
+; ARM110P2-NEXT: add ip, sp, #16
+; ARM110P2-NEXT: bic ip, ip, #3221225472
+; ARM110P2-NEXT: vld1.32 q1, [ip]
+; ARM110P2-NEXT: .bundle_unlock
+; ARM110P2-NEXT: nop
+; ARM110P2-NEXT: nop
+; ARM110P2-NEXT: vmul.i32 q0, q0, q1
+; ARM110P2-NEXT: nop
+; ARM110P2-NEXT: nop
+; ARM110P2-NEXT: .bundle_lock
+; ARM110P2-NEXT: vst1.32 q0, [sp]
+; ARM110P2-NEXT: .bundle_unlock
+; ARM110P2-NEXT: nop
+; ARM110P2-NEXT: nop
+; ARM110P2-NEXT: # [sp] = def.pseudo
+; ARM110P2-NEXT: .bundle_lock
+; ARM110P2-NEXT: vld1.32 q0, [sp]
+; ARM110P2-NEXT: .bundle_unlock
+; ARM110P2-NEXT: nop
+; ARM110P2-NEXT: nop
+; ARM110P2-NEXT: .bundle_lock
+; ARM110P2-NEXT: add sp, sp, #48
+; ARM110P2-NEXT: bic sp, sp, #3221225472
+; ARM110P2-NEXT: .bundle_unlock
+; ARM110P2-NEXT: nop
+; ARM110P2-NEXT: nop
+; ARM110P2-NEXT: .bundle_lock
+; ARM110P2-NEXT: bic lr, lr, #3221225487
+; ARM110P2-NEXT: bx lr
+; ARM110P2-NEXT: .bundle_unlock
+; ARM110P2-NEXT: nop
+; ARM110P2-NEXT: nop
+
}