Subzero. ARM32. Refactors atomic intrinsics lowering.
BUG= https://bugs.chromium.org/p/nativeclient/issues/detail?id=4076
R=stichnot@chromium.org
Review URL: https://codereview.chromium.org/1409863006 .
diff --git a/crosstest/test_sync_atomic.cpp b/crosstest/test_sync_atomic.cpp
index d1578eb..432002a 100644
--- a/crosstest/test_sync_atomic.cpp
+++ b/crosstest/test_sync_atomic.cpp
@@ -47,7 +47,8 @@
if (fetch) { \
return __sync_fetch_and_##inst(ptr, 42); \
} else { \
- return __sync_##inst##_and_fetch(ptr, 99); \
+ const type value = static_cast<type>(0xaaaaaaaaaaaaaaaaull); \
+ return __sync_##inst##_and_fetch(ptr, value); \
} \
}
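The 0xaa... pattern, truncated per operand type, gives every width a test value with nonzero bits in both halves, unlike the old 99, whose upper 32 bits are zero for the i64 variants. A standalone sketch (hypothetical trunc_demo.cpp, not part of the crosstest) of the values the static_cast produces:

```cpp
// trunc_demo.cpp -- illustrative only: the value each operand width receives
// from static_cast<type>(0xaaaaaaaaaaaaaaaaull).
#include <cstdint>
#include <cstdio>

int main() {
  const unsigned long long Pattern = 0xaaaaaaaaaaaaaaaaull;
  printf("uint8_t : 0x%02x\n", static_cast<uint8_t>(Pattern));   // 0xaa
  printf("uint16_t: 0x%04x\n", static_cast<uint16_t>(Pattern));  // 0xaaaa
  printf("uint32_t: 0x%08x\n", static_cast<uint32_t>(Pattern));  // 0xaaaaaaaa
  printf("uint64_t: 0x%016llx\n", Pattern);                      // full pattern
  return 0;
}
```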
diff --git a/pydir/build-runtime.py b/pydir/build-runtime.py
index 2d5cf3d..c4ba6d4 100755
--- a/pydir/build-runtime.py
+++ b/pydir/build-runtime.py
@@ -65,14 +65,27 @@
'-target=' + target_info.triple,
'-c',
'{srcdir}/szrt_profiler.c'.format(srcdir=srcdir),
- '-o', TmpFile('{dir}/szrt_profiler_native_{target}.o')
+ '-o', TmpFile('{dir}/szrt_native_profiler_{target}.o')
+ ], echo=verbose)
+ # Assemble srcdir/szrt_asm_{target}.s to tempdir/szrt_native_asm_{target}.o.
+ shellcmd(['llvm-mc',
+ '-triple=' + target_info.triple, '--defsym NATIVE=1',
+ '-filetype=obj',
+ '-o', TmpFile('{dir}/szrt_native_asm_{target}.o'),
+ '{srcdir}/szrt_asm_{target}.s'.format(
+ srcdir=srcdir, target=target_info.target)
], echo=verbose)
# Write full szrt_native_{target}.o.
PartialLink([TmpFile('{dir}/szrt_native_{target}.tmp.o'),
- TmpFile('{dir}/szrt_profiler_native_{target}.o')],
+ TmpFile('{dir}/szrt_native_asm_{target}.o'),
+ TmpFile('{dir}/szrt_native_profiler_{target}.o')],
['-m {ld_emu}'.format(ld_emu=target_info.ld_emu)],
OutFile('{rtdir}/szrt_native_{target}.o'),
verbose)
+ shellcmd(['le32-nacl-objcopy',
+ '--strip-symbol=NATIVE',
+ OutFile('{rtdir}/szrt_native_{target}.o')])
+
# Helper function for building the sandboxed runtime.
def MakeSandboxedRuntime():
"""Builds just the sandboxed runtime."""
@@ -82,8 +95,26 @@
Translate(ll_files,
['-mtriple=' + targets.ConvertTripleToNaCl(target_info.triple)] +
target_info.llc_flags,
- OutFile('{rtdir}/szrt_sb_{target}.o'),
+ TmpFile('{dir}/szrt_sb_{target}.tmp.o'),
verbose)
+ # Assemble srcdir/szrt_asm_{target}.s to tempdir/szrt_sb_asm_{target}.o.
+ shellcmd(['llvm-mc',
+ '-triple=' + targets.ConvertTripleToNaCl(target_info.triple),
+ '--defsym NACL=1',
+ '-filetype=obj',
+ '-o', TmpFile('{dir}/szrt_sb_asm_{target}.o'),
+ '{srcdir}/szrt_asm_{target}.s'.format(
+ srcdir=srcdir, target=target_info.target)
+ ], echo=verbose)
+ PartialLink([TmpFile('{dir}/szrt_sb_{target}.tmp.o'),
+ TmpFile('{dir}/szrt_sb_asm_{target}.o')],
+ ['-m {ld_emu}'.format(ld_emu=target_info.sb_emu)],
+ OutFile('{rtdir}/szrt_sb_{target}.o'),
+ verbose)
+ shellcmd(['le32-nacl-objcopy',
+ '--strip-symbol=NACL',
+ OutFile('{rtdir}/szrt_sb_{target}.o')])
+
# Helper function for building the Non-SFI runtime.
def MakeNonsfiRuntime():
"""Builds just the nonsfi runtime."""
@@ -96,18 +127,22 @@
verbose)
# Assemble srcdir/szrt_asm_{target}.s to tempdir/szrt_nonsfi_asm_{target}.o.
shellcmd(['llvm-mc',
- '-triple=' + target_info.triple,
+ '-triple=' + target_info.triple, '--defsym NONSFI=1',
'-filetype=obj',
- '-o', TmpFile('{dir}/szrt_asm_{target}.o'),
+ '-o', TmpFile('{dir}/szrt_nonsfi_asm_{target}.o'),
'{srcdir}/szrt_asm_{target}.s'.format(
srcdir=srcdir, target=target_info.target)
], echo=verbose)
# Write full szrt_nonsfi_{target}.o.
PartialLink([TmpFile('{dir}/szrt_nonsfi_{target}.tmp.o'),
- TmpFile('{dir}/szrt_asm_{target}.o')],
+ TmpFile('{dir}/szrt_nonsfi_asm_{target}.o')],
['-m {ld_emu}'.format(ld_emu=target_info.ld_emu)],
OutFile('{rtdir}/szrt_nonsfi_{target}.o'),
verbose)
+ shellcmd(['le32-nacl-objcopy',
+ '--strip-symbol=NONSFI',
+ OutFile('{rtdir}/szrt_nonsfi_{target}.o')])
+
# Run the helper functions.
MakeNativeRuntime()
diff --git a/pydir/targets.py b/pydir/targets.py
index 5effd4e..ea51c28 100644
--- a/pydir/targets.py
+++ b/pydir/targets.py
@@ -18,13 +18,14 @@
TargetInfo = namedtuple('TargetInfo',
['target', 'compiler_arch', 'triple', 'llc_flags',
- 'ld_emu', 'cross_headers'])
+ 'ld_emu', 'sb_emu', 'cross_headers'])
X8632Target = TargetInfo(target='x8632',
compiler_arch='x8632',
triple='i686-none-linux',
llc_flags=['-mcpu=pentium4m'],
ld_emu='elf_i386_nacl',
+ sb_emu='elf_i386_nacl',
cross_headers=[])
X8664Target = TargetInfo(target='x8664',
@@ -32,6 +33,7 @@
triple='x86_64-none-linux-gnux32',
llc_flags=['-mcpu=x86-64'],
ld_emu='elf32_x86_64_nacl',
+ sb_emu='elf_x86_64_nacl',
cross_headers=[])
ARM32Target = TargetInfo(target='arm32',
@@ -41,6 +43,7 @@
'-float-abi=hard',
'-mattr=+neon'],
ld_emu='armelf_nacl',
+ sb_emu='armelf_nacl',
cross_headers=['-isystem', FindARMCrossInclude()])
def ConvertTripleToNaCl(nonsfi_triple):
diff --git a/runtime/szrt_asm_arm32.s b/runtime/szrt_asm_arm32.s
index 54cb380..1d01909 100644
--- a/runtime/szrt_asm_arm32.s
+++ b/runtime/szrt_asm_arm32.s
@@ -14,6 +14,3 @@
.text
.p2alignl 4,0xE7FEDEF0
- .globl __nacl_read_tp
-__nacl_read_tp:
- b __aeabi_read_tp
diff --git a/runtime/szrt_asm_x8632.s b/runtime/szrt_asm_x8632.s
index 518acef..d2387cd 100644
--- a/runtime/szrt_asm_x8632.s
+++ b/runtime/szrt_asm_x8632.s
@@ -15,6 +15,7 @@
.text
.p2align 5,0xf4
+ .ifdef NONSFI
.globl __Sz_getIP_eax
__Sz_getIP_eax:
movl (%esp), %eax
@@ -49,3 +50,4 @@
__Sz_getIP_edi:
movl (%esp), %edi
ret
+ .endif # NONSFI
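The .ifdef gate above pairs with the --defsym flags added to build-runtime.py: each runtime flavor assembles the same szrt_asm_{target}.s with one marker symbol (NATIVE, NACL, or NONSFI) defined, so only the matching blocks are emitted, and the marker is then removed from the final object by le32-nacl-objcopy --strip-symbol. A rough C++ analogue of the gating (hypothetical defsym_demo.cpp; the assembler's --defsym plays the role of the compiler's -D):

```cpp
// defsym_demo.cpp -- illustrative analogue of ".ifdef NONSFI" in the .s files.
#include <cstdio>

int main() {
#ifdef NONSFI
  // Corresponds to the gated block: the __Sz_getIP_* helpers are only
  // assembled when the Non-SFI flavor of the runtime is built.
  std::puts("Non-SFI-only helpers included");
#else
  std::puts("Non-SFI-only helpers omitted");
#endif
  return 0;
}
// Two flavors: `c++ defsym_demo.cpp` vs. `c++ -DNONSFI defsym_demo.cpp`.
```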
diff --git a/src/IceRegAlloc.cpp b/src/IceRegAlloc.cpp
index 993dc44..11a4a1b 100644
--- a/src/IceRegAlloc.cpp
+++ b/src/IceRegAlloc.cpp
@@ -642,7 +642,7 @@
*RegNumBVIter(Filtered ? Iter.Free : Iter.FreeUnfiltered).begin();
Iter.Cur->setRegNumTmp(RegNum);
if (Filtered)
- dumpLiveRangeTrace("Allocating ", Iter.Cur);
+ dumpLiveRangeTrace("Allocating Y ", Iter.Cur);
else
dumpLiveRangeTrace("Allocating X ", Iter.Cur);
const llvm::SmallBitVector &Aliases = *RegAliases[RegNum];
@@ -768,7 +768,7 @@
++RegUses[RegAlias];
}
Active.push_back(Iter.Cur);
- dumpLiveRangeTrace("Allocating ", Iter.Cur);
+ dumpLiveRangeTrace("Allocating Z ", Iter.Cur);
}
void LinearScan::assignFinalRegisters(
diff --git a/src/IceTargetLoweringARM32.cpp b/src/IceTargetLoweringARM32.cpp
index 85e0a65..b3e43eb 100644
--- a/src/IceTargetLoweringARM32.cpp
+++ b/src/IceTargetLoweringARM32.cpp
@@ -385,7 +385,8 @@
// This is not the variable we are looking for.
continue;
}
- assert(Var64->hasReg() || !Var64->mustHaveReg());
+ // Only allow infinite-weight i64 temporaries to be register allocated.
+ assert(!Var64->hasReg() || Var64->mustHaveReg());
if (!Var64->hasReg()) {
continue;
}
@@ -4401,10 +4402,16 @@
}
TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(const InstIcmp *Instr) {
- Operand *Src0 = legalizeUndef(Instr->getSrc(0));
- Operand *Src1 = legalizeUndef(Instr->getSrc(1));
+ return lowerIcmpCond(Instr->getCondition(), Instr->getSrc(0),
+ Instr->getSrc(1));
+}
- const InstIcmp::ICond Condition = Instr->getCondition();
+TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(InstIcmp::ICond Condition,
+ Operand *Src0,
+ Operand *Src1) {
+ Src0 = legalizeUndef(Src0);
+ Src1 = legalizeUndef(Src1);
+
// a=icmp cond b, c ==>
// GCC does:
// <u/s>xtb tb, b
@@ -4504,162 +4511,156 @@
}
} // end of anonymous namespace
+void TargetARM32::lowerLoadLinkedStoreExclusive(
+ Type Ty, Operand *Addr, std::function<Variable *(Variable *)> Operation,
+ CondARM32::Cond Cond) {
+
+ auto *Retry = Context.insert<InstARM32Label>(this);
+ { // Scoped to highlight the extent of the emitted retry loop.
+ Variable *Tmp = (Ty == IceType_i64) ? makeI64RegPair() : makeReg(Ty);
+ auto *Success = makeReg(IceType_i32);
+ auto *_0 = Ctx->getConstantZero(IceType_i32);
+
+ Context.insert<InstFakeDef>(Tmp);
+ Context.insert<InstFakeUse>(Tmp);
+ Variable *AddrR = legalizeToReg(Addr);
+ _ldrex(Tmp, formMemoryOperand(AddrR, Ty))->setDestRedefined();
+ auto *StoreValue = Operation(Tmp);
+ assert(StoreValue->mustHaveReg());
+ _strex(Success, StoreValue, formMemoryOperand(AddrR, Ty), Cond);
+ _cmp(Success, _0, Cond);
+ }
+ _br(Retry, CondARM32::NE);
+}
+
+namespace {
+InstArithmetic *createArithInst(Cfg *Func, uint32_t Operation, Variable *Dest,
+ Variable *Src0, Operand *Src1) {
+ InstArithmetic::OpKind Oper;
+ switch (Operation) {
+ default:
+ llvm::report_fatal_error("Unknown AtomicRMW operation");
+ case Intrinsics::AtomicExchange:
+ llvm::report_fatal_error("Can't handle Atomic xchg operation");
+ case Intrinsics::AtomicAdd:
+ Oper = InstArithmetic::Add;
+ break;
+ case Intrinsics::AtomicAnd:
+ Oper = InstArithmetic::And;
+ break;
+ case Intrinsics::AtomicSub:
+ Oper = InstArithmetic::Sub;
+ break;
+ case Intrinsics::AtomicOr:
+ Oper = InstArithmetic::Or;
+ break;
+ case Intrinsics::AtomicXor:
+ Oper = InstArithmetic::Xor;
+ break;
+ }
+ return InstArithmetic::create(Func, Oper, Dest, Src0, Src1);
+}
+} // end of anonymous namespace
+
void TargetARM32::lowerAtomicRMW(Variable *Dest, uint32_t Operation,
- Operand *Ptr, Operand *Val) {
+ Operand *Addr, Operand *Val) {
// retry:
- // ldrex contents, [addr]
- // op tmp, contents, operand
- // strex success, tmp, [addr]
+ // ldrex tmp, [addr]
+ // mov contents, tmp
+ // op result, contents, Val
+ // strex success, result, [addr]
+ // cmp success, 0
// bne retry
// fake-use(addr, operand) @ prevents undesirable clobbering.
// mov dest, contents
- assert(Dest != nullptr);
- Type DestTy = Dest->getType();
- (void)Ptr;
- (void)Val;
-
- OperandARM32Mem *Mem;
- Variable *PtrContentsReg;
- Variable *PtrContentsHiReg;
- Variable *PtrContentsLoReg;
- Variable *Value = Func->makeVariable(DestTy);
- Variable *ValueReg;
- Variable *ValueHiReg;
- Variable *ValueLoReg;
- Variable *Success = makeReg(IceType_i32);
- Variable *TmpReg;
- Variable *TmpHiReg;
- Variable *TmpLoReg;
- Operand *_0 = Ctx->getConstantZero(IceType_i32);
- auto *Retry = InstARM32Label::create(Func, this);
+ auto DestTy = Dest->getType();
if (DestTy == IceType_i64) {
- Variable64On32 *PtrContentsReg64 = makeI64RegPair();
- PtrContentsHiReg = PtrContentsReg64->getHi();
- PtrContentsLoReg = PtrContentsReg64->getLo();
- PtrContentsReg = PtrContentsReg64;
-
- llvm::cast<Variable64On32>(Value)->initHiLo(Func);
- Variable64On32 *ValueReg64 = makeI64RegPair();
- ValueHiReg = ValueReg64->getHi();
- ValueLoReg = ValueReg64->getLo();
- ValueReg = ValueReg64;
-
- Variable64On32 *TmpReg64 = makeI64RegPair();
- TmpHiReg = TmpReg64->getHi();
- TmpLoReg = TmpReg64->getLo();
- TmpReg = TmpReg64;
- } else {
- PtrContentsReg = makeReg(DestTy);
- PtrContentsHiReg = nullptr;
- PtrContentsLoReg = PtrContentsReg;
-
- ValueReg = makeReg(DestTy);
- ValueHiReg = nullptr;
- ValueLoReg = ValueReg;
-
- TmpReg = makeReg(DestTy);
- TmpHiReg = nullptr;
- TmpLoReg = TmpReg;
- }
-
- if (DestTy == IceType_i64) {
- Context.insert<InstFakeDef>(Value);
- }
- lowerAssign(InstAssign::create(Func, Value, Val));
-
- Variable *PtrVar = Func->makeVariable(IceType_i32);
- lowerAssign(InstAssign::create(Func, PtrVar, Ptr));
-
- _dmb();
- Context.insert(Retry);
- Mem = formMemoryOperand(PtrVar, DestTy);
- if (DestTy == IceType_i64) {
- Context.insert<InstFakeDef>(ValueReg, Value);
- }
- lowerAssign(InstAssign::create(Func, ValueReg, Value));
- if (DestTy == IceType_i8 || DestTy == IceType_i16) {
- _uxt(ValueReg, ValueReg);
- }
- _ldrex(PtrContentsReg, Mem);
-
- if (DestTy == IceType_i64) {
- Context.insert<InstFakeDef>(TmpReg, ValueReg);
- }
- switch (Operation) {
- default:
- Func->setError("Unknown AtomicRMW operation");
+ lowerInt64AtomicRMW(Dest, Operation, Addr, Val);
return;
- case Intrinsics::AtomicAdd:
- if (DestTy == IceType_i64) {
- _adds(TmpLoReg, PtrContentsLoReg, ValueLoReg);
- _adc(TmpHiReg, PtrContentsHiReg, ValueHiReg);
- } else {
- _add(TmpLoReg, PtrContentsLoReg, ValueLoReg);
- }
- break;
- case Intrinsics::AtomicSub:
- if (DestTy == IceType_i64) {
- _subs(TmpLoReg, PtrContentsLoReg, ValueLoReg);
- _sbc(TmpHiReg, PtrContentsHiReg, ValueHiReg);
- } else {
- _sub(TmpLoReg, PtrContentsLoReg, ValueLoReg);
- }
- break;
- case Intrinsics::AtomicOr:
- _orr(TmpLoReg, PtrContentsLoReg, ValueLoReg);
- if (DestTy == IceType_i64) {
- _orr(TmpHiReg, PtrContentsHiReg, ValueHiReg);
- }
- break;
- case Intrinsics::AtomicAnd:
- _and(TmpLoReg, PtrContentsLoReg, ValueLoReg);
- if (DestTy == IceType_i64) {
- _and(TmpHiReg, PtrContentsHiReg, ValueHiReg);
- }
- break;
- case Intrinsics::AtomicXor:
- _eor(TmpLoReg, PtrContentsLoReg, ValueLoReg);
- if (DestTy == IceType_i64) {
- _eor(TmpHiReg, PtrContentsHiReg, ValueHiReg);
- }
- break;
- case Intrinsics::AtomicExchange:
- _mov(TmpLoReg, ValueLoReg);
- if (DestTy == IceType_i64) {
- _mov(TmpHiReg, ValueHiReg);
- }
- break;
}
- _strex(Success, TmpReg, Mem);
- _cmp(Success, _0);
- _br(Retry, CondARM32::NE);
- // The following fake-uses ensure that Subzero will not clobber them in the
- // load-linked/store-conditional loop above. We might have to spill them, but
- // spilling is preferable over incorrect behavior.
- Context.insert<InstFakeUse>(PtrVar);
- if (auto *Value64 = llvm::dyn_cast<Variable64On32>(Value)) {
- Context.insert<InstFakeUse>(Value64->getHi());
- Context.insert<InstFakeUse>(Value64->getLo());
+ Operand *ValRF = nullptr;
+ if (llvm::isa<ConstantInteger32>(Val)) {
+ ValRF = Val;
} else {
- Context.insert<InstFakeUse>(Value);
+ ValRF = legalizeToReg(Val);
}
+ auto *ContentsR = makeReg(DestTy);
+ auto *ResultR = makeReg(DestTy);
+
_dmb();
- if (DestTy == IceType_i8 || DestTy == IceType_i16) {
- _uxt(PtrContentsReg, PtrContentsReg);
+ lowerLoadLinkedStoreExclusive(
+ DestTy, Addr,
+ [this, Operation, ResultR, ContentsR, ValRF](Variable *Tmp) {
+ lowerAssign(InstAssign::create(Func, ContentsR, Tmp));
+ if (Operation == Intrinsics::AtomicExchange) {
+ lowerAssign(InstAssign::create(Func, ResultR, ValRF));
+ } else {
+ lowerArithmetic(
+ createArithInst(Func, Operation, ResultR, ContentsR, ValRF));
+ }
+ return ResultR;
+ });
+ _dmb();
+ if (auto *ValR = llvm::dyn_cast<Variable>(ValRF)) {
+ Context.insert<InstFakeUse>(ValR);
+ }
+ // ContentsR must not be dead-code eliminated, even if Dest is unused.
+ Context.insert<InstFakeUse>(ContentsR);
+ lowerAssign(InstAssign::create(Func, Dest, ContentsR));
+}
+
+void TargetARM32::lowerInt64AtomicRMW(Variable *Dest, uint32_t Operation,
+ Operand *Addr, Operand *Val) {
+ assert(Dest->getType() == IceType_i64);
+
+ auto *ResultR = makeI64RegPair();
+
+ Context.insert<InstFakeDef>(ResultR);
+
+ Operand *ValRF = nullptr;
+ if (llvm::isa<ConstantInteger64>(Val)) {
+ ValRF = Val;
+ } else {
+ auto *ValR64 = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
+ ValR64->initHiLo(Func);
+ ValR64->setMustNotHaveReg();
+ ValR64->getLo()->setMustHaveReg();
+ ValR64->getHi()->setMustHaveReg();
+ lowerAssign(InstAssign::create(Func, ValR64, Val));
+ ValRF = ValR64;
}
- if (DestTy == IceType_i64) {
- Context.insert<InstFakeUse>(PtrContentsReg);
+ auto *ContentsR = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
+ ContentsR->initHiLo(Func);
+ ContentsR->setMustNotHaveReg();
+ ContentsR->getLo()->setMustHaveReg();
+ ContentsR->getHi()->setMustHaveReg();
+
+ _dmb();
+ lowerLoadLinkedStoreExclusive(
+ IceType_i64, Addr,
+ [this, Operation, ResultR, ContentsR, ValRF](Variable *Tmp) {
+ lowerAssign(InstAssign::create(Func, ContentsR, Tmp));
+ Context.insert<InstFakeUse>(Tmp);
+ if (Operation == Intrinsics::AtomicExchange) {
+ lowerAssign(InstAssign::create(Func, ResultR, ValRF));
+ } else {
+ lowerArithmetic(
+ createArithInst(Func, Operation, ResultR, ContentsR, ValRF));
+ }
+ Context.insert<InstFakeUse>(ResultR->getHi());
+ Context.insert<InstFakeDef>(ResultR, ResultR->getLo())
+ ->setDestRedefined();
+ return ResultR;
+ });
+ _dmb();
+ if (auto *ValR64 = llvm::dyn_cast<Variable64On32>(ValRF)) {
+ Context.insert<InstFakeUse>(ValR64->getLo());
+ Context.insert<InstFakeUse>(ValR64->getHi());
}
- lowerAssign(InstAssign::create(Func, Dest, PtrContentsReg));
- if (auto *Dest64 = llvm::dyn_cast<Variable64On32>(Dest)) {
- Context.insert<InstFakeUse>(Dest64->getLo());
- Context.insert<InstFakeUse>(Dest64->getHi());
- } else {
- Context.insert<InstFakeUse>(Dest);
- }
+ lowerAssign(InstAssign::create(Func, Dest, ContentsR));
}
void TargetARM32::postambleCtpop64(const InstCall *Instr) {
@@ -4733,10 +4734,9 @@
}
_dmb();
lowerAssign(InstAssign::create(Func, Dest, T));
- // Make sure the atomic load isn't elided when unused, by adding a FakeUse.
- // Since lowerLoad may fuse the load w/ an arithmetic instruction, insert
- // the FakeUse on the last-inserted instruction's dest.
- Context.insert<InstFakeUse>(Context.getLastInserted()->getDest());
+ // Add a fake-use of T to ensure the atomic load is not removed if Dest is
+ // unused.
+ Context.insert<InstFakeUse>(T);
return;
}
case Intrinsics::AtomicStore: {
@@ -4747,105 +4747,48 @@
Func->setError("Unexpected memory ordering for AtomicStore");
return;
}
- Operand *Value = Instr->getArg(0);
- Type ValueTy = Value->getType();
- assert(isScalarIntegerType(ValueTy));
- Operand *Addr = Instr->getArg(1);
- if (ValueTy == IceType_i64) {
- // Atomic 64-bit stores require a load-locked/store-conditional loop using
- // ldrexd, and strexd. The lowered code is:
- //
- // retry:
- // ldrexd t.lo, t.hi, [addr]
- // strexd success, value.lo, value.hi, [addr]
- // cmp success, #0
- // bne retry
- // fake-use(addr, value.lo, value.hi)
- //
- // The fake-use is needed to prevent those variables from being clobbered
- // in the loop (which will happen under register pressure.)
- Variable64On32 *Tmp = makeI64RegPair();
- Variable64On32 *ValueVar =
- llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
- Variable *AddrVar = makeReg(IceType_i32);
- Variable *Success = makeReg(IceType_i32);
- OperandARM32Mem *Mem;
- Operand *_0 = Ctx->getConstantZero(IceType_i32);
- auto *Retry = InstARM32Label::create(Func, this);
- Variable64On32 *NewReg = makeI64RegPair();
- ValueVar->initHiLo(Func);
- ValueVar->mustNotHaveReg();
-
+ auto *Value = Instr->getArg(0);
+ if (Value->getType() == IceType_i64) {
+ auto *ValueR = makeI64RegPair();
+ Context.insert<InstFakeDef>(ValueR);
+ lowerAssign(InstAssign::create(Func, ValueR, Value));
_dmb();
- lowerAssign(InstAssign::create(Func, ValueVar, Value));
- lowerAssign(InstAssign::create(Func, AddrVar, Addr));
-
- Context.insert(Retry);
- Context.insert<InstFakeDef>(NewReg);
- lowerAssign(InstAssign::create(Func, NewReg, ValueVar));
- Mem = formMemoryOperand(AddrVar, IceType_i64);
- _ldrex(Tmp, Mem);
- // This fake-use both prevents the ldrex from being dead-code eliminated,
- // while also keeping liveness happy about all defs being used.
- Context.insert<InstFakeUse>(Context.getLastInserted()->getDest());
- _strex(Success, NewReg, Mem);
- _cmp(Success, _0);
- _br(Retry, CondARM32::NE);
-
- Context.insert<InstFakeUse>(ValueVar->getLo());
- Context.insert<InstFakeUse>(ValueVar->getHi());
- Context.insert<InstFakeUse>(AddrVar);
+ lowerLoadLinkedStoreExclusive(
+ IceType_i64, Instr->getArg(1), [this, ValueR](Variable *Tmp) {
+ // The following fake-uses prevent the ldrex instruction from being
+ // dead-code eliminated.
+ Context.insert<InstFakeUse>(llvm::cast<Variable>(loOperand(Tmp)));
+ Context.insert<InstFakeUse>(llvm::cast<Variable>(hiOperand(Tmp)));
+ Context.insert<InstFakeUse>(Tmp);
+ return ValueR;
+ });
+ Context.insert<InstFakeUse>(ValueR);
_dmb();
return;
}
+
+ auto *ValueR = legalizeToReg(Instr->getArg(0));
+ const auto ValueTy = ValueR->getType();
+ assert(isScalarIntegerType(ValueTy));
+ auto *Addr = legalizeToReg(Instr->getArg(1));
+
// Non-64-bit stores are atomic as long as the address is aligned. This
// is PNaCl, so addresses are aligned.
- Variable *T = makeReg(ValueTy);
-
_dmb();
- lowerAssign(InstAssign::create(Func, T, Value));
- _str(T, formMemoryOperand(Addr, ValueTy));
+ _str(ValueR, formMemoryOperand(Addr, ValueTy));
_dmb();
return;
}
case Intrinsics::AtomicCmpxchg: {
- // The initial lowering for cmpxchg was:
- //
// retry:
// ldrex tmp, [addr]
// cmp tmp, expected
// mov expected, tmp
- // jne retry
- // strex success, new, [addr]
- // cmp success, #0
- // bne retry
- // mov dest, expected
- //
- // Besides requiring two branches, that lowering could also potentially
- // write to memory (in mov expected, tmp) unless we were OK with increasing
- // the register pressure and requiring expected to be an infinite-weight
- // variable (spoiler alert: that was a problem for i64 cmpxchg.) Through
- // careful rewritting, and thanks to predication, we now implement the
- // lowering as:
- //
- // retry:
- // ldrex tmp, [addr]
- // cmp tmp, expected
// strexeq success, new, [addr]
- // movne expected, tmp
// cmpeq success, #0
// bne retry
// mov dest, expected
- //
- // Predication lets us move the strex ahead of the mov expected, tmp, which
- // allows tmp to be a non-infinite weight temporary. We wanted to avoid
- // writing to memory between ldrex and strex because, even though most times
- // that would cause no issues, if any interleaving memory write aliased
- // [addr] than we would have undefined behavior. Undefined behavior isn't
- // cool, so we try to avoid it. See the "Synchronization and semaphores"
- // section of the "ARM Architecture Reference Manual."
-
assert(isScalarIntegerType(DestTy));
// We require the memory address to be naturally aligned. Given that is the
// case, then normal loads are atomic.
@@ -4856,98 +4799,63 @@
return;
}
- OperandARM32Mem *Mem;
- Variable *TmpReg;
- Variable *Expected, *ExpectedReg;
- Variable *New, *NewReg;
- Variable *Success = makeReg(IceType_i32);
- Operand *_0 = Ctx->getConstantZero(IceType_i32);
- auto *Retry = InstARM32Label::create(Func, this);
-
if (DestTy == IceType_i64) {
- Variable64On32 *TmpReg64 = makeI64RegPair();
- Variable64On32 *New64 =
- llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
- Variable64On32 *NewReg64 = makeI64RegPair();
- Variable64On32 *Expected64 =
- llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
- Variable64On32 *ExpectedReg64 = makeI64RegPair();
-
- New64->initHiLo(Func);
- New64->mustNotHaveReg();
- Expected64->initHiLo(Func);
- Expected64->mustNotHaveReg();
-
- TmpReg = TmpReg64;
- New = New64;
- NewReg = NewReg64;
- Expected = Expected64;
- ExpectedReg = ExpectedReg64;
- } else {
- TmpReg = makeReg(DestTy);
- New = Func->makeVariable(DestTy);
- NewReg = makeReg(DestTy);
- Expected = Func->makeVariable(DestTy);
- ExpectedReg = makeReg(DestTy);
- }
-
- Mem = formMemoryOperand(Instr->getArg(0), DestTy);
- if (DestTy == IceType_i64) {
- Context.insert<InstFakeDef>(Expected);
- }
- lowerAssign(InstAssign::create(Func, Expected, Instr->getArg(1)));
- if (DestTy == IceType_i64) {
+ auto *New = makeI64RegPair();
Context.insert<InstFakeDef>(New);
+ lowerAssign(InstAssign::create(Func, New, Instr->getArg(2)));
+
+ auto *Expected = makeI64RegPair();
+ Context.insert<InstFakeDef>(Expected);
+ lowerAssign(InstAssign::create(Func, Expected, Instr->getArg(1)));
+
+ _dmb();
+ lowerLoadLinkedStoreExclusive(
+ DestTy, Instr->getArg(0),
+ [this, Expected, New, Instr, DestTy](Variable *Tmp) {
+ auto *ExpectedLoR = llvm::cast<Variable>(loOperand(Expected));
+ auto *ExpectedHiR = llvm::cast<Variable>(hiOperand(Expected));
+ auto *TmpLoR = llvm::cast<Variable>(loOperand(Tmp));
+ auto *TmpHiR = llvm::cast<Variable>(hiOperand(Tmp));
+ _cmp(TmpLoR, ExpectedLoR);
+ _cmp(TmpHiR, ExpectedHiR, CondARM32::EQ);
+ // Add an explicit use of Tmp here; otherwise its live range will not
+ // reach this point (only those of Tmp.Lo and Tmp.Hi will.)
+ Context.insert<InstFakeUse>(Tmp);
+ _mov_redefined(ExpectedLoR, TmpLoR);
+ _mov_redefined(ExpectedHiR, TmpHiR);
+ // Same as above.
+ Context.insert<InstFakeUse>(Tmp);
+ return New;
+ },
+ CondARM32::EQ);
+ _dmb();
+
+ lowerAssign(InstAssign::create(Func, Dest, Expected));
+ // The fake-use of Expected prevents the assignments to Expected (above)
+ // from being removed if Dest is not used.
+ Context.insert<InstFakeUse>(Expected);
+ // New needs to be alive here, or its live range will end at the
+ // strex instruction.
+ Context.insert<InstFakeUse>(New);
+ return;
}
- lowerAssign(InstAssign::create(Func, New, Instr->getArg(2)));
+
+ auto *New = legalizeToReg(Instr->getArg(2));
+ auto *Expected = legalizeToReg(Instr->getArg(1));
+
+ _dmb();
+ lowerLoadLinkedStoreExclusive(
+ DestTy,
+ Instr->getArg(0), [this, Expected, New, Instr, DestTy](Variable *Tmp) {
+ lowerIcmpCond(InstIcmp::Eq, Tmp, Expected);
+ _mov_redefined(Expected, Tmp);
+ return New;
+ }, CondARM32::EQ);
_dmb();
- Context.insert(Retry);
- if (DestTy == IceType_i64) {
- Context.insert<InstFakeDef>(ExpectedReg, Expected);
- }
- lowerAssign(InstAssign::create(Func, ExpectedReg, Expected));
- if (DestTy == IceType_i64) {
- Context.insert<InstFakeDef>(NewReg, New);
- }
- lowerAssign(InstAssign::create(Func, NewReg, New));
-
- _ldrex(TmpReg, Mem);
- Context.insert<InstFakeUse>(Context.getLastInserted()->getDest());
- if (DestTy == IceType_i64) {
- auto *TmpReg64 = llvm::cast<Variable64On32>(TmpReg);
- auto *ExpectedReg64 = llvm::cast<Variable64On32>(ExpectedReg);
- // lowerAssign above has added fake-defs for TmpReg and ExpectedReg. Let's
- // keep liveness happy, shall we?
- Context.insert<InstFakeUse>(TmpReg);
- Context.insert<InstFakeUse>(ExpectedReg);
- _cmp(TmpReg64->getHi(), ExpectedReg64->getHi());
- _cmp(TmpReg64->getLo(), ExpectedReg64->getLo(), CondARM32::EQ);
- } else {
- _cmp(TmpReg, ExpectedReg);
- }
- _strex(Success, NewReg, Mem, CondARM32::EQ);
- if (DestTy == IceType_i64) {
- auto *TmpReg64 = llvm::cast<Variable64On32>(TmpReg);
- auto *Expected64 = llvm::cast<Variable64On32>(Expected);
- _mov_redefined(Expected64->getHi(), TmpReg64->getHi(), CondARM32::NE);
- _mov_redefined(Expected64->getLo(), TmpReg64->getLo(), CondARM32::NE);
- Context.insert<InstFakeDef>(Expected, TmpReg);
- _set_dest_redefined();
- } else {
- _mov_redefined(Expected, TmpReg, CondARM32::NE);
- }
- _cmp(Success, _0, CondARM32::EQ);
- _br(Retry, CondARM32::NE);
- _dmb();
lowerAssign(InstAssign::create(Func, Dest, Expected));
Context.insert<InstFakeUse>(Expected);
- if (auto *New64 = llvm::dyn_cast<Variable64On32>(New)) {
- Context.insert<InstFakeUse>(New64->getLo());
- Context.insert<InstFakeUse>(New64->getHi());
- } else {
- Context.insert<InstFakeUse>(New);
- }
+ Context.insert<InstFakeUse>(New);
return;
}
case Intrinsics::AtomicRMW: {
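With the refactor above, the ldrex/strex retry skeleton is emitted in exactly one place, and each intrinsic (atomic RMW, 64-bit atomic store, cmpxchg) supplies only the code between the ldrex and the strex, as a callback that returns the value to be stored. A self-contained sketch of that shape (hypothetical string-based emit helpers, not Subzero's instruction classes):

```cpp
// llsc_shape.cpp -- illustrative only; models the callback-based factoring of
// lowerLoadLinkedStoreExclusive with strings instead of Subzero instructions.
#include <cstdio>
#include <functional>
#include <string>
#include <vector>

static std::vector<std::string> Emitted;
static void emit(const std::string &S) { Emitted.push_back(S); }

// Emits the shared skeleton; Operation emits the loop body and names the
// value that strex should store back.
static void emitLoadLinkedStoreExclusive(
    const std::string &Addr,
    std::function<std::string(const std::string &)> Operation) {
  emit("retry:");
  emit("  ldrex tmp, [" + Addr + "]");
  const std::string StoreValue = Operation("tmp");
  emit("  strex success, " + StoreValue + ", [" + Addr + "]");
  emit("  cmp success, #0");
  emit("  bne retry");
}

int main() {
  // An atomic add: only the middle of the loop is intrinsic-specific.
  emitLoadLinkedStoreExclusive("r0", [](const std::string &Tmp) {
    emit("  add result, " + Tmp + ", r1");
    return std::string("result");
  });
  for (const std::string &S : Emitted)
    std::printf("%s\n", S.c_str());
  return 0;
}
```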
diff --git a/src/IceTargetLoweringARM32.h b/src/IceTargetLoweringARM32.h
index e457127..83e3c58 100644
--- a/src/IceTargetLoweringARM32.h
+++ b/src/IceTargetLoweringARM32.h
@@ -246,8 +246,29 @@
Operand *Src1);
CondWhenTrue lowerInt64IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
Operand *Src1);
+ CondWhenTrue lowerIcmpCond(InstIcmp::ICond Condition, Operand *Src0,
+ Operand *Src1);
CondWhenTrue lowerIcmpCond(const InstIcmp *Instr);
void lowerIcmp(const InstIcmp *Instr) override;
+ /// Emits the basic sequence for load-linked/store-exclusive loops:
+ ///
+ /// retry:
+ /// ldrex tmp, [Addr]
+ /// StoreValue = Operation(tmp)
+ /// strexCond success, StoreValue, [Addr]
+ /// cmpCond success, #0
+ /// bne retry
+ ///
+ /// Operation needs to return the value to be strex'd into Addr; it must not
+ /// change the flags if Cond is not AL, and it must not emit any instruction
+ /// that could end up writing to memory. Operation is also responsible for
+ /// the fake-defs needed for i64 lowering.
+ void
+ lowerLoadLinkedStoreExclusive(Type Ty, Operand *Addr,
+ std::function<Variable *(Variable *)> Operation,
+ CondARM32::Cond Cond = CondARM32::AL);
+ void lowerInt64AtomicRMW(Variable *Dest, uint32_t Operation, Operand *Ptr,
+ Operand *Val);
void lowerAtomicRMW(Variable *Dest, uint32_t Operation, Operand *Ptr,
Operand *Val);
void lowerIntrinsicCall(const InstIntrinsicCall *Instr) override;
@@ -360,13 +381,14 @@
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert<InstARM32Ldr>(Dest, Addr, Pred);
}
- void _ldrex(Variable *Dest, OperandARM32Mem *Addr,
- CondARM32::Cond Pred = CondARM32::AL) {
- Context.insert<InstARM32Ldrex>(Dest, Addr, Pred);
+ InstARM32Ldrex *_ldrex(Variable *Dest, OperandARM32Mem *Addr,
+ CondARM32::Cond Pred = CondARM32::AL) {
+ auto *Ldrex = Context.insert<InstARM32Ldrex>(Dest, Addr, Pred);
if (auto *Dest64 = llvm::dyn_cast<Variable64On32>(Dest)) {
Context.insert<InstFakeDef>(Dest64->getLo(), Dest);
Context.insert<InstFakeDef>(Dest64->getHi(), Dest);
}
+ return Ldrex;
}
void _lsl(Variable *Dest, Variable *Src0, Operand *Src1,
CondARM32::Cond Pred = CondARM32::AL) {
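_ldrex now returns the inserted instruction so a caller can adjust it after emission, as lowerLoadLinkedStoreExclusive does with ->setDestRedefined(). A minimal sketch of the builder-returns-instruction pattern (hypothetical Inst and Context types):

```cpp
// builder_return.cpp -- illustrative only; returning the newly created
// instruction lets callers chain per-instruction tweaks.
#include <cstdio>
#include <memory>
#include <vector>

struct Inst {
  bool DestRedefined = false;
  void setDestRedefined() { DestRedefined = true; }
};

struct Context {
  std::vector<std::unique_ptr<Inst>> Insts;
  // Before the change the emit helper returned void, so the caller could not
  // mark the just-emitted instruction; returning Inst* enables
  //   _ldrex(Tmp, Mem)->setDestRedefined();
  Inst *insert() {
    Insts.push_back(std::make_unique<Inst>());
    return Insts.back().get();
  }
};

int main() {
  Context Ctx;
  Ctx.insert()->setDestRedefined();
  std::printf("DestRedefined = %d\n", Ctx.Insts.back()->DestRedefined ? 1 : 0);
  return 0;
}
```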
diff --git a/tests_lit/assembler/arm32/ldrex-strex.ll b/tests_lit/assembler/arm32/ldrex-strex.ll
index b999801..47e99f3 100644
--- a/tests_lit/assembler/arm32/ldrex-strex.ll
+++ b/tests_lit/assembler/arm32/ldrex-strex.ll
@@ -28,140 +28,38 @@
define internal i32 @testI8Form(i32 %ptr, i32 %a) {
; ASM-LABEL:testI8Form:
-; DIS-LABEL:00000000 <testI8Form>:
+; DIS-LABEL:<testI8Form>:
; IASM-LABEL:testI8Form:
entry:
-; ASM-NEXT:.LtestI8Form$entry:
-; IASM-NEXT:.LtestI8Form$entry:
-
-; ASM-NEXT: sub sp, sp, #28
-; DIS-NEXT: 0: e24dd01c
-; IASM-NEXT: .byte 0x1c
-; IASM-NEXT: .byte 0xd0
-; IASM-NEXT: .byte 0x4d
-; IASM-NEXT: .byte 0xe2
-
-; ASM-NEXT: str r0, [sp, #24]
-; ASM-NEXT: # [sp, #24] = def.pseudo
-; DIS-NEXT: 4: e58d0018
-; IASM-NEXT: .byte 0x18
-; IASM-NEXT: .byte 0x0
-; IASM-NEXT: .byte 0x8d
-; IASM-NEXT: .byte 0xe5
-
-; ASM-NEXT: str r1, [sp, #20]
-; ASM-NEXT: # [sp, #20] = def.pseudo
-; DIS-NEXT: 8: e58d1014
-; IASM-NEXT: .byte 0x14
-; IASM-NEXT: .byte 0x10
-; IASM-NEXT: .byte 0x8d
-; IASM-NEXT: .byte 0xe5
-
%ptr.asptr = inttoptr i32 %ptr to i8*
%a.arg_trunc = trunc i32 %a to i8
-; ASM-NEXT: ldr r0, [sp, #20]
-; DIS-NEXT: c: e59d0014
-; IASM-NEXT: .byte 0x14
-; IASM-NEXT: .byte 0x0
-; IASM-NEXT: .byte 0x9d
-; IASM-NEXT: .byte 0xe5
-
-; ASM-NEXT: strb r0, [sp, #16]
-; DIS-NEXT: 10: e5cd0010
-; ASM-NEXT: # [sp, #16] = def.pseudo
-; IASM-NEXT: .byte 0x10
-; IASM-NEXT: .byte 0x0
-; IASM-NEXT: .byte 0xcd
-; IASM-NEXT: .byte 0xe5
-
%v = call i8 @llvm.nacl.atomic.rmw.i8(i32 1, i8* %ptr.asptr,
i8 %a.arg_trunc, i32 6)
-; ASM-NEXT: ldrb r0, [sp, #16]
-; DIS-NEXT: 14: e5dd0010
-; IASM-NEXT: .byte 0x10
-; IASM-NEXT: .byte 0x0
-; IASM-NEXT: .byte 0xdd
-; IASM-NEXT: .byte 0xe5
-
-; ASM-NEXT: strb r0, [sp, #4]
-; ASM-NEXT: # [sp, #4] = def.pseudo
-; DIS-NEXT: 18: e5cd0004
-; IASM-NEXT: .byte 0x4
-; IASM-NEXT: .byte 0x0
-; IASM-NEXT: .byte 0xcd
-; IASM-NEXT: .byte 0xe5
-
-; ASM-NEXT: ldr r0, [sp, #24]
-; DIS-NEXT: 1c: e59d0018
-; IASM-NEXT: .byte 0x18
-; IASM-NEXT: .byte 0x0
-; IASM-NEXT: .byte 0x9d
-; IASM-NEXT: .byte 0xe5
-
-; ASM-NEXT: str r0, [sp]
-; ASM-NEXT: # [sp] = def.pseudo
-; DIS-NEXT: 20: e58d0000
-; IASM-NEXT: .byte 0x0
-; IASM-NEXT: .byte 0x0
-; IASM-NEXT: .byte 0x8d
-; IASM-NEXT: .byte 0xe5
-
-; ASM-NEXT: dmb sy
-; DIS-NEXT: 24: f57ff05f
-; IASM-NEXT: .byte 0x5f
+; ***** Example of dmb *****
+; ASM: dmb sy
+; DIS: 1c: f57ff05f
+; IASM: .byte 0x5f
; IASM-NEXT: .byte 0xf0
; IASM-NEXT: .byte 0x7f
; IASM-NEXT: .byte 0xf5
-; ASM-NEXT:.LtestI8Form$local$__0:
-; IASM-NEXT:.LtestI8Form$local$__0:
-
-; ASM-NEXT: ldr r0, [sp]
-; DIS-NEXT: 28: e59d0000
-; IASM-NEXT: .byte 0x0
-; IASM-NEXT: .byte 0x0
-; IASM-NEXT: .byte 0x9d
-; IASM-NEXT: .byte 0xe5
-
-; ASM-NEXT: ldrb r1, [sp, #4]
-; DIS-NEXT: 2c: e5dd1004
-; IASM-NEXT: .byte 0x4
-; IASM-NEXT: .byte 0x10
-; IASM-NEXT: .byte 0xdd
-; IASM-NEXT: .byte 0xe5
-
-; ASM-NEXT: uxtb r1, r1
-; DIS-NEXT: 30: e6ef1071
-; IASM-NEXT: .byte 0x71
-; IASM-NEXT: .byte 0x10
-; IASM-NEXT: .byte 0xef
-; IASM-NEXT: .byte 0xe6
-
; ***** Example of ldrexb *****
-; ASM-NEXT: ldrexb r2, [r0]
-; DIS-NEXT: 34: e1d02f9f
-; IASM-NEXT: .byte 0x9f
-; IASM-NEXT: .byte 0x2f
-; IASM-NEXT: .byte 0xd0
+; ASM: ldrexb r1, [r2]
+; DIS: 24: e1d21f9f
+; IASM: .byte 0x9f
+; IASM-NEXT: .byte 0x1f
+; IASM-NEXT: .byte 0xd2
; IASM-NEXT: .byte 0xe1
-; ASM-NEXT: add r1, r2, r1
-; ASM-NEXT: # r3 = def.pseudo
-; DIS-NEXT: 38: e0821001
-; IASM-NEXT: .byte 0x1
-; IASM-NEXT: .byte 0x10
-; IASM-NEXT: .byte 0x82
-; IASM-NEXT: .byte 0xe0
-
; ***** Example of strexb *****
-; ASM-NEXT: strexb r3, r1, [r0]
-; DIS-NEXT: 3c: e1c03f91
-; IASM-NEXT: .byte 0x91
-; IASM-NEXT: .byte 0x3f
-; IASM-NEXT: .byte 0xc0
+; ASM: strexb r4, r3, [r2]
+; DIS: 2c: e1c24f93
+; IASM: .byte 0x93
+; IASM-NEXT: .byte 0x4f
+; IASM-NEXT: .byte 0xc2
; IASM-NEXT: .byte 0xe1
%retval = zext i8 %v to i32
@@ -170,140 +68,29 @@
define internal i32 @testI16Form(i32 %ptr, i32 %a) {
; ASM-LABEL:testI16Form:
-; DIS-LABEL:00000070 <testI16Form>:
+; DIS-LABEL:<testI16Form>:
; IASM-LABEL:testI16Form:
entry:
-; ASM-NEXT:.LtestI16Form$entry:
-; IASM-NEXT:.LtestI16Form$entry:
-
-; ASM-NEXT: sub sp, sp, #28
-; DIS-NEXT: 70: e24dd01c
-; IASM-NEXT: .byte 0x1c
-; IASM-NEXT: .byte 0xd0
-; IASM-NEXT: .byte 0x4d
-; IASM-NEXT: .byte 0xe2
-
-; ASM-NEXT: str r0, [sp, #24]
-; ASM-NEXT: # [sp, #24] = def.pseudo
-; DIS-NEXT: 74: e58d0018
-; IASM-NEXT: .byte 0x18
-; IASM-NEXT: .byte 0x0
-; IASM-NEXT: .byte 0x8d
-; IASM-NEXT: .byte 0xe5
-
-; ASM-NEXT: str r1, [sp, #20]
-; ASM-NEXT: # [sp, #20] = def.pseudo
-; DIS-NEXT: 78: e58d1014
-; IASM-NEXT: .byte 0x14
-; IASM-NEXT: .byte 0x10
-; IASM-NEXT: .byte 0x8d
-; IASM-NEXT: .byte 0xe5
-
%ptr.asptr = inttoptr i32 %ptr to i16*
%a.arg_trunc = trunc i32 %a to i16
-; ASM-NEXT: ldr r0, [sp, #20]
-; DIS-NEXT: 7c: e59d0014
-; IASM-NEXT: .byte 0x14
-; IASM-NEXT: .byte 0x0
-; IASM-NEXT: .byte 0x9d
-; IASM-NEXT: .byte 0xe5
-
-; ASM-NEXT: strh r0, [sp, #16]
-; ASM-NEXT: # [sp, #16] = def.pseudo
-; DIS-NEXT: 80: e1cd01b0
-; IASM-NEXT: .byte 0xb0
-; IASM-NEXT: .byte 0x1
-; IASM-NEXT: .byte 0xcd
-; IASM-NEXT: .byte 0xe1
-
%v = call i16 @llvm.nacl.atomic.rmw.i16(i32 1, i16* %ptr.asptr,
i16 %a.arg_trunc, i32 6)
-
-; ASM-NEXT: ldrh r0, [sp, #16]
-; DIS-NEXT: 84: e1dd01b0
-; IASM-NEXT: .byte 0xb0
-; IASM-NEXT: .byte 0x1
-; IASM-NEXT: .byte 0xdd
-; IASM-NEXT: .byte 0xe1
-
-; ASM-NEXT: strh r0, [sp, #4]
-; ASM-NEXT: # [sp, #4] = def.pseudo
-; DIS-NEXT: 88: e1cd00b4
-; IASM-NEXT: .byte 0xb4
-; IASM-NEXT: .byte 0x0
-; IASM-NEXT: .byte 0xcd
-; IASM-NEXT: .byte 0xe1
-
-; ASM-NEXT: ldr r0, [sp, #24]
-; DIS-NEXT: 8c: e59d0018
-; IASM-NEXT: .byte 0x18
-; IASM-NEXT: .byte 0x0
-; IASM-NEXT: .byte 0x9d
-; IASM-NEXT: .byte 0xe5
-
-; ASM-NEXT: str r0, [sp]
-; ASM-NEXT: # [sp] = def.pseudo
-; DIS-NEXT: 90: e58d0000
-; IASM-NEXT: .byte 0x0
-; IASM-NEXT: .byte 0x0
-; IASM-NEXT: .byte 0x8d
-; IASM-NEXT: .byte 0xe5
-
-; ASM-NEXT: dmb sy
-; DIS-NEXT: 94: f57ff05f
-; IASM-NEXT: .byte 0x5f
-; IASM-NEXT: .byte 0xf0
-; IASM-NEXT: .byte 0x7f
-; IASM-NEXT: .byte 0xf5
-
-; ASM-NEXT:.LtestI16Form$local$__0:
-; IASM-NEXT:.LtestI16Form$local$__0:
-
-; ASM-NEXT: ldr r0, [sp]
-; DIS-NEXT: 98: e59d0000
-; IASM-NEXT: .byte 0x0
-; IASM-NEXT: .byte 0x0
-; IASM-NEXT: .byte 0x9d
-; IASM-NEXT: .byte 0xe5
-
-; ASM-NEXT: ldrh r1, [sp, #4]
-; DIS-NEXT: 9c: e1dd10b4
-; IASM-NEXT: .byte 0xb4
-; IASM-NEXT: .byte 0x10
-; IASM-NEXT: .byte 0xdd
-; IASM-NEXT: .byte 0xe1
-
-; ASM-NEXT: uxth r1, r1
-; DIS-NEXT: a0: e6ff1071
-; IASM-NEXT: .byte 0x71
-; IASM-NEXT: .byte 0x10
-; IASM-NEXT: .byte 0xff
-; IASM-NEXT: .byte 0xe6
-
; ***** Example of ldrexh *****
-; ASM-NEXT: ldrexh r2, [r0]
-; DIS-NEXT: a4: e1f02f9f
-; IASM-NEXT: .byte 0x9f
-; IASM-NEXT: .byte 0x2f
-; IASM-NEXT: .byte 0xf0
+; ASM: ldrexh r1, [r2]
+; DIS: 84: e1f21f9f
+; IASM: .byte 0x9f
+; IASM-NEXT: .byte 0x1f
+; IASM-NEXT: .byte 0xf2
; IASM-NEXT: .byte 0xe1
-; ASM-NEXT: add r1, r2, r1
-; ASM-NEXT: # r3 = def.pseudo
-; DIS-NEXT: a8: e0821001
-; IASM-NEXT: .byte 0x1
-; IASM-NEXT: .byte 0x10
-; IASM-NEXT: .byte 0x82
-; IASM-NEXT: .byte 0xe0
-
; ***** Example of strexh *****
-; ASM-NEXT: strexh r3, r1, [r0]
-; DIS-NEXT: ac: e1e03f91
-; IASM-NEXT: .byte 0x91
-; IASM-NEXT: .byte 0x3f
-; IASM-NEXT: .byte 0xe0
+; ASM: strexh r4, r3, [r2]
+; DIS: 8c: e1e24f93
+; IASM: .byte 0x93
+; IASM-NEXT: .byte 0x4f
+; IASM-NEXT: .byte 0xe2
; IASM-NEXT: .byte 0xe1
%retval = zext i16 %v to i32
@@ -312,116 +99,28 @@
define internal i32 @testI32Form(i32 %ptr, i32 %a) {
; ASM-LABEL:testI32Form:
-; DIS-LABEL:000000e0 <testI32Form>:
+; DIS-LABEL:<testI32Form>:
; IASM-LABEL:testI32Form:
entry:
-; ASM-NEXT:.LtestI32Form$entry:
-; IASM-NEXT:.LtestI32Form$entry:
-
-; ASM-NEXT: sub sp, sp, #20
-; DIS-NEXT: e0: e24dd014
-; IASM-NEXT: .byte 0x14
-; IASM-NEXT: .byte 0xd0
-; IASM-NEXT: .byte 0x4d
-; IASM-NEXT: .byte 0xe2
-
-; ASM-NEXT: str r0, [sp, #16]
-; ASM-NEXT: # [sp, #16] = def.pseudo
-; DIS-NEXT: e4: e58d0010
-; IASM-NEXT: .byte 0x10
-; IASM-NEXT: .byte 0x0
-; IASM-NEXT: .byte 0x8d
-; IASM-NEXT: .byte 0xe5
-
-; ASM-NEXT: str r1, [sp, #12]
-; ASM-NEXT: # [sp, #12] = def.pseudo
-; DIS-NEXT: e8: e58d100c
-; IASM-NEXT: .byte 0xc
-; IASM-NEXT: .byte 0x10
-; IASM-NEXT: .byte 0x8d
-; IASM-NEXT: .byte 0xe5
-
%ptr.asptr = inttoptr i32 %ptr to i32*
%v = call i32 @llvm.nacl.atomic.rmw.i32(i32 1, i32* %ptr.asptr,
i32 %a, i32 6)
-; ASM-NEXT: ldr r0, [sp, #12]
-; DIS-NEXT: ec: e59d000c
-; IASM-NEXT: .byte 0xc
-; IASM-NEXT: .byte 0x0
-; IASM-NEXT: .byte 0x9d
-; IASM-NEXT: .byte 0xe5
-
-; ASM-NEXT: str r0, [sp, #4]
-; ASM-NEXT: # [sp, #4] = def.pseudo
-; DIS-NEXT: f0: e58d0004
-; IASM-NEXT: .byte 0x4
-; IASM-NEXT: .byte 0x0
-; IASM-NEXT: .byte 0x8d
-; IASM-NEXT: .byte 0xe5
-
-; ASM-NEXT: ldr r0, [sp, #16]
-; DIS-NEXT: f4: e59d0010
-; IASM-NEXT: .byte 0x10
-; IASM-NEXT: .byte 0x0
-; IASM-NEXT: .byte 0x9d
-; IASM-NEXT: .byte 0xe5
-
-; ASM-NEXT: str r0, [sp]
-; ASM-NEXT: # [sp] = def.pseudo
-; DIS-NEXT: f8: e58d0000
-; IASM-NEXT: .byte 0x0
-; IASM-NEXT: .byte 0x0
-; IASM-NEXT: .byte 0x8d
-; IASM-NEXT: .byte 0xe5
-
-; ASM-NEXT: dmb sy
-; DIS-NEXT: fc: f57ff05f
-; IASM-NEXT: .byte 0x5f
-; IASM-NEXT: .byte 0xf0
-; IASM-NEXT: .byte 0x7f
-; IASM-NEXT: .byte 0xf5
-
-; ASM-NEXT:.LtestI32Form$local$__0:
-; IASM-NEXT:.LtestI32Form$local$__0:
-
-; ASM-NEXT: ldr r0, [sp]
-; DIS-NEXT: 100: e59d0000
-; IASM-NEXT: .byte 0x0
-; IASM-NEXT: .byte 0x0
-; IASM-NEXT: .byte 0x9d
-; IASM-NEXT: .byte 0xe5
-
-; ASM-NEXT: ldr r1, [sp, #4]
-; DIS-NEXT: 104: e59d1004
-; IASM-NEXT: .byte 0x4
-; IASM-NEXT: .byte 0x10
-; IASM-NEXT: .byte 0x9d
-; IASM-NEXT: .byte 0xe5
-
; ***** Example of ldrex *****
-; ASM-NEXT: ldrex r2, [r0]
-; DIS-NEXT: 108: e1902f9f
-; IASM-NEXT: .byte 0x9f
-; IASM-NEXT: .byte 0x2f
-; IASM-NEXT: .byte 0x90
+; ASM: ldrex r1, [r2]
+; DIS: dc: e1921f9f
+; IASM: .byte 0x9f
+; IASM-NEXT: .byte 0x1f
+; IASM-NEXT: .byte 0x92
; IASM-NEXT: .byte 0xe1
-; ASM-NEXT: add r1, r2, r1
-; ASM-NEXT: # r3 = def.pseudo
-; DIS-NEXT: 10c: e0821001
-; IASM-NEXT: .byte 0x1
-; IASM-NEXT: .byte 0x10
-; IASM-NEXT: .byte 0x82
-; IASM-NEXT: .byte 0xe0
-
; ***** Example of strex *****
-; ASM-NEXT: strex r3, r1, [r0]
-; DIS-NEXT: 110: e1803f91
-; IASM-NEXT: .byte 0x91
-; IASM-NEXT: .byte 0x3f
-; IASM-NEXT: .byte 0x80
+; ASM: strex r4, r3, [r2]
+; DIS: e4: e1824f93
+; IASM: .byte 0x93
+; IASM-NEXT: .byte 0x4f
+; IASM-NEXT: .byte 0x82
; IASM-NEXT: .byte 0xe1
ret i32 %v
@@ -429,193 +128,28 @@
define internal i64 @testI64Form(i32 %ptr, i64 %a) {
; ASM-LABEL:testI64Form:
-; DIS-LABEL:00000130 <testI64Form>:
+; DIS-LABEL:<testI64Form>:
; IASM-LABEL:testI64Form:
entry:
-; ASM-NEXT:.LtestI64Form$entry:
-; IASM-NEXT:.LtestI64Form$entry:
-
-; ASM-NEXT: push {r4, r5}
-; DIS-NEXT: 130: e92d0030
-; IASM-NEXT: .byte 0x30
-; IASM-NEXT: .byte 0x0
-; IASM-NEXT: .byte 0x2d
-; IASM-NEXT: .byte 0xe9
-
-; ASM-NEXT: sub sp, sp, #32
-; DIS-NEXT: 134: e24dd020
-; IASM-NEXT: .byte 0x20
-; IASM-NEXT: .byte 0xd0
-; IASM-NEXT: .byte 0x4d
-; IASM-NEXT: .byte 0xe2
-
-; ASM-NEXT: str r0, [sp, #28]
-; ASM-NEXT: # [sp, #28] = def.pseudo
-; DIS-NEXT: 138: e58d001c
-; IASM-NEXT: .byte 0x1c
-; IASM-NEXT: .byte 0x0
-; IASM-NEXT: .byte 0x8d
-; IASM-NEXT: .byte 0xe5
-
-; ASM-NEXT: mov r0, r2
-; DIS-NEXT: 13c: e1a00002
-; IASM-NEXT: .byte 0x2
-; IASM-NEXT: .byte 0x0
-; IASM-NEXT: .byte 0xa0
-; IASM-NEXT: .byte 0xe1
-
-; ASM-NEXT: str r0, [sp, #24]
-; ASM-NEXT: # [sp, #24] = def.pseudo
-; DIS-NEXT: 140: e58d0018
-; IASM-NEXT: .byte 0x18
-; IASM-NEXT: .byte 0x0
-; IASM-NEXT: .byte 0x8d
-; IASM-NEXT: .byte 0xe5
-
-; ASM-NEXT: mov r0, r3
-; DIS-NEXT: 144: e1a00003
-; IASM-NEXT: .byte 0x3
-; IASM-NEXT: .byte 0x0
-; IASM-NEXT: .byte 0xa0
-; IASM-NEXT: .byte 0xe1
-
-; ASM-NEXT: str r0, [sp, #20]
-; ASM-NEXT: # [sp, #20] = def.pseudo
-; ASM-NEXT: # [sp] = def.pseudo
-; DIS-NEXT: 148: e58d0014
-; IASM-NEXT: .byte 0x14
-; IASM-NEXT: .byte 0x0
-; IASM-NEXT: .byte 0x8d
-; IASM-NEXT: .byte 0xe5
-
%ptr.asptr = inttoptr i32 %ptr to i64*
%v = call i64 @llvm.nacl.atomic.rmw.i64(i32 1, i64* %ptr.asptr,
i64 %a, i32 6)
-; ASM-NEXT: ldr r0, [sp, #24]
-; DIS-NEXT: 14c: e59d0018
-; IASM-NEXT: .byte 0x18
-; IASM-NEXT: .byte 0x0
-; IASM-NEXT: .byte 0x9d
-; IASM-NEXT: .byte 0xe5
-
-; ASM-NEXT: str r0, [sp, #8]
-; ASM-NEXT: # [sp, #8] = def.pseudo
-; DIS-NEXT: 150: e58d0008
-; IASM-NEXT: .byte 0x8
-; IASM-NEXT: .byte 0x0
-; IASM-NEXT: .byte 0x8d
-; IASM-NEXT: .byte 0xe5
-
-; ASM-NEXT: ldr r0, [sp, #20]
-; DIS-NEXT: 154: e59d0014
-; IASM-NEXT: .byte 0x14
-; IASM-NEXT: .byte 0x0
-; IASM-NEXT: .byte 0x9d
-; IASM-NEXT: .byte 0xe5
-
-; ASM-NEXT: str r0, [sp, #4]
-; ASM-NEXT: # [sp, #4] = def.pseudo
-; DIS-NEXT: 158: e58d0004
-; IASM-NEXT: .byte 0x4
-; IASM-NEXT: .byte 0x0
-; IASM-NEXT: .byte 0x8d
-; IASM-NEXT: .byte 0xe5
-
-; ASM-NEXT: ldr r0, [sp, #28]
-; DIS-NEXT: 15c: e59d001c
-; IASM-NEXT: .byte 0x1c
-; IASM-NEXT: .byte 0x0
-; IASM-NEXT: .byte 0x9d
-; IASM-NEXT: .byte 0xe5
-
-; ASM-NEXT: str r0, [sp]
-; ASM-NEXT: # [sp] = def.pseudo
-; DIS-NEXT: 160: e58d0000
-; IASM-NEXT: .byte 0x0
-; IASM-NEXT: .byte 0x0
-; IASM-NEXT: .byte 0x8d
-; IASM-NEXT: .byte 0xe5
-
-; ASM-NEXT: dmb sy
-; DIS-NEXT: 164: f57ff05f
-; IASM-NEXT: .byte 0x5f
-; IASM-NEXT: .byte 0xf0
-; IASM-NEXT: .byte 0x7f
-; IASM-NEXT: .byte 0xf5
-
-; ASM-NEXT:.LtestI64Form$local$__0:
-; IASM-NEXT:.LtestI64Form$local$__0:
-
-; ASM-NEXT: ldr r0, [sp]
-; ASM-NEXT: # r2, r3 = def.pseudo [sp]
-; DIS-NEXT: 168: e59d0000
-; IASM-NEXT: .byte 0x0
-; IASM-NEXT: .byte 0x0
-; IASM-NEXT: .byte 0x9d
-; IASM-NEXT: .byte 0xe5
-
-; ASM-NEXT: ldr r1, [sp, #8]
-; DIS-NEXT: 16c: e59d1008
-; IASM-NEXT: .byte 0x8
-; IASM-NEXT: .byte 0x10
-; IASM-NEXT: .byte 0x9d
-; IASM-NEXT: .byte 0xe5
-
-; ASM-NEXT: mov r2, r1
-; DIS-NEXT: 170: e1a02001
-; IASM-NEXT: .byte 0x1
-; IASM-NEXT: .byte 0x20
-; IASM-NEXT: .byte 0xa0
-; IASM-NEXT: .byte 0xe1
-
-; ASM-NEXT: ldr r1, [sp, #4]
-; DIS-NEXT: 174: e59d1004
-; IASM-NEXT: .byte 0x4
-; IASM-NEXT: .byte 0x10
-; IASM-NEXT: .byte 0x9d
-; IASM-NEXT: .byte 0xe5
-
-; ASM-NEXT: mov r3, r1
-; DIS-NEXT: 178: e1a03001
-; IASM-NEXT: .byte 0x1
-; IASM-NEXT: .byte 0x30
-; IASM-NEXT: .byte 0xa0
-; IASM-NEXT: .byte 0xe1
-
; ***** Example of ldrexd *****
-; ASM-NEXT: ldrexd r4, r5, [r0]
-; ASM-NEXT: # r4 = def.pseudo r4, r5
-; ASM-NEXT: # r5 = def.pseudo r4, r5
-; ASM-NEXT: # r2, r3 = def.pseudo r2, r3
-; DIS-NEXT: 17c: e1b04f9f
-; IASM-NEXT: .byte 0x9f
+; ASM: ldrexd r4, r5, [r6]
+; DIS: 13c: e1b64f9f
+; IASM: .byte 0x9f
; IASM-NEXT: .byte 0x4f
-; IASM-NEXT: .byte 0xb0
+; IASM-NEXT: .byte 0xb6
; IASM-NEXT: .byte 0xe1
-; ASM-NEXT: adds r2, r4, r2
-; DIS-NEXT: 180: e0942002
-; IASM-NEXT: .byte 0x2
-; IASM-NEXT: .byte 0x20
-; IASM-NEXT: .byte 0x94
-; IASM-NEXT: .byte 0xe0
-
-; ASM-NEXT: adc r3, r5, r3
-; ASM-NEXT: # r1 = def.pseudo
-; DIS-NEXT: 184: e0a53003
-; IASM-NEXT: .byte 0x3
-; IASM-NEXT: .byte 0x30
-; IASM-NEXT: .byte 0xa5
-; IASM-NEXT: .byte 0xe0
-
; ***** Example of strexd *****
-; ASM-NEXT: strexd r1, r2, r3, [r0]
-; DIS-NEXT: 188: e1a01f92
-; IASM-NEXT: .byte 0x92
-; IASM-NEXT: .byte 0x1f
-; IASM-NEXT: .byte 0xa0
+; ASM: strexd r4, r0, r1, [r6]
+; DIS: 158: e1a64f90
+; IASM: .byte 0x90
+; IASM-NEXT: .byte 0x4f
+; IASM-NEXT: .byte 0xa6
; IASM-NEXT: .byte 0xe1
ret i64 %v
diff --git a/tests_lit/llvm2ice_tests/nacl-atomic-intrinsics.ll b/tests_lit/llvm2ice_tests/nacl-atomic-intrinsics.ll
index 12d4267..42a6ed8 100644
--- a/tests_lit/llvm2ice_tests/nacl-atomic-intrinsics.ll
+++ b/tests_lit/llvm2ice_tests/nacl-atomic-intrinsics.ll
@@ -246,10 +246,10 @@
; CHECK: movq QWORD {{.*}},x{{.*}}
; CHECK: mfence
; ARM32-LABEL: test_atomic_store_64_const
-; ARM32: dmb
; ARM32: movw [[T0:r[0-9]+]], #12274
; ARM32: movt [[T0]], #29646
; ARM32: movw r{{[0-9]+}}, #2874
+; ARM32: dmb
; ARM32: .L[[RETRY:.*]]:
; ARM32: ldrexd r{{[0-9]+}}, r{{[0-9]+}}, [[MEM:.*]]
; ARM32: strexd [[S:r[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}}, [[MEM]]
@@ -342,7 +342,7 @@
; ARM32: dmb
; ARM32: ldrexd r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}]
; ARM32: adds
-; ARM32-NEXT: adc
+; ARM32: adc
; ARM32: strexd r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}]
; ARM32: bne
; ARM32: dmb
@@ -359,7 +359,7 @@
; ARM32: dmb
; ARM32: ldrexd r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}]
; ARM32: adds
-; ARM32-NEXT: adc
+; ARM32: adc
; ARM32: strexd r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}]
; ARM32: bne
; ARM32: dmb
@@ -400,7 +400,7 @@
; ARM32: dmb
; ARM32: ldrexd r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}]
; ARM32: adds
-; ARM32-NEXT: adc
+; ARM32: adc
; ARM32: strexd r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}]
; ARM32: bne
; ARM32: dmb
@@ -457,7 +457,7 @@
; ARM32: dmb
; ARM32: ldrexd r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}]
; ARM32: adds
-; ARM32-NEXT: adc
+; ARM32: adc
; ARM32: strexd r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}]
; ARM32: bne
; ARM32: dmb
@@ -543,7 +543,7 @@
; ARM32: dmb
; ARM32: ldrexd r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}]
; ARM32: subs
-; ARM32-NEXT: sbc
+; ARM32: sbc
; ARM32: strexd r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}]
; ARM32: bne
; ARM32: dmb
@@ -602,9 +602,9 @@
}
; CHECK-LABEL: test_atomic_rmw_or_8_global
; ARM32-LABEL: test_atomic_rmw_or_8_global
+; ARM32: dmb
; ARM32: movw [[PTR:r[0-9]+]], #:lower16:SzGlobal8
; ARM32: movt [[PTR]], #:upper16:SzGlobal8
-; ARM32: dmb
; ARM32: ldrexb r{{[0-9]+}}, {{[[]}}[[PTR]]{{[]]}}
; ARM32: orr
; ARM32: strexb
@@ -643,9 +643,9 @@
}
; CHECK-LABEL: test_atomic_rmw_or_16_global
; ARM32-LABEL: test_atomic_rmw_or_16_global
+; ARM32: dmb
; ARM32: movw [[PTR:r[0-9]+]], #:lower16:SzGlobal16
; ARM32: movt [[PTR]], #:upper16:SzGlobal16
-; ARM32: dmb
; ARM32: ldrexh r{{[0-9]+}}, {{[[]}}[[PTR]]{{[]]}}
; ARM32: orr
; ARM32: strexh
@@ -680,9 +680,9 @@
}
; CHECK-LABEL: test_atomic_rmw_or_32_global
; ARM32-LABEL: test_atomic_rmw_or_32_global
+; ARM32: dmb
; ARM32: movw [[PTR:r[0-9]+]], #:lower16:SzGlobal32
; ARM32: movt [[PTR]], #:upper16:SzGlobal32
-; ARM32: dmb
; ARM32: ldrex r{{[0-9]+}}, {{[[]}}[[PTR]]{{[]]}}
; ARM32: orr
; ARM32: strex
@@ -709,7 +709,7 @@
; ARM32: dmb
; ARM32: ldrexd r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}]
; ARM32: orr
-; ARM32-NEXT: orr
+; ARM32: orr
; ARM32: strexd r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}]
; ARM32: bne
; ARM32: dmb
@@ -819,7 +819,7 @@
; ARM32: dmb
; ARM32: ldrexd r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}]
; ARM32: and
-; ARM32-NEXT: and
+; ARM32: and
; ARM32: strexd r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}]
; ARM32: bne
; ARM32: dmb
@@ -927,7 +927,7 @@
; ARM32: dmb
; ARM32: ldrexd r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}]
; ARM32: eor
-; ARM32-NEXT: eor
+; ARM32: eor
; ARM32: strexd r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, [r{{[0-9]+}}]
; ARM32: bne
; ARM32: dmb
@@ -1067,8 +1067,8 @@
; ARM32: dmb
; ARM32: ldrexb
; ARM32: cmp
+; ARM32: {{strb|mov}}
; ARM32: strexbeq
-; ARM32: {{strb|mov}}ne
; ARM32: cmpeq
; ARM32: bne
; ARM32: dmb
@@ -1091,8 +1091,8 @@
; ARM32: dmb
; ARM32: ldrexh
; ARM32: cmp
+; ARM32: {{strh|mov}}
; ARM32: strexheq
-; ARM32: {{strh|mov}}ne
; ARM32: cmpeq
; ARM32: bne
; ARM32: dmb
@@ -1112,8 +1112,8 @@
; ARM32: dmb
; ARM32: ldrex
; ARM32: cmp
+; ARM32: {{str|mov}}
; ARM32: strexeq
-; ARM32: {{str|mov}}ne
; ARM32: cmpeq
; ARM32: bne
; ARM32: dmb
@@ -1140,10 +1140,10 @@
; ARM32: dmb
; ARM32: ldrexd r{{[0-9]+}}, r{{[0-9]+}}, {{[[]}}[[PTR:r[0-9]+]]{{[]]}}
; ARM32: cmp
-; ARM32-NEXT: cmpeq
+; ARM32: cmpeq
+; ARM32: mov
+; ARM32: mov
; ARM32: strexdeq r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, {{[[]}}[[PTR]]{{[]]}}
-; ARM32: {{str|mov}}ne
-; ARM32: {{str|mov}}ne
; ARM32: cmpeq
; ARM32: bne
; ARM32: dmb
@@ -1163,10 +1163,10 @@
; ARM32: dmb
; ARM32: ldrexd r{{[0-9]+}}, r{{[0-9]+}}, {{[[]}}[[PTR:r[0-9]+]]{{[]]}}
; ARM32: cmp
-; ARM32-NEXT: cmpeq
+; ARM32: cmpeq
+; ARM32: mov
+; ARM32: mov
; ARM32: strexdeq r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, {{[[]}}[[PTR]]{{[]]}}
-; ARM32: {{str|mov}}ne
-; ARM32: {{str|mov}}ne
; ARM32: cmpeq
; ARM32: bne
; ARM32: dmb
@@ -1195,10 +1195,10 @@
; ARM32: dmb
; ARM32: ldrexd r{{[0-9]+}}, r{{[0-9]+}}, {{[[]}}[[PTR:r[0-9]+]]{{[]]}}
; ARM32: cmp
-; ARM32-NEXT: cmpeq
+; ARM32: cmpeq
+; ARM32: mov
+; ARM32: mov
; ARM32: strexdeq r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, {{[[]}}[[PTR]]{{[]]}}
-; ARM32: {{str|mov}}ne
-; ARM32: {{str|mov}}ne
; ARM32: cmpeq
; ARM32: bne
; ARM32: dmb
@@ -1241,10 +1241,10 @@
; ARM32: dmb
; ARM32: ldrexd r{{[0-9]+}}, r{{[0-9]+}}, {{[[]}}[[PTR:r[0-9]+]]{{[]]}}
; ARM32: cmp
-; ARM32-NEXT: cmpeq
+; ARM32: cmpeq
+; ARM32: mov
+; ARM32: mov
; ARM32: strexdeq r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, {{[[]}}[[PTR]]{{[]]}}
-; ARM32: {{str|mov}}ne
-; ARM32: {{str|mov}}ne
; ARM32: cmpeq
; ARM32: bne
; ARM32: dmb
@@ -1265,7 +1265,6 @@
; ARM32: ldrex
; ARM32: cmp
; ARM32: strexeq
-; ARM32: {{str|mov}}ne
; ARM32: cmpeq
; ARM32: bne
; ARM32: dmb