| ; RUN: llc -O2 -tail-dup-size=100 -enable-tail-merge=0 < %s | FileCheck %s | 
 | target triple = "nvptx64-nvidia-cuda" | 
 |  | 
 | declare void @foo() | 
 | declare void @llvm.nvvm.barrier0() | 
 |  | 
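 | ; The syncthreads barrier (@llvm.nvvm.barrier0, expected in the output as bar.sync) | 
 | ; must not be tail-duplicated: only one bar.sync may appear in call_syncthreads. | 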
 | ; CHECK: .func call_syncthreads | 
 | ; CHECK: bar.sync | 
 | ; CHECK-NOT: bar.sync | 
 | ; Test input: both arms of the entry branch store a constant to %a and then jump to | 
 | ; the shared block L42, which calls @llvm.nvvm.barrier0 before branching to Ret. | 
 | ; %b is not referenced anywhere in the body. | 
 | define void @call_syncthreads(i32* %a, i32* %b, i1 %cond, i1 %cond2) nounwind { | 
 |   ; Entry block: select one of the two store blocks based on %cond. | 
 |   br i1 %cond, label %L1, label %L2 | 
 |   ; NOTE(review): this branch follows a terminator without a label, so it starts a | 
 |   ; new unnamed block that no visible branch targets. It is the only use of %cond2 | 
 |   ; and looks unreachable; confirm whether it is intentional. | 
 |   br i1 %cond2, label %Ret, label %L1 | 
 | Ret: | 
 |   ; Single exit of the function. | 
 |   ret void | 
 | L1: | 
 |   ; Taken when %cond is true: store 0 through %a, then join at L42. | 
 |   store i32 0, i32* %a | 
 |   br label %L42 | 
 | L2: | 
 |   ; Taken when %cond is false: store 1 through %a, then join at L42. | 
 |   store i32 1, i32* %a | 
 |   br label %L42 | 
 | L42: | 
 |   ; Shared tail with two predecessors (L1 and L2). The barrier call lives here, so | 
 |   ; copying this block into its predecessors would produce a second bar.sync. | 
 |   call void @llvm.nvvm.barrier0() | 
 |   br label %Ret | 
 | } | 
 |  | 
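 | ; Check that the control flow used in call_syncthreads really does trigger tail | 
 | ; duplication when the shared block calls an ordinary function (@foo) instead of | 
 | ; the barrier: two calls are expected in the output for call_foo. | 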
 | ; CHECK: .func call_foo | 
 | ; CHECK: call | 
 | ; CHECK: call | 
 | ; Same control flow as call_syncthreads, but the shared block L42 calls the external | 
 | ; function @foo rather than the barrier intrinsic. %b is not referenced in the body. | 
 | define void @call_foo(i32* %a, i32* %b, i1 %cond, i1 %cond2) nounwind { | 
 |   ; Entry block: select one of the two store blocks based on %cond. | 
 |   br i1 %cond, label %L1, label %L2 | 
 |   ; NOTE(review): this branch follows a terminator without a label, so it starts a | 
 |   ; new unnamed block that no visible branch targets. It is the only use of %cond2 | 
 |   ; and looks unreachable; confirm whether it is intentional. | 
 |   br i1 %cond2, label %Ret, label %L1 | 
 | Ret: | 
 |   ; Single exit of the function. | 
 |   ret void | 
 | L1: | 
 |   ; Taken when %cond is true: store 0 through %a, then join at L42. | 
 |   store i32 0, i32* %a | 
 |   br label %L42 | 
 | L2: | 
 |   ; Taken when %cond is false: store 1 through %a, then join at L42. | 
 |   store i32 1, i32* %a | 
 |   br label %L42 | 
 | L42: | 
 |   ; Shared tail with two predecessors (L1 and L2). If this block is copied into both | 
 |   ; predecessors, the call to @foo appears twice in the generated code. | 
 |   call void @foo() | 
 |   br label %Ret | 
 | } | 