; Source blob: 122347ca135a3ecbb36fefa4712e33b3cff33c91
; Tests various aspects of x86 opcode encodings. For example, the opcodes for
; the pmull family vary widely with operand size rather than following the
; usual pattern.
; RUN: %p2i -i %s --args -O2 -mattr=sse4.1 -sandbox --verbose none \
; RUN: | llvm-mc -triple=i686-none-nacl -filetype=obj \
; RUN: | llvm-objdump -d --symbolize -x86-asm-syntax=intel - | FileCheck %s
; Multiplies two <8 x i16> vectors and returns the product. The expected
; output pins the register-to-register encoding of pmullw (xmm0 *= xmm1).
define <8 x i16> @test_mul_v8i16(<8 x i16> %arg0, <8 x i16> %arg1) {
entry:
  %res = mul <8 x i16> %arg0, %arg1
  ret <8 x i16> %res
; The trailing colon anchors the label to this function's own symbol line;
; without it the pattern is also a substring of the
; test_mul_v8i16_more_regs symbol line.
; CHECK-LABEL: test_mul_v8i16:
; CHECK: 66 0f d5 c1 pmullw xmm0, xmm1
}
; Test register and address mode encoding.
; Multiplies %arg0 by each of %arg1..%arg8 and merges the eight products
; through a tree of selects on %cond, so every product reaches the return
; value.
define <8 x i16> @test_mul_v8i16_more_regs(<8 x i1> %cond, <8 x i16> %arg0, <8 x i16> %arg1, <8 x i16> %arg2, <8 x i16> %arg3, <8 x i16> %arg4, <8 x i16> %arg5, <8 x i16> %arg6, <8 x i16> %arg7, <8 x i16> %arg8) {
entry:
; Eight products that all share %arg0 as the left-hand operand.
%res1 = mul <8 x i16> %arg0, %arg1
%res2 = mul <8 x i16> %arg0, %arg2
%res3 = mul <8 x i16> %arg0, %arg3
%res4 = mul <8 x i16> %arg0, %arg4
%res5 = mul <8 x i16> %arg0, %arg5
%res6 = mul <8 x i16> %arg0, %arg6
%res7 = mul <8 x i16> %arg0, %arg7
%res8 = mul <8 x i16> %arg0, %arg8
; Pairwise select tree: 8 products -> 4 -> 2 -> 1.
%res_acc1 = select <8 x i1> %cond, <8 x i16> %res1, <8 x i16> %res2
%res_acc2 = select <8 x i1> %cond, <8 x i16> %res3, <8 x i16> %res4
%res_acc3 = select <8 x i1> %cond, <8 x i16> %res5, <8 x i16> %res6
%res_acc4 = select <8 x i1> %cond, <8 x i16> %res7, <8 x i16> %res8
%res_acc1_3 = select <8 x i1> %cond, <8 x i16> %res_acc1, <8 x i16> %res_acc3
%res_acc2_4 = select <8 x i1> %cond, <8 x i16> %res_acc2, <8 x i16> %res_acc4
%res = select <8 x i1> %cond, <8 x i16> %res_acc1_3, <8 x i16> %res_acc2_4
ret <8 x i16> %res
; Expected pmullw encodings, matched in any order. ModRM bytes c2..c7 are the
; register-direct forms with xmm2..xmm7 as the source. "44 24 70" is an
; esp-based memory source with an 8-bit displacement (112), and
; "8c 24 80 00 00 00" is one with a 32-bit displacement (128).
; CHECK-LABEL: test_mul_v8i16_more_regs
; CHECK-DAG: 66 0f d5 c2 pmullw xmm0, xmm2
; CHECK-DAG: 66 0f d5 c3 pmullw xmm0, xmm3
; CHECK-DAG: 66 0f d5 c4 pmullw xmm0, xmm4
; CHECK-DAG: 66 0f d5 c5 pmullw xmm0, xmm5
; CHECK-DAG: 66 0f d5 c6 pmullw xmm0, xmm6
; CHECK-DAG: 66 0f d5 c7 pmullw xmm0, xmm7
; CHECK-DAG: 66 0f d5 44 24 70 pmullw xmm0, xmmword ptr [esp + 112]
; CHECK-DAG: 66 0f d5 8c 24 80 00 00 00 pmullw xmm1, xmmword ptr [esp + 128]
}
; Multiplies two <4 x i32> vectors and returns the product. The expected
; output pins the register-to-register encoding of pmulld, which uses the
; three-byte 0f 38 40 opcode rather than the two-byte 0f d5 form of pmullw.
define <4 x i32> @test_mul_v4i32(<4 x i32> %arg0, <4 x i32> %arg1) {
entry:
  %res = mul <4 x i32> %arg0, %arg1
  ret <4 x i32> %res
; The trailing colon anchors the label to this function's own symbol line;
; without it the pattern is also a substring of the
; test_mul_v4i32_more_regs symbol line.
; CHECK-LABEL: test_mul_v4i32:
; CHECK: 66 0f 38 40 c1 pmulld xmm0, xmm1
}
; Multiplies %arg0 by each of %arg1..%arg8 and merges the eight products
; through a tree of selects on %cond, so every product reaches the return
; value. This is the <4 x i32> counterpart of test_mul_v8i16_more_regs.
define <4 x i32> @test_mul_v4i32_more_regs(<4 x i1> %cond, <4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2, <4 x i32> %arg3, <4 x i32> %arg4, <4 x i32> %arg5, <4 x i32> %arg6, <4 x i32> %arg7, <4 x i32> %arg8) {
entry:
; Eight products that all share %arg0 as the left-hand operand.
%res1 = mul <4 x i32> %arg0, %arg1
%res2 = mul <4 x i32> %arg0, %arg2
%res3 = mul <4 x i32> %arg0, %arg3
%res4 = mul <4 x i32> %arg0, %arg4
%res5 = mul <4 x i32> %arg0, %arg5
%res6 = mul <4 x i32> %arg0, %arg6
%res7 = mul <4 x i32> %arg0, %arg7
%res8 = mul <4 x i32> %arg0, %arg8
; Pairwise select tree: 8 products -> 4 -> 2 -> 1.
%res_acc1 = select <4 x i1> %cond, <4 x i32> %res1, <4 x i32> %res2
%res_acc2 = select <4 x i1> %cond, <4 x i32> %res3, <4 x i32> %res4
%res_acc3 = select <4 x i1> %cond, <4 x i32> %res5, <4 x i32> %res6
%res_acc4 = select <4 x i1> %cond, <4 x i32> %res7, <4 x i32> %res8
%res_acc1_3 = select <4 x i1> %cond, <4 x i32> %res_acc1, <4 x i32> %res_acc3
%res_acc2_4 = select <4 x i1> %cond, <4 x i32> %res_acc2, <4 x i32> %res_acc4
%res = select <4 x i1> %cond, <4 x i32> %res_acc1_3, <4 x i32> %res_acc2_4
ret <4 x i32> %res
; Expected pmulld encodings, matched in any order. ModRM bytes c2..c7 are the
; register-direct forms with xmm2..xmm7 as the source. "44 24 70" is an
; esp-based memory source with an 8-bit displacement (112), and
; "8c 24 80 00 00 00" is one with a 32-bit displacement (128).
; CHECK-LABEL: test_mul_v4i32_more_regs
; CHECK-DAG: 66 0f 38 40 c2 pmulld xmm0, xmm2
; CHECK-DAG: 66 0f 38 40 c3 pmulld xmm0, xmm3
; CHECK-DAG: 66 0f 38 40 c4 pmulld xmm0, xmm4
; CHECK-DAG: 66 0f 38 40 c5 pmulld xmm0, xmm5
; CHECK-DAG: 66 0f 38 40 c6 pmulld xmm0, xmm6
; CHECK-DAG: 66 0f 38 40 c7 pmulld xmm0, xmm7
; CHECK-DAG: 66 0f 38 40 44 24 70 pmulld xmm0, xmmword ptr [esp + 112]
; CHECK-DAG: 66 0f 38 40 8c 24 80 00 00 00 pmulld xmm1, xmmword ptr [esp + 128]
}
; Test movq, which is used by atomic stores.
; Intrinsic called by the test below: (value, destination pointer, i32 flag).
declare void @llvm.nacl.atomic.store.i64(i64, i64*, i32)
; Issues three 64-bit atomic stores through pointers built from the i32
; arguments: %v to %ptr2, a 64-bit constant to %ptr, then %v again to %ptr3.
define void @test_atomic_store_64(i32 %iptr, i32 %iptr2, i32 %iptr3, i64 %v) {
entry:
%ptr = inttoptr i32 %iptr to i64*
%ptr2 = inttoptr i32 %iptr2 to i64*
%ptr3 = inttoptr i32 %iptr3 to i64*
; NOTE(review): the trailing i32 6 is presumably the memory-order argument
; (sequentially consistent) -- confirm against the PNaCl intrinsic reference.
call void @llvm.nacl.atomic.store.i64(i64 %v, i64* %ptr2, i32 6)
call void @llvm.nacl.atomic.store.i64(i64 1234567891024, i64* %ptr, i32 6)
call void @llvm.nacl.atomic.store.i64(i64 %v, i64* %ptr3, i32 6)
ret void
}
; CHECK-LABEL: test_atomic_store_64
; CHECK-DAG: f3 0f 7e 04 24 movq xmm0, qword ptr [esp]
; CHECK-DAG: f3 0f 7e 44 24 08 movq xmm0, qword ptr [esp + 8]
; CHECK-DAG: 66 0f d6 0{{.*}} movq qword ptr [e{{.*}}], xmm0
; Test "movups" via vector stores and loads.
; Stores the same <16 x i8> value %v to three addresses with align 1, in the
; order %addr2, %addr, %addr3.
define void @store_v16xI8(i32 %addr, i32 %addr2, i32 %addr3, <16 x i8> %v) {
; Turn the three i32 arguments into vector pointers.
%addr_v16xI8 = inttoptr i32 %addr to <16 x i8>*
%addr2_v16xI8 = inttoptr i32 %addr2 to <16 x i8>*
%addr3_v16xI8 = inttoptr i32 %addr3 to <16 x i8>*
; align 1 means the stores carry no 16-byte alignment guarantee.
store <16 x i8> %v, <16 x i8>* %addr2_v16xI8, align 1
store <16 x i8> %v, <16 x i8>* %addr_v16xI8, align 1
store <16 x i8> %v, <16 x i8>* %addr3_v16xI8, align 1
ret void
}
; CHECK-LABEL: store_v16xI8
; CHECK: 0f 11 0{{.*}} movups xmmword ptr [e{{.*}}], xmm0
; Loads three <16 x i8> vectors with align 1 (from %addr2, %addr, %addr3, in
; that order) and returns their element-wise sum.
define <16 x i8> @load_v16xI8(i32 %addr, i32 %addr2, i32 %addr3) {
; Turn the three i32 arguments into vector pointers.
%addr_v16xI8 = inttoptr i32 %addr to <16 x i8>*
%addr2_v16xI8 = inttoptr i32 %addr2 to <16 x i8>*
%addr3_v16xI8 = inttoptr i32 %addr3 to <16 x i8>*
; align 1 means the loads carry no 16-byte alignment guarantee.
%res1 = load <16 x i8>* %addr2_v16xI8, align 1
%res2 = load <16 x i8>* %addr_v16xI8, align 1
%res3 = load <16 x i8>* %addr3_v16xI8, align 1
; Summing all three loaded values makes each load feed the return value.
%res12 = add <16 x i8> %res1, %res2
%res123 = add <16 x i8> %res12, %res3
ret <16 x i8> %res123
}
; CHECK-LABEL: load_v16xI8
; CHECK: 0f 10 0{{.*}} movups xmm0, xmmword ptr [e{{.*}}]
; Test segment override prefix. This happens w/ nacl.read.tp.
; Intrinsic that returns the thread pointer as an i8*.
declare i8* @llvm.nacl.read.tp()
; Also test more complex address operands via address-mode optimization.
; Reads the thread pointer twice: once to load from (tp + tp), and once to
; store to (tp - 128) and (tp + 256). Returns the loaded value.
define i32 @test_nacl_read_tp_more_addressing() {
entry:
; First read: load from base + index, where both are the thread pointer.
%ptr = call i8* @llvm.nacl.read.tp()
%__1 = ptrtoint i8* %ptr to i32
%x = add i32 %__1, %__1
%__3 = inttoptr i32 %x to i32*
%v = load i32* %__3, align 1
; NOTE(review): %v_add is never used.
%v_add = add i32 %v, 1
; Second read: store at offset -128 and at offset 256 from the thread
; pointer.
%ptr2 = call i8* @llvm.nacl.read.tp()
%__6 = ptrtoint i8* %ptr2 to i32
%y = add i32 %__6, -128
%__8 = inttoptr i32 %y to i32*
%v_add2 = add i32 %v, 4
store i32 %v_add2, i32* %__8, align 1
%z = add i32 %__6, 256
%__9 = inttoptr i32 %z to i32*
; NOTE(review): %v_add3 is never used; the store below writes %v_add2 again.
; Confirm whether %v_add3 was meant to be stored here before changing it,
; since the expected output has both stores using the same source register.
%v_add3 = add i32 %v, 91
store i32 %v_add2, i32* %__9, align 1
ret i32 %v
}
; CHECK-LABEL: test_nacl_read_tp_more_addressing
; CHECK: 65 8b 05 00 00 00 00 mov eax, dword ptr gs:[0]
; CHECK: 8b 04 00 mov eax, dword ptr [eax + eax]
; CHECK: 65 8b 0d 00 00 00 00 mov ecx, dword ptr gs:[0]
; CHECK: 89 51 80 mov dword ptr [ecx - 128], edx
; CHECK: 89 91 00 01 00 00 mov dword ptr [ecx + 256], edx
; The 16-bit pinsrw/pextrw (SSE2) are quite different from
; the pinsr{b,d}/pextr{b,d} (SSE4.1).
; Inserts three i32 values into %vec at lanes 1, 2 and 3 and returns the
; resulting vector.
define <4 x i32> @test_pinsrd(<4 x i32> %vec, i32 %elt1, i32 %elt2, i32 %elt3, i32 %elt4) {
entry:
; Lanes 1 and 2 receive computed sums; lane 3 receives the argument %elt1
; unchanged.
%elt12 = add i32 %elt1, %elt2
%elt34 = add i32 %elt3, %elt4
%res1 = insertelement <4 x i32> %vec, i32 %elt12, i32 1
%res2 = insertelement <4 x i32> %res1, i32 %elt34, i32 2
%res3 = insertelement <4 x i32> %res2, i32 %elt1, i32 3
ret <4 x i32> %res3
}
; CHECK-LABEL: test_pinsrd:
; CHECK-DAG: 66 0f 3a 22 c{{.*}} 01 pinsrd xmm0, e{{.*}}, 1
; CHECK-DAG: 66 0f 3a 22 c{{.*}} 02 pinsrd xmm0, e{{.*}}, 2
; CHECK-DAG: 66 0f 3a 22 c{{.*}} 03 pinsrd xmm0, e{{.*}}, 3
; Inserts three i8 values into %vec at lanes 1, 7 and 15 and returns the
; resulting vector.
define <16 x i8> @test_pinsrb(<16 x i8> %vec, i32 %elt1_w, i32 %elt2_w, i32 %elt3_w, i32 %elt4_w) {
entry:
; The scalars arrive as i32 and are truncated to i8 before use.
%elt1 = trunc i32 %elt1_w to i8
%elt2 = trunc i32 %elt2_w to i8
%elt3 = trunc i32 %elt3_w to i8
%elt4 = trunc i32 %elt4_w to i8
; Lanes 1 and 7 receive computed sums; lane 15 receives the truncated first
; argument unchanged.
%elt12 = add i8 %elt1, %elt2
%elt34 = add i8 %elt3, %elt4
%res1 = insertelement <16 x i8> %vec, i8 %elt12, i32 1
%res2 = insertelement <16 x i8> %res1, i8 %elt34, i32 7
%res3 = insertelement <16 x i8> %res2, i8 %elt1, i32 15
ret <16 x i8> %res3
}
; CHECK-LABEL: test_pinsrb:
; CHECK-DAG: 66 0f 3a 20 c{{.*}} 01 pinsrb xmm0, e{{.*}}, 1
; CHECK-DAG: 66 0f 3a 20 c{{.*}} 07 pinsrb xmm0, e{{.*}}, 7
; CHECK-DAG: 66 0f 3a 20 {{.*}} 0f pinsrb xmm0, byte ptr {{.*}}, 15
; Inserts three i16 values into %vec at lanes 1, 4 and 7 and returns the
; resulting vector.
define <8 x i16> @test_pinsrw(<8 x i16> %vec, i32 %elt1_w, i32 %elt2_w, i32 %elt3_w, i32 %elt4_w) {
entry:
; The scalars arrive as i32 and are truncated to i16 before use.
%elt1 = trunc i32 %elt1_w to i16
%elt2 = trunc i32 %elt2_w to i16
%elt3 = trunc i32 %elt3_w to i16
%elt4 = trunc i32 %elt4_w to i16
; Lanes 1 and 4 receive computed sums; lane 7 receives the truncated first
; argument unchanged.
%elt12 = add i16 %elt1, %elt2
%elt34 = add i16 %elt3, %elt4
%res1 = insertelement <8 x i16> %vec, i16 %elt12, i32 1
%res2 = insertelement <8 x i16> %res1, i16 %elt34, i32 4
%res3 = insertelement <8 x i16> %res2, i16 %elt1, i32 7
ret <8 x i16> %res3
}
; CHECK-LABEL: test_pinsrw:
; CHECK-DAG: 66 0f c4 c{{.*}} 01 pinsrw xmm0, e{{.*}}, 1
; CHECK-DAG: 66 0f c4 c{{.*}} 04 pinsrw xmm0, e{{.*}}, 4
; CHECK-DAG: 66 0f c4 c{{.*}} 07 pinsrw xmm0, e{{.*}}, 7
; Returns one i32 lane from one of four vectors, chosen by %c:
; 0 -> %vec1 lane 0, 1 -> %vec2 lane 1, 2 -> %vec3 lane 2,
; anything else -> %vec4 lane 3.
define i32 @test_pextrd(i32 %c, <4 x i32> %vec1, <4 x i32> %vec2, <4 x i32> %vec3, <4 x i32> %vec4) {
entry:
; %three is the default destination of the switch.
switch i32 %c, label %three [i32 0, label %zero
i32 1, label %one
i32 2, label %two]
zero:
%res0 = extractelement <4 x i32> %vec1, i32 0
ret i32 %res0
one:
%res1 = extractelement <4 x i32> %vec2, i32 1
ret i32 %res1
two:
%res2 = extractelement <4 x i32> %vec3, i32 2
ret i32 %res2
three:
%res3 = extractelement <4 x i32> %vec4, i32 3
ret i32 %res3
}
; CHECK-LABEL: test_pextrd
; CHECK-DAG: 66 0f 3a 16 c0 00 pextrd eax, xmm0, 0
; CHECK-DAG: 66 0f 3a 16 c8 01 pextrd eax, xmm1, 1
; CHECK-DAG: 66 0f 3a 16 d0 02 pextrd eax, xmm2, 2
; CHECK-DAG: 66 0f 3a 16 d8 03 pextrd eax, xmm3, 3
; Returns one i8 lane, zero-extended to i32, from one of four vectors chosen
; by %c: 0 -> %vec1 lane 0, 1 -> %vec2 lane 6, 2 -> %vec3 lane 12,
; anything else -> %vec4 lane 15.
define i32 @test_pextrb(i32 %c, <16 x i8> %vec1, <16 x i8> %vec2, <16 x i8> %vec3, <16 x i8> %vec4) {
entry:
; %three is the default destination of the switch.
switch i32 %c, label %three [i32 0, label %zero
i32 1, label %one
i32 2, label %two]
zero:
%res0 = extractelement <16 x i8> %vec1, i32 0
%res0_ext = zext i8 %res0 to i32
ret i32 %res0_ext
one:
%res1 = extractelement <16 x i8> %vec2, i32 6
%res1_ext = zext i8 %res1 to i32
ret i32 %res1_ext
two:
%res2 = extractelement <16 x i8> %vec3, i32 12
%res2_ext = zext i8 %res2 to i32
ret i32 %res2_ext
three:
%res3 = extractelement <16 x i8> %vec4, i32 15
%res3_ext = zext i8 %res3 to i32
ret i32 %res3_ext
}
; CHECK-LABEL: test_pextrb
; CHECK-DAG: 66 0f 3a 14 c0 00 pextrb eax, xmm0, 0
; CHECK-DAG: 66 0f 3a 14 c8 06 pextrb eax, xmm1, 6
; CHECK-DAG: 66 0f 3a 14 d0 0c pextrb eax, xmm2, 12
; CHECK-DAG: 66 0f 3a 14 d8 0f pextrb eax, xmm3, 15
; Returns one i16 lane, zero-extended to i32, from one of four vectors chosen
; by %c: 0 -> %vec1 lane 0, 1 -> %vec2 lane 2, 2 -> %vec3 lane 5,
; anything else -> %vec4 lane 7.
define i32 @test_pextrw(i32 %c, <8 x i16> %vec1, <8 x i16> %vec2, <8 x i16> %vec3, <8 x i16> %vec4) {
entry:
; %three is the default destination of the switch.
switch i32 %c, label %three [i32 0, label %zero
i32 1, label %one
i32 2, label %two]
zero:
%res0 = extractelement <8 x i16> %vec1, i32 0
%res0_ext = zext i16 %res0 to i32
ret i32 %res0_ext
one:
%res1 = extractelement <8 x i16> %vec2, i32 2
%res1_ext = zext i16 %res1 to i32
ret i32 %res1_ext
two:
%res2 = extractelement <8 x i16> %vec3, i32 5
%res2_ext = zext i16 %res2 to i32
ret i32 %res2_ext
three:
%res3 = extractelement <8 x i16> %vec4, i32 7
%res3_ext = zext i16 %res3 to i32
ret i32 %res3_ext
}
; CHECK-LABEL: test_pextrw
; CHECK-DAG: 66 0f c5 c0 00 pextrw eax, xmm0, 0
; CHECK-DAG: 66 0f c5 c1 02 pextrw eax, xmm1, 2
; CHECK-DAG: 66 0f c5 c2 05 pextrw eax, xmm2, 5
; CHECK-DAG: 66 0f c5 c3 07 pextrw eax, xmm3, 7