| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc < %s -mtriple=x86_64-linux -verify-machineinstrs | FileCheck %s -check-prefix=CHECK |
| ; RUN: llc < %s -mtriple=x86_64-linux --x86-disable-avoid-SFB -verify-machineinstrs | FileCheck %s --check-prefix=DISABLED |
| ; RUN: llc < %s -mtriple=x86_64-linux -mcpu=core-avx2 -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-AVX2 |
| ; RUN: llc < %s -mtriple=x86_64-linux -mcpu=skx -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-AVX512 |
| |
| ; ModuleID = '../testSFB/testOverlapBlocks.c' |
| source_filename = "../testSFB/testOverlapBlocks.c" |
| target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" |
| target triple = "x86_64-unknown-linux-gnu" |
| |
| ; Function Attrs: nounwind uwtable. test_overlap_1 writes i32 7 at A-8 and copies the 16 bytes at A-16 to A; it then stores sext(x) as i64 at A-9 and at A-16 and an i8 0 at A-1, and copies the 16 bytes at A-16 to A+16. Default-run expectations come first: both copies are emitted as several narrower load/store pairs. |
| define dso_local void @test_overlap_1(i8* nocapture %A, i32 %x) local_unnamed_addr #0 { |
| ; CHECK-LABEL: test_overlap_1: |
| ; CHECK: # %bb.0: # %entry |
| ; CHECK-NEXT: movl $7, -8(%rdi) |
| ; CHECK-NEXT: movq -16(%rdi), %rax |
| ; CHECK-NEXT: movq %rax, (%rdi) |
| ; CHECK-NEXT: movl -8(%rdi), %eax |
| ; CHECK-NEXT: movl %eax, 8(%rdi) |
| ; CHECK-NEXT: movl -4(%rdi), %eax |
| ; CHECK-NEXT: movl %eax, 12(%rdi) |
| ; CHECK-NEXT: movslq %esi, %rax |
| ; CHECK-NEXT: movq %rax, -9(%rdi) |
| ; CHECK-NEXT: movq %rax, -16(%rdi) |
| ; CHECK-NEXT: movb $0, -1(%rdi) |
| ; CHECK-NEXT: movq -16(%rdi), %rax |
| ; CHECK-NEXT: movq %rax, 16(%rdi) |
| ; CHECK-NEXT: movl -8(%rdi), %eax |
| ; CHECK-NEXT: movl %eax, 24(%rdi) |
| ; CHECK-NEXT: movzwl -4(%rdi), %eax |
| ; CHECK-NEXT: movw %ax, 28(%rdi) |
| ; CHECK-NEXT: movb -2(%rdi), %al |
| ; CHECK-NEXT: movb %al, 30(%rdi) |
| ; CHECK-NEXT: movb -1(%rdi), %al |
| ; CHECK-NEXT: movb %al, 31(%rdi) |
| ; CHECK-NEXT: retq |
| ; Expectations for the run with --x86-disable-avoid-SFB follow: each 16-byte copy stays a single movups load/store pair. |
| ; DISABLED-LABEL: test_overlap_1: |
| ; DISABLED: # %bb.0: # %entry |
| ; DISABLED-NEXT: movl $7, -8(%rdi) |
| ; DISABLED-NEXT: movups -16(%rdi), %xmm0 |
| ; DISABLED-NEXT: movups %xmm0, (%rdi) |
| ; DISABLED-NEXT: movslq %esi, %rax |
| ; DISABLED-NEXT: movq %rax, -9(%rdi) |
| ; DISABLED-NEXT: movq %rax, -16(%rdi) |
| ; DISABLED-NEXT: movb $0, -1(%rdi) |
| ; DISABLED-NEXT: movups -16(%rdi), %xmm0 |
| ; DISABLED-NEXT: movups %xmm0, 16(%rdi) |
| ; DISABLED-NEXT: retq |
| ; Expectations for the run with -mcpu=core-avx2 follow: the same instruction sequence as the default run. |
| ; CHECK-AVX2-LABEL: test_overlap_1: |
| ; CHECK-AVX2: # %bb.0: # %entry |
| ; CHECK-AVX2-NEXT: movl $7, -8(%rdi) |
| ; CHECK-AVX2-NEXT: movq -16(%rdi), %rax |
| ; CHECK-AVX2-NEXT: movq %rax, (%rdi) |
| ; CHECK-AVX2-NEXT: movl -8(%rdi), %eax |
| ; CHECK-AVX2-NEXT: movl %eax, 8(%rdi) |
| ; CHECK-AVX2-NEXT: movl -4(%rdi), %eax |
| ; CHECK-AVX2-NEXT: movl %eax, 12(%rdi) |
| ; CHECK-AVX2-NEXT: movslq %esi, %rax |
| ; CHECK-AVX2-NEXT: movq %rax, -9(%rdi) |
| ; CHECK-AVX2-NEXT: movq %rax, -16(%rdi) |
| ; CHECK-AVX2-NEXT: movb $0, -1(%rdi) |
| ; CHECK-AVX2-NEXT: movq -16(%rdi), %rax |
| ; CHECK-AVX2-NEXT: movq %rax, 16(%rdi) |
| ; CHECK-AVX2-NEXT: movl -8(%rdi), %eax |
| ; CHECK-AVX2-NEXT: movl %eax, 24(%rdi) |
| ; CHECK-AVX2-NEXT: movzwl -4(%rdi), %eax |
| ; CHECK-AVX2-NEXT: movw %ax, 28(%rdi) |
| ; CHECK-AVX2-NEXT: movb -2(%rdi), %al |
| ; CHECK-AVX2-NEXT: movb %al, 30(%rdi) |
| ; CHECK-AVX2-NEXT: movb -1(%rdi), %al |
| ; CHECK-AVX2-NEXT: movb %al, 31(%rdi) |
| ; CHECK-AVX2-NEXT: retq |
| ; Expectations for the run with -mcpu=skx follow: the same instruction sequence as the default run. |
| ; CHECK-AVX512-LABEL: test_overlap_1: |
| ; CHECK-AVX512: # %bb.0: # %entry |
| ; CHECK-AVX512-NEXT: movl $7, -8(%rdi) |
| ; CHECK-AVX512-NEXT: movq -16(%rdi), %rax |
| ; CHECK-AVX512-NEXT: movq %rax, (%rdi) |
| ; CHECK-AVX512-NEXT: movl -8(%rdi), %eax |
| ; CHECK-AVX512-NEXT: movl %eax, 8(%rdi) |
| ; CHECK-AVX512-NEXT: movl -4(%rdi), %eax |
| ; CHECK-AVX512-NEXT: movl %eax, 12(%rdi) |
| ; CHECK-AVX512-NEXT: movslq %esi, %rax |
| ; CHECK-AVX512-NEXT: movq %rax, -9(%rdi) |
| ; CHECK-AVX512-NEXT: movq %rax, -16(%rdi) |
| ; CHECK-AVX512-NEXT: movb $0, -1(%rdi) |
| ; CHECK-AVX512-NEXT: movq -16(%rdi), %rax |
| ; CHECK-AVX512-NEXT: movq %rax, 16(%rdi) |
| ; CHECK-AVX512-NEXT: movl -8(%rdi), %eax |
| ; CHECK-AVX512-NEXT: movl %eax, 24(%rdi) |
| ; CHECK-AVX512-NEXT: movzwl -4(%rdi), %eax |
| ; CHECK-AVX512-NEXT: movw %ax, 28(%rdi) |
| ; CHECK-AVX512-NEXT: movb -2(%rdi), %al |
| ; CHECK-AVX512-NEXT: movb %al, 30(%rdi) |
| ; CHECK-AVX512-NEXT: movb -1(%rdi), %al |
| ; CHECK-AVX512-NEXT: movb %al, 31(%rdi) |
| ; CHECK-AVX512-NEXT: retq |
| entry: |
| %add.ptr = getelementptr inbounds i8, i8* %A, i64 -16 |
| %add.ptr1 = getelementptr inbounds i8, i8* %A, i64 -8 |
| %0 = bitcast i8* %add.ptr1 to i32* |
| store i32 7, i32* %0, align 4 |
| tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %A, i8* nonnull align 4 %add.ptr, i64 16, i1 false) |
| %conv = sext i32 %x to i64 |
| %add.ptr2 = getelementptr inbounds i8, i8* %A, i64 -9 |
| %1 = bitcast i8* %add.ptr2 to i64* |
| store i64 %conv, i64* %1, align 8 |
| %2 = bitcast i8* %add.ptr to i64* |
| store i64 %conv, i64* %2, align 8 |
| %add.ptr5 = getelementptr inbounds i8, i8* %A, i64 -1 |
| store i8 0, i8* %add.ptr5, align 1 |
| %add.ptr6 = getelementptr inbounds i8, i8* %A, i64 16 |
| tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 %add.ptr6, i8* nonnull align 4 %add.ptr, i64 16, i1 false) |
| ret void |
| } |
| |
| ; Function Attrs: argmemonly nounwind. Declares the llvm.memcpy intrinsic that performs the 16-byte copies in the tests shown in this file; every call shown here passes false for the trailing i1 argument. |
| declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) #1 |
| |
| ; Function Attrs: nounwind uwtable. test_overlap_2 stores sext(x) as i64 at A-16 and copies the 16 bytes at A-16 to A; it then stores sext(x) as i64 at A-8 and i32 7 at A-12, and copies the 16 bytes at A-16 to A+16. Default-run expectations come first: the first copy becomes two 8-byte load/store pairs, the second becomes two 4-byte pairs followed by one 8-byte pair. |
| define dso_local void @test_overlap_2(i8* nocapture %A, i32 %x) local_unnamed_addr #0 { |
| ; CHECK-LABEL: test_overlap_2: |
| ; CHECK: # %bb.0: # %entry |
| ; CHECK-NEXT: movslq %esi, %rax |
| ; CHECK-NEXT: movq %rax, -16(%rdi) |
| ; CHECK-NEXT: movq -16(%rdi), %rcx |
| ; CHECK-NEXT: movq %rcx, (%rdi) |
| ; CHECK-NEXT: movq -8(%rdi), %rcx |
| ; CHECK-NEXT: movq %rcx, 8(%rdi) |
| ; CHECK-NEXT: movq %rax, -8(%rdi) |
| ; CHECK-NEXT: movl $7, -12(%rdi) |
| ; CHECK-NEXT: movl -16(%rdi), %eax |
| ; CHECK-NEXT: movl %eax, 16(%rdi) |
| ; CHECK-NEXT: movl -12(%rdi), %eax |
| ; CHECK-NEXT: movl %eax, 20(%rdi) |
| ; CHECK-NEXT: movq -8(%rdi), %rax |
| ; CHECK-NEXT: movq %rax, 24(%rdi) |
| ; CHECK-NEXT: retq |
| ; Expectations for the run with --x86-disable-avoid-SFB follow: each 16-byte copy stays a single movups load/store pair. |
| ; DISABLED-LABEL: test_overlap_2: |
| ; DISABLED: # %bb.0: # %entry |
| ; DISABLED-NEXT: movslq %esi, %rax |
| ; DISABLED-NEXT: movq %rax, -16(%rdi) |
| ; DISABLED-NEXT: movups -16(%rdi), %xmm0 |
| ; DISABLED-NEXT: movups %xmm0, (%rdi) |
| ; DISABLED-NEXT: movq %rax, -8(%rdi) |
| ; DISABLED-NEXT: movl $7, -12(%rdi) |
| ; DISABLED-NEXT: movups -16(%rdi), %xmm0 |
| ; DISABLED-NEXT: movups %xmm0, 16(%rdi) |
| ; DISABLED-NEXT: retq |
| ; Expectations for the run with -mcpu=core-avx2 follow: the same instruction sequence as the default run. |
| ; CHECK-AVX2-LABEL: test_overlap_2: |
| ; CHECK-AVX2: # %bb.0: # %entry |
| ; CHECK-AVX2-NEXT: movslq %esi, %rax |
| ; CHECK-AVX2-NEXT: movq %rax, -16(%rdi) |
| ; CHECK-AVX2-NEXT: movq -16(%rdi), %rcx |
| ; CHECK-AVX2-NEXT: movq %rcx, (%rdi) |
| ; CHECK-AVX2-NEXT: movq -8(%rdi), %rcx |
| ; CHECK-AVX2-NEXT: movq %rcx, 8(%rdi) |
| ; CHECK-AVX2-NEXT: movq %rax, -8(%rdi) |
| ; CHECK-AVX2-NEXT: movl $7, -12(%rdi) |
| ; CHECK-AVX2-NEXT: movl -16(%rdi), %eax |
| ; CHECK-AVX2-NEXT: movl %eax, 16(%rdi) |
| ; CHECK-AVX2-NEXT: movl -12(%rdi), %eax |
| ; CHECK-AVX2-NEXT: movl %eax, 20(%rdi) |
| ; CHECK-AVX2-NEXT: movq -8(%rdi), %rax |
| ; CHECK-AVX2-NEXT: movq %rax, 24(%rdi) |
| ; CHECK-AVX2-NEXT: retq |
| ; Expectations for the run with -mcpu=skx follow: the same instruction sequence as the default run. |
| ; CHECK-AVX512-LABEL: test_overlap_2: |
| ; CHECK-AVX512: # %bb.0: # %entry |
| ; CHECK-AVX512-NEXT: movslq %esi, %rax |
| ; CHECK-AVX512-NEXT: movq %rax, -16(%rdi) |
| ; CHECK-AVX512-NEXT: movq -16(%rdi), %rcx |
| ; CHECK-AVX512-NEXT: movq %rcx, (%rdi) |
| ; CHECK-AVX512-NEXT: movq -8(%rdi), %rcx |
| ; CHECK-AVX512-NEXT: movq %rcx, 8(%rdi) |
| ; CHECK-AVX512-NEXT: movq %rax, -8(%rdi) |
| ; CHECK-AVX512-NEXT: movl $7, -12(%rdi) |
| ; CHECK-AVX512-NEXT: movl -16(%rdi), %eax |
| ; CHECK-AVX512-NEXT: movl %eax, 16(%rdi) |
| ; CHECK-AVX512-NEXT: movl -12(%rdi), %eax |
| ; CHECK-AVX512-NEXT: movl %eax, 20(%rdi) |
| ; CHECK-AVX512-NEXT: movq -8(%rdi), %rax |
| ; CHECK-AVX512-NEXT: movq %rax, 24(%rdi) |
| ; CHECK-AVX512-NEXT: retq |
| entry: |
| %add.ptr = getelementptr inbounds i8, i8* %A, i64 -16 |
| %conv = sext i32 %x to i64 |
| %0 = bitcast i8* %add.ptr to i64* |
| store i64 %conv, i64* %0, align 8 |
| tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %A, i8* nonnull align 4 %add.ptr, i64 16, i1 false) |
| %add.ptr3 = getelementptr inbounds i8, i8* %A, i64 -8 |
| %1 = bitcast i8* %add.ptr3 to i64* |
| store i64 %conv, i64* %1, align 8 |
| %add.ptr4 = getelementptr inbounds i8, i8* %A, i64 -12 |
| %2 = bitcast i8* %add.ptr4 to i32* |
| store i32 7, i32* %2, align 4 |
| %add.ptr5 = getelementptr inbounds i8, i8* %A, i64 16 |
| tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 %add.ptr5, i8* nonnull align 4 %add.ptr, i64 16, i1 false) |
| ret void |
| } |
| |
| ; Function Attrs: nounwind uwtable. test_overlap_3 writes i32 7 at A-10 and copies the 16 bytes at A-16 to A; it then stores sext(x) as i64 at A-9 and at A-16 and an i8 0 at A-1, and copies the 16 bytes at A-16 to A+16. Default-run expectations come first: both copies are emitted as several narrower load/store pairs. |
| define dso_local void @test_overlap_3(i8* nocapture %A, i32 %x) local_unnamed_addr #0 { |
| ; CHECK-LABEL: test_overlap_3: |
| ; CHECK: # %bb.0: # %entry |
| ; CHECK-NEXT: movl $7, -10(%rdi) |
| ; CHECK-NEXT: movl -16(%rdi), %eax |
| ; CHECK-NEXT: movl %eax, (%rdi) |
| ; CHECK-NEXT: movzwl -12(%rdi), %eax |
| ; CHECK-NEXT: movw %ax, 4(%rdi) |
| ; CHECK-NEXT: movl -10(%rdi), %eax |
| ; CHECK-NEXT: movl %eax, 6(%rdi) |
| ; CHECK-NEXT: movl -6(%rdi), %eax |
| ; CHECK-NEXT: movl %eax, 10(%rdi) |
| ; CHECK-NEXT: movzwl -2(%rdi), %eax |
| ; CHECK-NEXT: movw %ax, 14(%rdi) |
| ; CHECK-NEXT: movslq %esi, %rax |
| ; CHECK-NEXT: movq %rax, -9(%rdi) |
| ; CHECK-NEXT: movq %rax, -16(%rdi) |
| ; CHECK-NEXT: movb $0, -1(%rdi) |
| ; CHECK-NEXT: movq -16(%rdi), %rax |
| ; CHECK-NEXT: movq %rax, 16(%rdi) |
| ; CHECK-NEXT: movzwl -8(%rdi), %eax |
| ; CHECK-NEXT: movw %ax, 24(%rdi) |
| ; CHECK-NEXT: movl -6(%rdi), %eax |
| ; CHECK-NEXT: movl %eax, 26(%rdi) |
| ; CHECK-NEXT: movb -2(%rdi), %al |
| ; CHECK-NEXT: movb %al, 30(%rdi) |
| ; CHECK-NEXT: movb -1(%rdi), %al |
| ; CHECK-NEXT: movb %al, 31(%rdi) |
| ; CHECK-NEXT: retq |
| ; Expectations for the run with --x86-disable-avoid-SFB follow: each 16-byte copy stays a single movups load/store pair. |
| ; DISABLED-LABEL: test_overlap_3: |
| ; DISABLED: # %bb.0: # %entry |
| ; DISABLED-NEXT: movl $7, -10(%rdi) |
| ; DISABLED-NEXT: movups -16(%rdi), %xmm0 |
| ; DISABLED-NEXT: movups %xmm0, (%rdi) |
| ; DISABLED-NEXT: movslq %esi, %rax |
| ; DISABLED-NEXT: movq %rax, -9(%rdi) |
| ; DISABLED-NEXT: movq %rax, -16(%rdi) |
| ; DISABLED-NEXT: movb $0, -1(%rdi) |
| ; DISABLED-NEXT: movups -16(%rdi), %xmm0 |
| ; DISABLED-NEXT: movups %xmm0, 16(%rdi) |
| ; DISABLED-NEXT: retq |
| ; Expectations for the run with -mcpu=core-avx2 follow: the same instruction sequence as the default run. |
| ; CHECK-AVX2-LABEL: test_overlap_3: |
| ; CHECK-AVX2: # %bb.0: # %entry |
| ; CHECK-AVX2-NEXT: movl $7, -10(%rdi) |
| ; CHECK-AVX2-NEXT: movl -16(%rdi), %eax |
| ; CHECK-AVX2-NEXT: movl %eax, (%rdi) |
| ; CHECK-AVX2-NEXT: movzwl -12(%rdi), %eax |
| ; CHECK-AVX2-NEXT: movw %ax, 4(%rdi) |
| ; CHECK-AVX2-NEXT: movl -10(%rdi), %eax |
| ; CHECK-AVX2-NEXT: movl %eax, 6(%rdi) |
| ; CHECK-AVX2-NEXT: movl -6(%rdi), %eax |
| ; CHECK-AVX2-NEXT: movl %eax, 10(%rdi) |
| ; CHECK-AVX2-NEXT: movzwl -2(%rdi), %eax |
| ; CHECK-AVX2-NEXT: movw %ax, 14(%rdi) |
| ; CHECK-AVX2-NEXT: movslq %esi, %rax |
| ; CHECK-AVX2-NEXT: movq %rax, -9(%rdi) |
| ; CHECK-AVX2-NEXT: movq %rax, -16(%rdi) |
| ; CHECK-AVX2-NEXT: movb $0, -1(%rdi) |
| ; CHECK-AVX2-NEXT: movq -16(%rdi), %rax |
| ; CHECK-AVX2-NEXT: movq %rax, 16(%rdi) |
| ; CHECK-AVX2-NEXT: movzwl -8(%rdi), %eax |
| ; CHECK-AVX2-NEXT: movw %ax, 24(%rdi) |
| ; CHECK-AVX2-NEXT: movl -6(%rdi), %eax |
| ; CHECK-AVX2-NEXT: movl %eax, 26(%rdi) |
| ; CHECK-AVX2-NEXT: movb -2(%rdi), %al |
| ; CHECK-AVX2-NEXT: movb %al, 30(%rdi) |
| ; CHECK-AVX2-NEXT: movb -1(%rdi), %al |
| ; CHECK-AVX2-NEXT: movb %al, 31(%rdi) |
| ; CHECK-AVX2-NEXT: retq |
| ; Expectations for the run with -mcpu=skx follow: the same instruction sequence as the default run. |
| ; CHECK-AVX512-LABEL: test_overlap_3: |
| ; CHECK-AVX512: # %bb.0: # %entry |
| ; CHECK-AVX512-NEXT: movl $7, -10(%rdi) |
| ; CHECK-AVX512-NEXT: movl -16(%rdi), %eax |
| ; CHECK-AVX512-NEXT: movl %eax, (%rdi) |
| ; CHECK-AVX512-NEXT: movzwl -12(%rdi), %eax |
| ; CHECK-AVX512-NEXT: movw %ax, 4(%rdi) |
| ; CHECK-AVX512-NEXT: movl -10(%rdi), %eax |
| ; CHECK-AVX512-NEXT: movl %eax, 6(%rdi) |
| ; CHECK-AVX512-NEXT: movl -6(%rdi), %eax |
| ; CHECK-AVX512-NEXT: movl %eax, 10(%rdi) |
| ; CHECK-AVX512-NEXT: movzwl -2(%rdi), %eax |
| ; CHECK-AVX512-NEXT: movw %ax, 14(%rdi) |
| ; CHECK-AVX512-NEXT: movslq %esi, %rax |
| ; CHECK-AVX512-NEXT: movq %rax, -9(%rdi) |
| ; CHECK-AVX512-NEXT: movq %rax, -16(%rdi) |
| ; CHECK-AVX512-NEXT: movb $0, -1(%rdi) |
| ; CHECK-AVX512-NEXT: movq -16(%rdi), %rax |
| ; CHECK-AVX512-NEXT: movq %rax, 16(%rdi) |
| ; CHECK-AVX512-NEXT: movzwl -8(%rdi), %eax |
| ; CHECK-AVX512-NEXT: movw %ax, 24(%rdi) |
| ; CHECK-AVX512-NEXT: movl -6(%rdi), %eax |
| ; CHECK-AVX512-NEXT: movl %eax, 26(%rdi) |
| ; CHECK-AVX512-NEXT: movb -2(%rdi), %al |
| ; CHECK-AVX512-NEXT: movb %al, 30(%rdi) |
| ; CHECK-AVX512-NEXT: movb -1(%rdi), %al |
| ; CHECK-AVX512-NEXT: movb %al, 31(%rdi) |
| ; CHECK-AVX512-NEXT: retq |
| entry: |
| %add.ptr = getelementptr inbounds i8, i8* %A, i64 -16 |
| %add.ptr1 = getelementptr inbounds i8, i8* %A, i64 -10 |
| %0 = bitcast i8* %add.ptr1 to i32* |
| store i32 7, i32* %0, align 4 |
| tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %A, i8* nonnull align 4 %add.ptr, i64 16, i1 false) |
| %conv = sext i32 %x to i64 |
| %add.ptr2 = getelementptr inbounds i8, i8* %A, i64 -9 |
| %1 = bitcast i8* %add.ptr2 to i64* |
| store i64 %conv, i64* %1, align 8 |
| %2 = bitcast i8* %add.ptr to i64* |
| store i64 %conv, i64* %2, align 8 |
| %add.ptr5 = getelementptr inbounds i8, i8* %A, i64 -1 |
| store i8 0, i8* %add.ptr5, align 1 |
| %add.ptr6 = getelementptr inbounds i8, i8* %A, i64 16 |
| tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 %add.ptr6, i8* nonnull align 4 %add.ptr, i64 16, i1 false) |
| ret void |
| } |
| |
| ; Function Attrs: nounwind uwtable. test_overlap_4 first copies the 16 bytes at A-16 to A, then stores sext(x) as i64 at A-8, x as i32 at A-16 and i32 0 at A-11, and finally copies the 16 bytes at A-16 to A+16. Default-run expectations come first: the first copy is one movups load/store pair, the second is emitted as several narrower load/store pairs. |
| define dso_local void @test_overlap_4(i8* nocapture %A, i32 %x) local_unnamed_addr #0 { |
| ; CHECK-LABEL: test_overlap_4: |
| ; CHECK: # %bb.0: # %entry |
| ; CHECK-NEXT: movups -16(%rdi), %xmm0 |
| ; CHECK-NEXT: movups %xmm0, (%rdi) |
| ; CHECK-NEXT: movslq %esi, %rax |
| ; CHECK-NEXT: movq %rax, -8(%rdi) |
| ; CHECK-NEXT: movl %eax, -16(%rdi) |
| ; CHECK-NEXT: movl $0, -11(%rdi) |
| ; CHECK-NEXT: movl -16(%rdi), %eax |
| ; CHECK-NEXT: movl %eax, 16(%rdi) |
| ; CHECK-NEXT: movb -12(%rdi), %al |
| ; CHECK-NEXT: movb %al, 20(%rdi) |
| ; CHECK-NEXT: movl -11(%rdi), %eax |
| ; CHECK-NEXT: movl %eax, 21(%rdi) |
| ; CHECK-NEXT: movl -7(%rdi), %eax |
| ; CHECK-NEXT: movl %eax, 25(%rdi) |
| ; CHECK-NEXT: movzwl -3(%rdi), %eax |
| ; CHECK-NEXT: movw %ax, 29(%rdi) |
| ; CHECK-NEXT: movb -1(%rdi), %al |
| ; CHECK-NEXT: movb %al, 31(%rdi) |
| ; CHECK-NEXT: retq |
| ; Expectations for the run with --x86-disable-avoid-SFB follow: both 16-byte copies are single movups load/store pairs. |
| ; DISABLED-LABEL: test_overlap_4: |
| ; DISABLED: # %bb.0: # %entry |
| ; DISABLED-NEXT: movups -16(%rdi), %xmm0 |
| ; DISABLED-NEXT: movups %xmm0, (%rdi) |
| ; DISABLED-NEXT: movslq %esi, %rax |
| ; DISABLED-NEXT: movq %rax, -8(%rdi) |
| ; DISABLED-NEXT: movl %eax, -16(%rdi) |
| ; DISABLED-NEXT: movl $0, -11(%rdi) |
| ; DISABLED-NEXT: movups -16(%rdi), %xmm0 |
| ; DISABLED-NEXT: movups %xmm0, 16(%rdi) |
| ; DISABLED-NEXT: retq |
| ; Expectations for the run with -mcpu=core-avx2 follow: as in the default run, except that the first copy uses vmovups. |
| ; CHECK-AVX2-LABEL: test_overlap_4: |
| ; CHECK-AVX2: # %bb.0: # %entry |
| ; CHECK-AVX2-NEXT: vmovups -16(%rdi), %xmm0 |
| ; CHECK-AVX2-NEXT: vmovups %xmm0, (%rdi) |
| ; CHECK-AVX2-NEXT: movslq %esi, %rax |
| ; CHECK-AVX2-NEXT: movq %rax, -8(%rdi) |
| ; CHECK-AVX2-NEXT: movl %eax, -16(%rdi) |
| ; CHECK-AVX2-NEXT: movl $0, -11(%rdi) |
| ; CHECK-AVX2-NEXT: movl -16(%rdi), %eax |
| ; CHECK-AVX2-NEXT: movl %eax, 16(%rdi) |
| ; CHECK-AVX2-NEXT: movb -12(%rdi), %al |
| ; CHECK-AVX2-NEXT: movb %al, 20(%rdi) |
| ; CHECK-AVX2-NEXT: movl -11(%rdi), %eax |
| ; CHECK-AVX2-NEXT: movl %eax, 21(%rdi) |
| ; CHECK-AVX2-NEXT: movl -7(%rdi), %eax |
| ; CHECK-AVX2-NEXT: movl %eax, 25(%rdi) |
| ; CHECK-AVX2-NEXT: movzwl -3(%rdi), %eax |
| ; CHECK-AVX2-NEXT: movw %ax, 29(%rdi) |
| ; CHECK-AVX2-NEXT: movb -1(%rdi), %al |
| ; CHECK-AVX2-NEXT: movb %al, 31(%rdi) |
| ; CHECK-AVX2-NEXT: retq |
| ; Expectations for the run with -mcpu=skx follow: as in the default run, except that the first copy uses vmovups. |
| ; CHECK-AVX512-LABEL: test_overlap_4: |
| ; CHECK-AVX512: # %bb.0: # %entry |
| ; CHECK-AVX512-NEXT: vmovups -16(%rdi), %xmm0 |
| ; CHECK-AVX512-NEXT: vmovups %xmm0, (%rdi) |
| ; CHECK-AVX512-NEXT: movslq %esi, %rax |
| ; CHECK-AVX512-NEXT: movq %rax, -8(%rdi) |
| ; CHECK-AVX512-NEXT: movl %eax, -16(%rdi) |
| ; CHECK-AVX512-NEXT: movl $0, -11(%rdi) |
| ; CHECK-AVX512-NEXT: movl -16(%rdi), %eax |
| ; CHECK-AVX512-NEXT: movl %eax, 16(%rdi) |
| ; CHECK-AVX512-NEXT: movb -12(%rdi), %al |
| ; CHECK-AVX512-NEXT: movb %al, 20(%rdi) |
| ; CHECK-AVX512-NEXT: movl -11(%rdi), %eax |
| ; CHECK-AVX512-NEXT: movl %eax, 21(%rdi) |
| ; CHECK-AVX512-NEXT: movl -7(%rdi), %eax |
| ; CHECK-AVX512-NEXT: movl %eax, 25(%rdi) |
| ; CHECK-AVX512-NEXT: movzwl -3(%rdi), %eax |
| ; CHECK-AVX512-NEXT: movw %ax, 29(%rdi) |
| ; CHECK-AVX512-NEXT: movb -1(%rdi), %al |
| ; CHECK-AVX512-NEXT: movb %al, 31(%rdi) |
| ; CHECK-AVX512-NEXT: retq |
| entry: |
| %add.ptr = getelementptr inbounds i8, i8* %A, i64 -16 |
| tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %A, i8* nonnull align 4 %add.ptr, i64 16, i1 false) |
| %conv = sext i32 %x to i64 |
| %add.ptr1 = getelementptr inbounds i8, i8* %A, i64 -8 |
| %0 = bitcast i8* %add.ptr1 to i64* |
| store i64 %conv, i64* %0, align 8 |
| %1 = bitcast i8* %add.ptr to i32* |
| store i32 %x, i32* %1, align 4 |
| %add.ptr3 = getelementptr inbounds i8, i8* %A, i64 -11 |
| %2 = bitcast i8* %add.ptr3 to i32* |
| store i32 0, i32* %2, align 4 |
| %add.ptr4 = getelementptr inbounds i8, i8* %A, i64 16 |
| tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 %add.ptr4, i8* nonnull align 4 %add.ptr, i64 16, i1 false) |
| ret void |
| } |
| |
| ; Function Attrs: nounwind uwtable. test_overlap_5 first copies the 16 bytes at A-16 to A, then stores sext(x) as i64 at A-16, trunc(x) as i8 at A-14 and i8 0 at A-11, and finally copies the 16 bytes at A-16 to A+16. Default-run expectations come first: the first copy is one movups load/store pair, the second is emitted as several narrower load/store pairs. |
| define dso_local void @test_overlap_5(i8* nocapture %A, i32 %x) local_unnamed_addr #0 { |
| ; CHECK-LABEL: test_overlap_5: |
| ; CHECK: # %bb.0: # %entry |
| ; CHECK-NEXT: movups -16(%rdi), %xmm0 |
| ; CHECK-NEXT: movups %xmm0, (%rdi) |
| ; CHECK-NEXT: movslq %esi, %rax |
| ; CHECK-NEXT: movq %rax, -16(%rdi) |
| ; CHECK-NEXT: movb %al, -14(%rdi) |
| ; CHECK-NEXT: movb $0, -11(%rdi) |
| ; CHECK-NEXT: movzwl -16(%rdi), %eax |
| ; CHECK-NEXT: movw %ax, 16(%rdi) |
| ; CHECK-NEXT: movb -14(%rdi), %al |
| ; CHECK-NEXT: movb %al, 18(%rdi) |
| ; CHECK-NEXT: movzwl -13(%rdi), %eax |
| ; CHECK-NEXT: movw %ax, 19(%rdi) |
| ; CHECK-NEXT: movb -11(%rdi), %al |
| ; CHECK-NEXT: movb %al, 21(%rdi) |
| ; CHECK-NEXT: movq -10(%rdi), %rax |
| ; CHECK-NEXT: movq %rax, 22(%rdi) |
| ; CHECK-NEXT: movzwl -2(%rdi), %eax |
| ; CHECK-NEXT: movw %ax, 30(%rdi) |
| ; CHECK-NEXT: retq |
| ; Expectations for the run with --x86-disable-avoid-SFB follow: both 16-byte copies are single movups load/store pairs. |
| ; DISABLED-LABEL: test_overlap_5: |
| ; DISABLED: # %bb.0: # %entry |
| ; DISABLED-NEXT: movups -16(%rdi), %xmm0 |
| ; DISABLED-NEXT: movups %xmm0, (%rdi) |
| ; DISABLED-NEXT: movslq %esi, %rax |
| ; DISABLED-NEXT: movq %rax, -16(%rdi) |
| ; DISABLED-NEXT: movb %al, -14(%rdi) |
| ; DISABLED-NEXT: movb $0, -11(%rdi) |
| ; DISABLED-NEXT: movups -16(%rdi), %xmm0 |
| ; DISABLED-NEXT: movups %xmm0, 16(%rdi) |
| ; DISABLED-NEXT: retq |
| ; Expectations for the run with -mcpu=core-avx2 follow: as in the default run, except that the first copy uses vmovups. |
| ; CHECK-AVX2-LABEL: test_overlap_5: |
| ; CHECK-AVX2: # %bb.0: # %entry |
| ; CHECK-AVX2-NEXT: vmovups -16(%rdi), %xmm0 |
| ; CHECK-AVX2-NEXT: vmovups %xmm0, (%rdi) |
| ; CHECK-AVX2-NEXT: movslq %esi, %rax |
| ; CHECK-AVX2-NEXT: movq %rax, -16(%rdi) |
| ; CHECK-AVX2-NEXT: movb %al, -14(%rdi) |
| ; CHECK-AVX2-NEXT: movb $0, -11(%rdi) |
| ; CHECK-AVX2-NEXT: movzwl -16(%rdi), %eax |
| ; CHECK-AVX2-NEXT: movw %ax, 16(%rdi) |
| ; CHECK-AVX2-NEXT: movb -14(%rdi), %al |
| ; CHECK-AVX2-NEXT: movb %al, 18(%rdi) |
| ; CHECK-AVX2-NEXT: movzwl -13(%rdi), %eax |
| ; CHECK-AVX2-NEXT: movw %ax, 19(%rdi) |
| ; CHECK-AVX2-NEXT: movb -11(%rdi), %al |
| ; CHECK-AVX2-NEXT: movb %al, 21(%rdi) |
| ; CHECK-AVX2-NEXT: movq -10(%rdi), %rax |
| ; CHECK-AVX2-NEXT: movq %rax, 22(%rdi) |
| ; CHECK-AVX2-NEXT: movzwl -2(%rdi), %eax |
| ; CHECK-AVX2-NEXT: movw %ax, 30(%rdi) |
| ; CHECK-AVX2-NEXT: retq |
| ; Expectations for the run with -mcpu=skx follow: as in the default run, except that the first copy uses vmovups. |
| ; CHECK-AVX512-LABEL: test_overlap_5: |
| ; CHECK-AVX512: # %bb.0: # %entry |
| ; CHECK-AVX512-NEXT: vmovups -16(%rdi), %xmm0 |
| ; CHECK-AVX512-NEXT: vmovups %xmm0, (%rdi) |
| ; CHECK-AVX512-NEXT: movslq %esi, %rax |
| ; CHECK-AVX512-NEXT: movq %rax, -16(%rdi) |
| ; CHECK-AVX512-NEXT: movb %al, -14(%rdi) |
| ; CHECK-AVX512-NEXT: movb $0, -11(%rdi) |
| ; CHECK-AVX512-NEXT: movzwl -16(%rdi), %eax |
| ; CHECK-AVX512-NEXT: movw %ax, 16(%rdi) |
| ; CHECK-AVX512-NEXT: movb -14(%rdi), %al |
| ; CHECK-AVX512-NEXT: movb %al, 18(%rdi) |
| ; CHECK-AVX512-NEXT: movzwl -13(%rdi), %eax |
| ; CHECK-AVX512-NEXT: movw %ax, 19(%rdi) |
| ; CHECK-AVX512-NEXT: movb -11(%rdi), %al |
| ; CHECK-AVX512-NEXT: movb %al, 21(%rdi) |
| ; CHECK-AVX512-NEXT: movq -10(%rdi), %rax |
| ; CHECK-AVX512-NEXT: movq %rax, 22(%rdi) |
| ; CHECK-AVX512-NEXT: movzwl -2(%rdi), %eax |
| ; CHECK-AVX512-NEXT: movw %ax, 30(%rdi) |
| ; CHECK-AVX512-NEXT: retq |
| entry: |
| %add.ptr = getelementptr inbounds i8, i8* %A, i64 -16 |
| tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %A, i8* nonnull align 4 %add.ptr, i64 16, i1 false) |
| %conv = sext i32 %x to i64 |
| %0 = bitcast i8* %add.ptr to i64* |
| store i64 %conv, i64* %0, align 8 |
| %conv2 = trunc i32 %x to i8 |
| %add.ptr3 = getelementptr inbounds i8, i8* %A, i64 -14 |
| store i8 %conv2, i8* %add.ptr3, align 1 |
| %add.ptr4 = getelementptr inbounds i8, i8* %A, i64 -11 |
| store i8 0, i8* %add.ptr4, align 1 |
| %add.ptr5 = getelementptr inbounds i8, i8* %A, i64 16 |
| tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 %add.ptr5, i8* nonnull align 4 %add.ptr, i64 16, i1 false) |
| ret void |
| } |
| |
| attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } |
| attributes #1 = { argmemonly nounwind } |
| |
| |