| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE41 |
| ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=AVX |
| ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512F |
| ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512vl | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512VL |
| |
| ; llvm.floor.* lowering: round immediate $9 = 0b1001 (bit3: suppress precision |
| ; exception, bits1:0 = 01: round toward -inf). 128/256-bit use (v)roundps/pd; |
| ; 512-bit vectors need AVX512's vrndscaleps/pd since vroundps/pd has no zmm form. |
| define <2 x double> @floor_v2f64(<2 x double> %p) { |
| ; SSE41-LABEL: floor_v2f64: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: roundpd $9, %xmm0, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: floor_v2f64: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vroundpd $9, %xmm0, %xmm0 |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512-LABEL: floor_v2f64: |
| ; AVX512: ## %bb.0: |
| ; AVX512-NEXT: vroundpd $9, %xmm0, %xmm0 |
| ; AVX512-NEXT: retq |
| %t = call <2 x double> @llvm.floor.v2f64(<2 x double> %p) |
| ret <2 x double> %t |
| } |
| declare <2 x double> @llvm.floor.v2f64(<2 x double> %p) |
| |
| define <4 x float> @floor_v4f32(<4 x float> %p) { |
| ; SSE41-LABEL: floor_v4f32: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: roundps $9, %xmm0, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: floor_v4f32: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vroundps $9, %xmm0, %xmm0 |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512-LABEL: floor_v4f32: |
| ; AVX512: ## %bb.0: |
| ; AVX512-NEXT: vroundps $9, %xmm0, %xmm0 |
| ; AVX512-NEXT: retq |
| %t = call <4 x float> @llvm.floor.v4f32(<4 x float> %p) |
| ret <4 x float> %t |
| } |
| declare <4 x float> @llvm.floor.v4f32(<4 x float> %p) |
| |
| ; 256-bit: SSE4.1 splits into two xmm ops, AVX+ uses a single ymm op. |
| define <4 x double> @floor_v4f64(<4 x double> %p){ |
| ; SSE41-LABEL: floor_v4f64: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: roundpd $9, %xmm0, %xmm0 |
| ; SSE41-NEXT: roundpd $9, %xmm1, %xmm1 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: floor_v4f64: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vroundpd $9, %ymm0, %ymm0 |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512-LABEL: floor_v4f64: |
| ; AVX512: ## %bb.0: |
| ; AVX512-NEXT: vroundpd $9, %ymm0, %ymm0 |
| ; AVX512-NEXT: retq |
| %t = call <4 x double> @llvm.floor.v4f64(<4 x double> %p) |
| ret <4 x double> %t |
| } |
| declare <4 x double> @llvm.floor.v4f64(<4 x double> %p) |
| |
| define <8 x float> @floor_v8f32(<8 x float> %p) { |
| ; SSE41-LABEL: floor_v8f32: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: roundps $9, %xmm0, %xmm0 |
| ; SSE41-NEXT: roundps $9, %xmm1, %xmm1 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: floor_v8f32: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vroundps $9, %ymm0, %ymm0 |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512-LABEL: floor_v8f32: |
| ; AVX512: ## %bb.0: |
| ; AVX512-NEXT: vroundps $9, %ymm0, %ymm0 |
| ; AVX512-NEXT: retq |
| %t = call <8 x float> @llvm.floor.v8f32(<8 x float> %p) |
| ret <8 x float> %t |
| } |
| declare <8 x float> @llvm.floor.v8f32(<8 x float> %p) |
| |
| ; 512-bit: SSE4.1 -> 4 xmm ops, AVX -> 2 ymm ops, AVX512 -> 1 zmm vrndscalepd. |
| define <8 x double> @floor_v8f64(<8 x double> %p){ |
| ; SSE41-LABEL: floor_v8f64: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: roundpd $9, %xmm0, %xmm0 |
| ; SSE41-NEXT: roundpd $9, %xmm1, %xmm1 |
| ; SSE41-NEXT: roundpd $9, %xmm2, %xmm2 |
| ; SSE41-NEXT: roundpd $9, %xmm3, %xmm3 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: floor_v8f64: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vroundpd $9, %ymm0, %ymm0 |
| ; AVX-NEXT: vroundpd $9, %ymm1, %ymm1 |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512-LABEL: floor_v8f64: |
| ; AVX512: ## %bb.0: |
| ; AVX512-NEXT: vrndscalepd $9, %zmm0, %zmm0 |
| ; AVX512-NEXT: retq |
| %t = call <8 x double> @llvm.floor.v8f64(<8 x double> %p) |
| ret <8 x double> %t |
| } |
| declare <8 x double> @llvm.floor.v8f64(<8 x double> %p) |
| |
| define <16 x float> @floor_v16f32(<16 x float> %p) { |
| ; SSE41-LABEL: floor_v16f32: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: roundps $9, %xmm0, %xmm0 |
| ; SSE41-NEXT: roundps $9, %xmm1, %xmm1 |
| ; SSE41-NEXT: roundps $9, %xmm2, %xmm2 |
| ; SSE41-NEXT: roundps $9, %xmm3, %xmm3 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: floor_v16f32: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vroundps $9, %ymm0, %ymm0 |
| ; AVX-NEXT: vroundps $9, %ymm1, %ymm1 |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512-LABEL: floor_v16f32: |
| ; AVX512: ## %bb.0: |
| ; AVX512-NEXT: vrndscaleps $9, %zmm0, %zmm0 |
| ; AVX512-NEXT: retq |
| %t = call <16 x float> @llvm.floor.v16f32(<16 x float> %p) |
| ret <16 x float> %t |
| } |
| declare <16 x float> @llvm.floor.v16f32(<16 x float> %p) |
| |
| ; llvm.ceil.* lowering: round immediate $10 = 0b1010 (suppress precision |
| ; exception, round toward +inf). The *_load variants additionally check that an |
| ; unaligned load is folded into the AVX round instruction's memory operand. |
| define <2 x double> @ceil_v2f64(<2 x double> %p) { |
| ; SSE41-LABEL: ceil_v2f64: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: roundpd $10, %xmm0, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: ceil_v2f64: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vroundpd $10, %xmm0, %xmm0 |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512-LABEL: ceil_v2f64: |
| ; AVX512: ## %bb.0: |
| ; AVX512-NEXT: vroundpd $10, %xmm0, %xmm0 |
| ; AVX512-NEXT: retq |
| %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> %p) |
| ret <2 x double> %t |
| } |
| declare <2 x double> @llvm.ceil.v2f64(<2 x double> %p) |
| |
| ; SSE4.1 cannot fold an unaligned load, so it issues movupd + roundpd; |
| ; AVX/AVX512 fold the load: vroundpd $10, (%rdi), %xmm0. |
| define <2 x double> @ceil_v2f64_load(<2 x double>* %ptr) { |
| ; SSE41-LABEL: ceil_v2f64_load: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: movupd (%rdi), %xmm0 |
| ; SSE41-NEXT: roundpd $10, %xmm0, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: ceil_v2f64_load: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vroundpd $10, (%rdi), %xmm0 |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512-LABEL: ceil_v2f64_load: |
| ; AVX512: ## %bb.0: |
| ; AVX512-NEXT: vroundpd $10, (%rdi), %xmm0 |
| ; AVX512-NEXT: retq |
| %p = load <2 x double>, <2 x double>* %ptr, align 1 |
| %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> %p) |
| ret <2 x double> %t |
| } |
| |
| define <4 x float> @ceil_v4f32(<4 x float> %p) { |
| ; SSE41-LABEL: ceil_v4f32: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: roundps $10, %xmm0, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: ceil_v4f32: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vroundps $10, %xmm0, %xmm0 |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512-LABEL: ceil_v4f32: |
| ; AVX512: ## %bb.0: |
| ; AVX512-NEXT: vroundps $10, %xmm0, %xmm0 |
| ; AVX512-NEXT: retq |
| %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p) |
| ret <4 x float> %t |
| } |
| declare <4 x float> @llvm.ceil.v4f32(<4 x float> %p) |
| |
| define <4 x float> @ceil_v4f32_load(<4 x float>* %ptr) { |
| ; SSE41-LABEL: ceil_v4f32_load: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: movups (%rdi), %xmm0 |
| ; SSE41-NEXT: roundps $10, %xmm0, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: ceil_v4f32_load: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vroundps $10, (%rdi), %xmm0 |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512-LABEL: ceil_v4f32_load: |
| ; AVX512: ## %bb.0: |
| ; AVX512-NEXT: vroundps $10, (%rdi), %xmm0 |
| ; AVX512-NEXT: retq |
| %p = load <4 x float>, <4 x float>* %ptr, align 1 |
| %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p) |
| ret <4 x float> %t |
| } |
| |
| define <4 x double> @ceil_v4f64(<4 x double> %p) { |
| ; SSE41-LABEL: ceil_v4f64: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: roundpd $10, %xmm0, %xmm0 |
| ; SSE41-NEXT: roundpd $10, %xmm1, %xmm1 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: ceil_v4f64: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vroundpd $10, %ymm0, %ymm0 |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512-LABEL: ceil_v4f64: |
| ; AVX512: ## %bb.0: |
| ; AVX512-NEXT: vroundpd $10, %ymm0, %ymm0 |
| ; AVX512-NEXT: retq |
| %t = call <4 x double> @llvm.ceil.v4f64(<4 x double> %p) |
| ret <4 x double> %t |
| } |
| declare <4 x double> @llvm.ceil.v4f64(<4 x double> %p) |
| |
| define <8 x float> @ceil_v8f32(<8 x float> %p) { |
| ; SSE41-LABEL: ceil_v8f32: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: roundps $10, %xmm0, %xmm0 |
| ; SSE41-NEXT: roundps $10, %xmm1, %xmm1 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: ceil_v8f32: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vroundps $10, %ymm0, %ymm0 |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512-LABEL: ceil_v8f32: |
| ; AVX512: ## %bb.0: |
| ; AVX512-NEXT: vroundps $10, %ymm0, %ymm0 |
| ; AVX512-NEXT: retq |
| %t = call <8 x float> @llvm.ceil.v8f32(<8 x float> %p) |
| ret <8 x float> %t |
| } |
| declare <8 x float> @llvm.ceil.v8f32(<8 x float> %p) |
| |
| ; 512-bit ceil: zmm form only exists as vrndscalepd on AVX512. |
| define <8 x double> @ceil_v8f64(<8 x double> %p){ |
| ; SSE41-LABEL: ceil_v8f64: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: roundpd $10, %xmm0, %xmm0 |
| ; SSE41-NEXT: roundpd $10, %xmm1, %xmm1 |
| ; SSE41-NEXT: roundpd $10, %xmm2, %xmm2 |
| ; SSE41-NEXT: roundpd $10, %xmm3, %xmm3 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: ceil_v8f64: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vroundpd $10, %ymm0, %ymm0 |
| ; AVX-NEXT: vroundpd $10, %ymm1, %ymm1 |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512-LABEL: ceil_v8f64: |
| ; AVX512: ## %bb.0: |
| ; AVX512-NEXT: vrndscalepd $10, %zmm0, %zmm0 |
| ; AVX512-NEXT: retq |
| %t = call <8 x double> @llvm.ceil.v8f64(<8 x double> %p) |
| ret <8 x double> %t |
| } |
| declare <8 x double> @llvm.ceil.v8f64(<8 x double> %p) |
| |
| define <16 x float> @ceil_v16f32(<16 x float> %p) { |
| ; SSE41-LABEL: ceil_v16f32: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: roundps $10, %xmm0, %xmm0 |
| ; SSE41-NEXT: roundps $10, %xmm1, %xmm1 |
| ; SSE41-NEXT: roundps $10, %xmm2, %xmm2 |
| ; SSE41-NEXT: roundps $10, %xmm3, %xmm3 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: ceil_v16f32: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vroundps $10, %ymm0, %ymm0 |
| ; AVX-NEXT: vroundps $10, %ymm1, %ymm1 |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512-LABEL: ceil_v16f32: |
| ; AVX512: ## %bb.0: |
| ; AVX512-NEXT: vrndscaleps $10, %zmm0, %zmm0 |
| ; AVX512-NEXT: retq |
| %t = call <16 x float> @llvm.ceil.v16f32(<16 x float> %p) |
| ret <16 x float> %t |
| } |
| declare <16 x float> @llvm.ceil.v16f32(<16 x float> %p) |
| |
| ; llvm.trunc.* lowering: round immediate $11 = 0b1011 (suppress precision |
| ; exception, round toward zero). |
| define <2 x double> @trunc_v2f64(<2 x double> %p) { |
| ; SSE41-LABEL: trunc_v2f64: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: roundpd $11, %xmm0, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: trunc_v2f64: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vroundpd $11, %xmm0, %xmm0 |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512-LABEL: trunc_v2f64: |
| ; AVX512: ## %bb.0: |
| ; AVX512-NEXT: vroundpd $11, %xmm0, %xmm0 |
| ; AVX512-NEXT: retq |
| %t = call <2 x double> @llvm.trunc.v2f64(<2 x double> %p) |
| ret <2 x double> %t |
| } |
| declare <2 x double> @llvm.trunc.v2f64(<2 x double> %p) |
| |
| define <4 x float> @trunc_v4f32(<4 x float> %p) { |
| ; SSE41-LABEL: trunc_v4f32: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: roundps $11, %xmm0, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: trunc_v4f32: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vroundps $11, %xmm0, %xmm0 |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512-LABEL: trunc_v4f32: |
| ; AVX512: ## %bb.0: |
| ; AVX512-NEXT: vroundps $11, %xmm0, %xmm0 |
| ; AVX512-NEXT: retq |
| %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> %p) |
| ret <4 x float> %t |
| } |
| declare <4 x float> @llvm.trunc.v4f32(<4 x float> %p) |
| |
| define <4 x double> @trunc_v4f64(<4 x double> %p) { |
| ; SSE41-LABEL: trunc_v4f64: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: roundpd $11, %xmm0, %xmm0 |
| ; SSE41-NEXT: roundpd $11, %xmm1, %xmm1 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: trunc_v4f64: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vroundpd $11, %ymm0, %ymm0 |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512-LABEL: trunc_v4f64: |
| ; AVX512: ## %bb.0: |
| ; AVX512-NEXT: vroundpd $11, %ymm0, %ymm0 |
| ; AVX512-NEXT: retq |
| %t = call <4 x double> @llvm.trunc.v4f64(<4 x double> %p) |
| ret <4 x double> %t |
| } |
| declare <4 x double> @llvm.trunc.v4f64(<4 x double> %p) |
| |
| define <8 x float> @trunc_v8f32(<8 x float> %p) { |
| ; SSE41-LABEL: trunc_v8f32: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: roundps $11, %xmm0, %xmm0 |
| ; SSE41-NEXT: roundps $11, %xmm1, %xmm1 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: trunc_v8f32: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vroundps $11, %ymm0, %ymm0 |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512-LABEL: trunc_v8f32: |
| ; AVX512: ## %bb.0: |
| ; AVX512-NEXT: vroundps $11, %ymm0, %ymm0 |
| ; AVX512-NEXT: retq |
| %t = call <8 x float> @llvm.trunc.v8f32(<8 x float> %p) |
| ret <8 x float> %t |
| } |
| declare <8 x float> @llvm.trunc.v8f32(<8 x float> %p) |
| |
| ; 512-bit trunc: single vrndscalepd/ps on AVX512, split otherwise. |
| define <8 x double> @trunc_v8f64(<8 x double> %p){ |
| ; SSE41-LABEL: trunc_v8f64: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: roundpd $11, %xmm0, %xmm0 |
| ; SSE41-NEXT: roundpd $11, %xmm1, %xmm1 |
| ; SSE41-NEXT: roundpd $11, %xmm2, %xmm2 |
| ; SSE41-NEXT: roundpd $11, %xmm3, %xmm3 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: trunc_v8f64: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vroundpd $11, %ymm0, %ymm0 |
| ; AVX-NEXT: vroundpd $11, %ymm1, %ymm1 |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512-LABEL: trunc_v8f64: |
| ; AVX512: ## %bb.0: |
| ; AVX512-NEXT: vrndscalepd $11, %zmm0, %zmm0 |
| ; AVX512-NEXT: retq |
| %t = call <8 x double> @llvm.trunc.v8f64(<8 x double> %p) |
| ret <8 x double> %t |
| } |
| declare <8 x double> @llvm.trunc.v8f64(<8 x double> %p) |
| |
| define <16 x float> @trunc_v16f32(<16 x float> %p) { |
| ; SSE41-LABEL: trunc_v16f32: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: roundps $11, %xmm0, %xmm0 |
| ; SSE41-NEXT: roundps $11, %xmm1, %xmm1 |
| ; SSE41-NEXT: roundps $11, %xmm2, %xmm2 |
| ; SSE41-NEXT: roundps $11, %xmm3, %xmm3 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: trunc_v16f32: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vroundps $11, %ymm0, %ymm0 |
| ; AVX-NEXT: vroundps $11, %ymm1, %ymm1 |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512-LABEL: trunc_v16f32: |
| ; AVX512: ## %bb.0: |
| ; AVX512-NEXT: vrndscaleps $11, %zmm0, %zmm0 |
| ; AVX512-NEXT: retq |
| %t = call <16 x float> @llvm.trunc.v16f32(<16 x float> %p) |
| ret <16 x float> %t |
| } |
| declare <16 x float> @llvm.trunc.v16f32(<16 x float> %p) |
| |
| ; llvm.rint.* lowering: round immediate $4 = 0b0100 (bit2: use current MXCSR |
| ; rounding mode; bit3 clear, so the precision/inexact exception is reported, |
| ; matching rint semantics). |
| define <2 x double> @rint_v2f64(<2 x double> %p) { |
| ; SSE41-LABEL: rint_v2f64: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: roundpd $4, %xmm0, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: rint_v2f64: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vroundpd $4, %xmm0, %xmm0 |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512-LABEL: rint_v2f64: |
| ; AVX512: ## %bb.0: |
| ; AVX512-NEXT: vroundpd $4, %xmm0, %xmm0 |
| ; AVX512-NEXT: retq |
| %t = call <2 x double> @llvm.rint.v2f64(<2 x double> %p) |
| ret <2 x double> %t |
| } |
| declare <2 x double> @llvm.rint.v2f64(<2 x double> %p) |
| |
| define <4 x float> @rint_v4f32(<4 x float> %p) { |
| ; SSE41-LABEL: rint_v4f32: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: roundps $4, %xmm0, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: rint_v4f32: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vroundps $4, %xmm0, %xmm0 |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512-LABEL: rint_v4f32: |
| ; AVX512: ## %bb.0: |
| ; AVX512-NEXT: vroundps $4, %xmm0, %xmm0 |
| ; AVX512-NEXT: retq |
| %t = call <4 x float> @llvm.rint.v4f32(<4 x float> %p) |
| ret <4 x float> %t |
| } |
| declare <4 x float> @llvm.rint.v4f32(<4 x float> %p) |
| |
| define <4 x double> @rint_v4f64(<4 x double> %p) { |
| ; SSE41-LABEL: rint_v4f64: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: roundpd $4, %xmm0, %xmm0 |
| ; SSE41-NEXT: roundpd $4, %xmm1, %xmm1 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: rint_v4f64: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vroundpd $4, %ymm0, %ymm0 |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512-LABEL: rint_v4f64: |
| ; AVX512: ## %bb.0: |
| ; AVX512-NEXT: vroundpd $4, %ymm0, %ymm0 |
| ; AVX512-NEXT: retq |
| %t = call <4 x double> @llvm.rint.v4f64(<4 x double> %p) |
| ret <4 x double> %t |
| } |
| declare <4 x double> @llvm.rint.v4f64(<4 x double> %p) |
| |
| define <8 x float> @rint_v8f32(<8 x float> %p) { |
| ; SSE41-LABEL: rint_v8f32: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: roundps $4, %xmm0, %xmm0 |
| ; SSE41-NEXT: roundps $4, %xmm1, %xmm1 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: rint_v8f32: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vroundps $4, %ymm0, %ymm0 |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512-LABEL: rint_v8f32: |
| ; AVX512: ## %bb.0: |
| ; AVX512-NEXT: vroundps $4, %ymm0, %ymm0 |
| ; AVX512-NEXT: retq |
| %t = call <8 x float> @llvm.rint.v8f32(<8 x float> %p) |
| ret <8 x float> %t |
| } |
| declare <8 x float> @llvm.rint.v8f32(<8 x float> %p) |
| |
| define <8 x double> @rint_v8f64(<8 x double> %p){ |
| ; SSE41-LABEL: rint_v8f64: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: roundpd $4, %xmm0, %xmm0 |
| ; SSE41-NEXT: roundpd $4, %xmm1, %xmm1 |
| ; SSE41-NEXT: roundpd $4, %xmm2, %xmm2 |
| ; SSE41-NEXT: roundpd $4, %xmm3, %xmm3 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: rint_v8f64: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vroundpd $4, %ymm0, %ymm0 |
| ; AVX-NEXT: vroundpd $4, %ymm1, %ymm1 |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512-LABEL: rint_v8f64: |
| ; AVX512: ## %bb.0: |
| ; AVX512-NEXT: vrndscalepd $4, %zmm0, %zmm0 |
| ; AVX512-NEXT: retq |
| %t = call <8 x double> @llvm.rint.v8f64(<8 x double> %p) |
| ret <8 x double> %t |
| } |
| declare <8 x double> @llvm.rint.v8f64(<8 x double> %p) |
| |
| define <16 x float> @rint_v16f32(<16 x float> %p) { |
| ; SSE41-LABEL: rint_v16f32: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: roundps $4, %xmm0, %xmm0 |
| ; SSE41-NEXT: roundps $4, %xmm1, %xmm1 |
| ; SSE41-NEXT: roundps $4, %xmm2, %xmm2 |
| ; SSE41-NEXT: roundps $4, %xmm3, %xmm3 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: rint_v16f32: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vroundps $4, %ymm0, %ymm0 |
| ; AVX-NEXT: vroundps $4, %ymm1, %ymm1 |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512-LABEL: rint_v16f32: |
| ; AVX512: ## %bb.0: |
| ; AVX512-NEXT: vrndscaleps $4, %zmm0, %zmm0 |
| ; AVX512-NEXT: retq |
| %t = call <16 x float> @llvm.rint.v16f32(<16 x float> %p) |
| ret <16 x float> %t |
| } |
| declare <16 x float> @llvm.rint.v16f32(<16 x float> %p) |
| |
| ; llvm.nearbyint.* lowering: round immediate $12 = 0b1100 (use current MXCSR |
| ; rounding mode AND suppress the precision exception — the only difference |
| ; from the rint tests above, which use $4). |
| define <2 x double> @nearbyint_v2f64(<2 x double> %p) { |
| ; SSE41-LABEL: nearbyint_v2f64: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: roundpd $12, %xmm0, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: nearbyint_v2f64: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vroundpd $12, %xmm0, %xmm0 |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512-LABEL: nearbyint_v2f64: |
| ; AVX512: ## %bb.0: |
| ; AVX512-NEXT: vroundpd $12, %xmm0, %xmm0 |
| ; AVX512-NEXT: retq |
| %t = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p) |
| ret <2 x double> %t |
| } |
| declare <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p) |
| |
| define <4 x float> @nearbyint_v4f32(<4 x float> %p) { |
| ; SSE41-LABEL: nearbyint_v4f32: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: roundps $12, %xmm0, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: nearbyint_v4f32: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vroundps $12, %xmm0, %xmm0 |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512-LABEL: nearbyint_v4f32: |
| ; AVX512: ## %bb.0: |
| ; AVX512-NEXT: vroundps $12, %xmm0, %xmm0 |
| ; AVX512-NEXT: retq |
| %t = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p) |
| ret <4 x float> %t |
| } |
| declare <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p) |
| |
| define <4 x double> @nearbyint_v4f64(<4 x double> %p) { |
| ; SSE41-LABEL: nearbyint_v4f64: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: roundpd $12, %xmm0, %xmm0 |
| ; SSE41-NEXT: roundpd $12, %xmm1, %xmm1 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: nearbyint_v4f64: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vroundpd $12, %ymm0, %ymm0 |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512-LABEL: nearbyint_v4f64: |
| ; AVX512: ## %bb.0: |
| ; AVX512-NEXT: vroundpd $12, %ymm0, %ymm0 |
| ; AVX512-NEXT: retq |
| %t = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p) |
| ret <4 x double> %t |
| } |
| declare <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p) |
| |
| define <8 x float> @nearbyint_v8f32(<8 x float> %p) { |
| ; SSE41-LABEL: nearbyint_v8f32: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: roundps $12, %xmm0, %xmm0 |
| ; SSE41-NEXT: roundps $12, %xmm1, %xmm1 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: nearbyint_v8f32: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vroundps $12, %ymm0, %ymm0 |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512-LABEL: nearbyint_v8f32: |
| ; AVX512: ## %bb.0: |
| ; AVX512-NEXT: vroundps $12, %ymm0, %ymm0 |
| ; AVX512-NEXT: retq |
| %t = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p) |
| ret <8 x float> %t |
| } |
| declare <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p) |
| |
| define <8 x double> @nearbyint_v8f64(<8 x double> %p){ |
| ; SSE41-LABEL: nearbyint_v8f64: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: roundpd $12, %xmm0, %xmm0 |
| ; SSE41-NEXT: roundpd $12, %xmm1, %xmm1 |
| ; SSE41-NEXT: roundpd $12, %xmm2, %xmm2 |
| ; SSE41-NEXT: roundpd $12, %xmm3, %xmm3 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: nearbyint_v8f64: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vroundpd $12, %ymm0, %ymm0 |
| ; AVX-NEXT: vroundpd $12, %ymm1, %ymm1 |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512-LABEL: nearbyint_v8f64: |
| ; AVX512: ## %bb.0: |
| ; AVX512-NEXT: vrndscalepd $12, %zmm0, %zmm0 |
| ; AVX512-NEXT: retq |
| %t = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> %p) |
| ret <8 x double> %t |
| } |
| declare <8 x double> @llvm.nearbyint.v8f64(<8 x double> %p) |
| |
| define <16 x float> @nearbyint_v16f32(<16 x float> %p) { |
| ; SSE41-LABEL: nearbyint_v16f32: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: roundps $12, %xmm0, %xmm0 |
| ; SSE41-NEXT: roundps $12, %xmm1, %xmm1 |
| ; SSE41-NEXT: roundps $12, %xmm2, %xmm2 |
| ; SSE41-NEXT: roundps $12, %xmm3, %xmm3 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: nearbyint_v16f32: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vroundps $12, %ymm0, %ymm0 |
| ; AVX-NEXT: vroundps $12, %ymm1, %ymm1 |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512-LABEL: nearbyint_v16f32: |
| ; AVX512: ## %bb.0: |
| ; AVX512-NEXT: vrndscaleps $12, %zmm0, %zmm0 |
| ; AVX512-NEXT: retq |
| %t = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> %p) |
| ret <16 x float> %t |
| } |
| declare <16 x float> @llvm.nearbyint.v16f32(<16 x float> %p) |
| |
| ; |
| ; Constant Folding |
| ; |
| |
| ; Constant-argument calls must be folded at compile time: no round instruction |
| ; should survive, only a load of the pre-rounded constant vector. |
| define <2 x double> @const_floor_v2f64() { |
| ; SSE41-LABEL: const_floor_v2f64: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: movaps {{.*#+}} xmm0 = [-2.000000e+00,2.000000e+00] |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: const_floor_v2f64: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [-2.000000e+00,2.000000e+00] |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512-LABEL: const_floor_v2f64: |
| ; AVX512: ## %bb.0: |
| ; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [-2.000000e+00,2.000000e+00] |
| ; AVX512-NEXT: retq |
| %t = call <2 x double> @llvm.floor.v2f64(<2 x double> <double -1.5, double 2.5>) |
| ret <2 x double> %t |
| } |
| |
| define <4 x float> @const_floor_v4f32() { |
| ; SSE41-LABEL: const_floor_v4f32: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: movaps {{.*#+}} xmm0 = [-4.000000e+00,6.000000e+00,-9.000000e+00,2.000000e+00] |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: const_floor_v4f32: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [-4.000000e+00,6.000000e+00,-9.000000e+00,2.000000e+00] |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512-LABEL: const_floor_v4f32: |
| ; AVX512: ## %bb.0: |
| ; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [-4.000000e+00,6.000000e+00,-9.000000e+00,2.000000e+00] |
| ; AVX512-NEXT: retq |
| %t = call <4 x float> @llvm.floor.v4f32(<4 x float> <float -3.5, float 6.0, float -9.0, float 2.5>) |
| ret <4 x float> %t |
| } |
| |
| define <2 x double> @const_ceil_v2f64() { |
| ; SSE41-LABEL: const_ceil_v2f64: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: movaps {{.*#+}} xmm0 = [-1.000000e+00,3.000000e+00] |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: const_ceil_v2f64: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [-1.000000e+00,3.000000e+00] |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512-LABEL: const_ceil_v2f64: |
| ; AVX512: ## %bb.0: |
| ; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [-1.000000e+00,3.000000e+00] |
| ; AVX512-NEXT: retq |
| %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> <double -1.5, double 2.5>) |
| ret <2 x double> %t |
| } |
| |
| define <4 x float> @const_ceil_v4f32() { |
| ; SSE41-LABEL: const_ceil_v4f32: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: movaps {{.*#+}} xmm0 = [-3.000000e+00,6.000000e+00,-9.000000e+00,3.000000e+00] |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: const_ceil_v4f32: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [-3.000000e+00,6.000000e+00,-9.000000e+00,3.000000e+00] |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512-LABEL: const_ceil_v4f32: |
| ; AVX512: ## %bb.0: |
| ; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [-3.000000e+00,6.000000e+00,-9.000000e+00,3.000000e+00] |
| ; AVX512-NEXT: retq |
| %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> <float -3.5, float 6.0, float -9.0, float 2.5>) |
| ret <4 x float> %t |
| } |
| |
| define <2 x double> @const_trunc_v2f64() { |
| ; SSE41-LABEL: const_trunc_v2f64: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: movaps {{.*#+}} xmm0 = [-1.000000e+00,2.000000e+00] |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: const_trunc_v2f64: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [-1.000000e+00,2.000000e+00] |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512-LABEL: const_trunc_v2f64: |
| ; AVX512: ## %bb.0: |
| ; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [-1.000000e+00,2.000000e+00] |
| ; AVX512-NEXT: retq |
| %t = call <2 x double> @llvm.trunc.v2f64(<2 x double> <double -1.5, double 2.5>) |
| ret <2 x double> %t |
| } |
| |
| define <4 x float> @const_trunc_v4f32() { |
| ; SSE41-LABEL: const_trunc_v4f32: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: movaps {{.*#+}} xmm0 = [-3.000000e+00,6.000000e+00,-9.000000e+00,2.000000e+00] |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: const_trunc_v4f32: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [-3.000000e+00,6.000000e+00,-9.000000e+00,2.000000e+00] |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512-LABEL: const_trunc_v4f32: |
| ; AVX512: ## %bb.0: |
| ; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [-3.000000e+00,6.000000e+00,-9.000000e+00,2.000000e+00] |
| ; AVX512-NEXT: retq |
| %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> <float -3.5, float 6.0, float -9.0, float 2.5>) |
| ret <4 x float> %t |
| } |
| |
| ; |
| ; Scalar and masked instructions |
| ; |
| |
| ; Scalar floor spliced into a vector: extract lane 0 of %x, floor it, insert |
| ; into %y. This should select roundss/roundsd $1 ($1 = round toward -inf, |
| ; precision exception NOT suppressed) rather than a full-vector round. |
| define <4 x float> @floor_ss(<4 x float> %x, <4 x float> %y) nounwind { |
| ; SSE41-LABEL: floor_ss: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: roundss $1, %xmm0, %xmm1 |
| ; SSE41-NEXT: movaps %xmm1, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: floor_ss: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vroundss $1, %xmm0, %xmm1, %xmm0 |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512-LABEL: floor_ss: |
| ; AVX512: ## %bb.0: |
| ; AVX512-NEXT: vroundss $1, %xmm0, %xmm1, %xmm0 |
| ; AVX512-NEXT: retq |
| %s = extractelement <4 x float> %x, i32 0 |
| %call = call float @llvm.floor.f32(float %s) |
| %res = insertelement <4 x float> %y, float %call, i32 0 |
| ret <4 x float> %res |
| } |
| declare float @llvm.floor.f32(float %s) |
| |
| define <2 x double> @floor_sd(<2 x double> %x, <2 x double> %y) nounwind { |
| ; SSE41-LABEL: floor_sd: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: roundsd $1, %xmm0, %xmm1 |
| ; SSE41-NEXT: movapd %xmm1, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: floor_sd: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vroundsd $1, %xmm0, %xmm1, %xmm0 |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512-LABEL: floor_sd: |
| ; AVX512: ## %bb.0: |
| ; AVX512-NEXT: vroundsd $1, %xmm0, %xmm1, %xmm0 |
| ; AVX512-NEXT: retq |
| %s = extractelement <2 x double> %x, i32 0 |
| %call = call double @llvm.floor.f64(double %s) |
| %res = insertelement <2 x double> %y, double %call, i32 0 |
| ret <2 x double> %res |
| } |
| declare double @llvm.floor.f64(double %s) |
| |
| ; Masked floor (select between floored %x and fallback): pre-AVX512 lowers to |
| ; cmpeq + round + blend; AVX512VL folds the select into a masked vrndscale; |
| ; AVX512F lacks 128-bit mask ops, so it widens to zmm (note the kill/vzeroupper |
| ; bookkeeping) and uses vblendmps/vmovaps with a k-register. |
| define <4 x float> @floor_mask_128_ps(<4 x float> %x, <4 x float> %y) nounwind { |
| ; SSE41-LABEL: floor_mask_128_ps: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: roundps $9, %xmm0, %xmm2 |
| ; SSE41-NEXT: cmpeqps %xmm1, %xmm0 |
| ; SSE41-NEXT: blendvps %xmm0, %xmm2, %xmm1 |
| ; SSE41-NEXT: movaps %xmm1, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: floor_mask_128_ps: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm2 |
| ; AVX-NEXT: vroundps $9, %xmm0, %xmm0 |
| ; AVX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512F-LABEL: floor_mask_128_ps: |
| ; AVX512F: ## %bb.0: |
| ; AVX512F-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1 |
| ; AVX512F-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0 |
| ; AVX512F-NEXT: vcmpeqps %zmm1, %zmm0, %k1 |
| ; AVX512F-NEXT: vroundps $9, %xmm0, %xmm0 |
| ; AVX512F-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} |
| ; AVX512F-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0 |
| ; AVX512F-NEXT: vzeroupper |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: floor_mask_128_ps: |
| ; AVX512VL: ## %bb.0: |
| ; AVX512VL-NEXT: vcmpeqps %xmm1, %xmm0, %k1 |
| ; AVX512VL-NEXT: vrndscaleps $9, %xmm0, %xmm1 {%k1} |
| ; AVX512VL-NEXT: vmovaps %xmm1, %xmm0 |
| ; AVX512VL-NEXT: retq |
| %k = fcmp oeq <4 x float> %x, %y |
| %call = call <4 x float> @llvm.floor.v4f32(<4 x float> %x) |
| %res = select <4 x i1> %k, <4 x float> %call, <4 x float> %y |
| ret <4 x float> %res |
| } |
| |
| ; Zero-masked variant: fallback is zeroinitializer, so AVX512VL can use the |
| ; {z} zeroing form and pre-AVX512 targets use an AND with the compare mask. |
| define <4 x float> @floor_maskz_128_ps(<4 x float> %x, <4 x float> %y) nounwind { |
| ; SSE41-LABEL: floor_maskz_128_ps: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: cmpeqps %xmm0, %xmm1 |
| ; SSE41-NEXT: roundps $9, %xmm0, %xmm0 |
| ; SSE41-NEXT: andps %xmm1, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: floor_maskz_128_ps: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 |
| ; AVX-NEXT: vroundps $9, %xmm0, %xmm0 |
| ; AVX-NEXT: vandps %xmm0, %xmm1, %xmm0 |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512F-LABEL: floor_maskz_128_ps: |
| ; AVX512F: ## %bb.0: |
| ; AVX512F-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1 |
| ; AVX512F-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0 |
| ; AVX512F-NEXT: vcmpeqps %zmm1, %zmm0, %k1 |
| ; AVX512F-NEXT: vroundps $9, %xmm0, %xmm0 |
| ; AVX512F-NEXT: vmovaps %zmm0, %zmm0 {%k1} {z} |
| ; AVX512F-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0 |
| ; AVX512F-NEXT: vzeroupper |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: floor_maskz_128_ps: |
| ; AVX512VL: ## %bb.0: |
| ; AVX512VL-NEXT: vcmpeqps %xmm1, %xmm0, %k1 |
| ; AVX512VL-NEXT: vrndscaleps $9, %xmm0, %xmm0 {%k1} {z} |
| ; AVX512VL-NEXT: retq |
| %k = fcmp oeq <4 x float> %x, %y |
| %call = call <4 x float> @llvm.floor.v4f32(<4 x float> %x) |
| %res = select <4 x i1> %k, <4 x float> %call, <4 x float> zeroinitializer |
| ret <4 x float> %res |
| } |
| |
| define <2 x double> @floor_mask_128_pd(<2 x double> %x, <2 x double> %y) nounwind { |
| ; SSE41-LABEL: floor_mask_128_pd: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: roundpd $9, %xmm0, %xmm2 |
| ; SSE41-NEXT: cmpeqpd %xmm1, %xmm0 |
| ; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1 |
| ; SSE41-NEXT: movapd %xmm1, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: floor_mask_128_pd: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm2 |
| ; AVX-NEXT: vroundpd $9, %xmm0, %xmm0 |
| ; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512F-LABEL: floor_mask_128_pd: |
| ; AVX512F: ## %bb.0: |
| ; AVX512F-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1 |
| ; AVX512F-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0 |
| ; AVX512F-NEXT: vcmpeqpd %zmm1, %zmm0, %k1 |
| ; AVX512F-NEXT: vroundpd $9, %xmm0, %xmm0 |
| ; AVX512F-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} |
| ; AVX512F-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0 |
| ; AVX512F-NEXT: vzeroupper |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: floor_mask_128_pd: |
| ; AVX512VL: ## %bb.0: |
| ; AVX512VL-NEXT: vcmpeqpd %xmm1, %xmm0, %k1 |
| ; AVX512VL-NEXT: vrndscalepd $9, %xmm0, %xmm1 {%k1} |
| ; AVX512VL-NEXT: vmovapd %xmm1, %xmm0 |
| ; AVX512VL-NEXT: retq |
| %k = fcmp oeq <2 x double> %x, %y |
| %call = call <2 x double> @llvm.floor.v2f64(<2 x double> %x) |
| %res = select <2 x i1> %k, <2 x double> %call, <2 x double> %y |
| ret <2 x double> %res |
| } |
| |
| define <2 x double> @floor_maskz_128_pd(<2 x double> %x, <2 x double> %y) nounwind { |
| ; SSE41-LABEL: floor_maskz_128_pd: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: cmpeqpd %xmm0, %xmm1 |
| ; SSE41-NEXT: roundpd $9, %xmm0, %xmm0 |
| ; SSE41-NEXT: andpd %xmm1, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: floor_maskz_128_pd: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 |
| ; AVX-NEXT: vroundpd $9, %xmm0, %xmm0 |
| ; AVX-NEXT: vandpd %xmm0, %xmm1, %xmm0 |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512F-LABEL: floor_maskz_128_pd: |
| ; AVX512F: ## %bb.0: |
| ; AVX512F-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1 |
| ; AVX512F-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0 |
| ; AVX512F-NEXT: vcmpeqpd %zmm1, %zmm0, %k1 |
| ; AVX512F-NEXT: vroundpd $9, %xmm0, %xmm0 |
| ; AVX512F-NEXT: vmovapd %zmm0, %zmm0 {%k1} {z} |
| ; AVX512F-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0 |
| ; AVX512F-NEXT: vzeroupper |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: floor_maskz_128_pd: |
| ; AVX512VL: ## %bb.0: |
| ; AVX512VL-NEXT: vcmpeqpd %xmm1, %xmm0, %k1 |
| ; AVX512VL-NEXT: vrndscalepd $9, %xmm0, %xmm0 {%k1} {z} |
| ; AVX512VL-NEXT: retq |
| %k = fcmp oeq <2 x double> %x, %y |
| %call = call <2 x double> @llvm.floor.v2f64(<2 x double> %x) |
| %res = select <2 x i1> %k, <2 x double> %call, <2 x double> zeroinitializer |
| ret <2 x double> %res |
| } |
| |
| define <8 x float> @floor_mask_256_ps(<8 x float> %x, <8 x float> %y) nounwind { |
| ; SSE41-LABEL: floor_mask_256_ps: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: roundps $9, %xmm1, %xmm4 |
| ; SSE41-NEXT: cmpeqps %xmm3, %xmm1 |
| ; SSE41-NEXT: roundps $9, %xmm0, %xmm5 |
| ; SSE41-NEXT: cmpeqps %xmm2, %xmm0 |
| ; SSE41-NEXT: blendvps %xmm0, %xmm5, %xmm2 |
| ; SSE41-NEXT: movaps %xmm1, %xmm0 |
| ; SSE41-NEXT: blendvps %xmm0, %xmm4, %xmm3 |
| ; SSE41-NEXT: movaps %xmm2, %xmm0 |
| ; SSE41-NEXT: movaps %xmm3, %xmm1 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: floor_mask_256_ps: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vcmpeqps %ymm1, %ymm0, %ymm2 |
| ; AVX-NEXT: vroundps $9, %ymm0, %ymm0 |
| ; AVX-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0 |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512F-LABEL: floor_mask_256_ps: |
| ; AVX512F: ## %bb.0: |
| ; AVX512F-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1 |
| ; AVX512F-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0 |
| ; AVX512F-NEXT: vcmpeqps %zmm1, %zmm0, %k1 |
| ; AVX512F-NEXT: vroundps $9, %ymm0, %ymm0 |
| ; AVX512F-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} |
| ; AVX512F-NEXT: ## kill: def $ymm0 killed $ymm0 killed $zmm0 |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: floor_mask_256_ps: |
| ; AVX512VL: ## %bb.0: |
| ; AVX512VL-NEXT: vcmpeqps %ymm1, %ymm0, %k1 |
| ; AVX512VL-NEXT: vrndscaleps $9, %ymm0, %ymm1 {%k1} |
| ; AVX512VL-NEXT: vmovaps %ymm1, %ymm0 |
| ; AVX512VL-NEXT: retq |
| %k = fcmp oeq <8 x float> %x, %y |
| %call = call <8 x float> @llvm.floor.v8f32(<8 x float> %x) |
| %res = select <8 x i1> %k, <8 x float> %call, <8 x float> %y |
| ret <8 x float> %res |
| } |
| |
| define <8 x float> @floor_maskz_256_ps(<8 x float> %x, <8 x float> %y) nounwind { |
| ; SSE41-LABEL: floor_maskz_256_ps: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: cmpeqps %xmm1, %xmm3 |
| ; SSE41-NEXT: cmpeqps %xmm0, %xmm2 |
| ; SSE41-NEXT: roundps $9, %xmm1, %xmm1 |
| ; SSE41-NEXT: andps %xmm3, %xmm1 |
| ; SSE41-NEXT: roundps $9, %xmm0, %xmm0 |
| ; SSE41-NEXT: andps %xmm2, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: floor_maskz_256_ps: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 |
| ; AVX-NEXT: vroundps $9, %ymm0, %ymm0 |
| ; AVX-NEXT: vandps %ymm0, %ymm1, %ymm0 |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512F-LABEL: floor_maskz_256_ps: |
| ; AVX512F: ## %bb.0: |
| ; AVX512F-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1 |
| ; AVX512F-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0 |
| ; AVX512F-NEXT: vcmpeqps %zmm1, %zmm0, %k1 |
| ; AVX512F-NEXT: vroundps $9, %ymm0, %ymm0 |
| ; AVX512F-NEXT: vmovaps %zmm0, %zmm0 {%k1} {z} |
| ; AVX512F-NEXT: ## kill: def $ymm0 killed $ymm0 killed $zmm0 |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: floor_maskz_256_ps: |
| ; AVX512VL: ## %bb.0: |
| ; AVX512VL-NEXT: vcmpeqps %ymm1, %ymm0, %k1 |
| ; AVX512VL-NEXT: vrndscaleps $9, %ymm0, %ymm0 {%k1} {z} |
| ; AVX512VL-NEXT: retq |
| %k = fcmp oeq <8 x float> %x, %y |
| %call = call <8 x float> @llvm.floor.v8f32(<8 x float> %x) |
| %res = select <8 x i1> %k, <8 x float> %call, <8 x float> zeroinitializer |
| ret <8 x float> %res |
| } |
| |
| define <4 x double> @floor_mask_256_pd(<4 x double> %x, <4 x double> %y) nounwind { |
| ; SSE41-LABEL: floor_mask_256_pd: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: roundpd $9, %xmm1, %xmm4 |
| ; SSE41-NEXT: cmpeqpd %xmm3, %xmm1 |
| ; SSE41-NEXT: roundpd $9, %xmm0, %xmm5 |
| ; SSE41-NEXT: cmpeqpd %xmm2, %xmm0 |
| ; SSE41-NEXT: blendvpd %xmm0, %xmm5, %xmm2 |
| ; SSE41-NEXT: movapd %xmm1, %xmm0 |
| ; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm3 |
| ; SSE41-NEXT: movapd %xmm2, %xmm0 |
| ; SSE41-NEXT: movapd %xmm3, %xmm1 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: floor_mask_256_pd: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm2 |
| ; AVX-NEXT: vroundpd $9, %ymm0, %ymm0 |
| ; AVX-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512F-LABEL: floor_mask_256_pd: |
| ; AVX512F: ## %bb.0: |
| ; AVX512F-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1 |
| ; AVX512F-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0 |
| ; AVX512F-NEXT: vcmpeqpd %zmm1, %zmm0, %k1 |
| ; AVX512F-NEXT: vroundpd $9, %ymm0, %ymm0 |
| ; AVX512F-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} |
| ; AVX512F-NEXT: ## kill: def $ymm0 killed $ymm0 killed $zmm0 |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: floor_mask_256_pd: |
| ; AVX512VL: ## %bb.0: |
| ; AVX512VL-NEXT: vcmpeqpd %ymm1, %ymm0, %k1 |
| ; AVX512VL-NEXT: vrndscalepd $9, %ymm0, %ymm1 {%k1} |
| ; AVX512VL-NEXT: vmovapd %ymm1, %ymm0 |
| ; AVX512VL-NEXT: retq |
| %k = fcmp oeq <4 x double> %x, %y |
| %call = call <4 x double> @llvm.floor.v4f64(<4 x double> %x) |
| %res = select <4 x i1> %k, <4 x double> %call, <4 x double> %y |
| ret <4 x double> %res |
| } |
| |
| define <4 x double> @floor_maskz_256_pd(<4 x double> %x, <4 x double> %y) nounwind { |
| ; SSE41-LABEL: floor_maskz_256_pd: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: cmpeqpd %xmm1, %xmm3 |
| ; SSE41-NEXT: cmpeqpd %xmm0, %xmm2 |
| ; SSE41-NEXT: roundpd $9, %xmm1, %xmm1 |
| ; SSE41-NEXT: andpd %xmm3, %xmm1 |
| ; SSE41-NEXT: roundpd $9, %xmm0, %xmm0 |
| ; SSE41-NEXT: andpd %xmm2, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: floor_maskz_256_pd: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 |
| ; AVX-NEXT: vroundpd $9, %ymm0, %ymm0 |
| ; AVX-NEXT: vandpd %ymm0, %ymm1, %ymm0 |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512F-LABEL: floor_maskz_256_pd: |
| ; AVX512F: ## %bb.0: |
| ; AVX512F-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1 |
| ; AVX512F-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0 |
| ; AVX512F-NEXT: vcmpeqpd %zmm1, %zmm0, %k1 |
| ; AVX512F-NEXT: vroundpd $9, %ymm0, %ymm0 |
| ; AVX512F-NEXT: vmovapd %zmm0, %zmm0 {%k1} {z} |
| ; AVX512F-NEXT: ## kill: def $ymm0 killed $ymm0 killed $zmm0 |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: floor_maskz_256_pd: |
| ; AVX512VL: ## %bb.0: |
| ; AVX512VL-NEXT: vcmpeqpd %ymm1, %ymm0, %k1 |
| ; AVX512VL-NEXT: vrndscalepd $9, %ymm0, %ymm0 {%k1} {z} |
| ; AVX512VL-NEXT: retq |
| %k = fcmp oeq <4 x double> %x, %y |
| %call = call <4 x double> @llvm.floor.v4f64(<4 x double> %x) |
| %res = select <4 x i1> %k, <4 x double> %call, <4 x double> zeroinitializer |
| ret <4 x double> %res |
| } |
| |
| define <16 x float> @floor_mask_512_ps(<16 x float> %x, <16 x float> %y) nounwind { |
| ; SSE41-LABEL: floor_mask_512_ps: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: roundps $9, %xmm3, %xmm8 |
| ; SSE41-NEXT: cmpeqps %xmm7, %xmm3 |
| ; SSE41-NEXT: roundps $9, %xmm2, %xmm9 |
| ; SSE41-NEXT: cmpeqps %xmm6, %xmm2 |
| ; SSE41-NEXT: roundps $9, %xmm1, %xmm10 |
| ; SSE41-NEXT: cmpeqps %xmm5, %xmm1 |
| ; SSE41-NEXT: roundps $9, %xmm0, %xmm11 |
| ; SSE41-NEXT: cmpeqps %xmm4, %xmm0 |
| ; SSE41-NEXT: blendvps %xmm0, %xmm11, %xmm4 |
| ; SSE41-NEXT: movaps %xmm1, %xmm0 |
| ; SSE41-NEXT: blendvps %xmm0, %xmm10, %xmm5 |
| ; SSE41-NEXT: movaps %xmm2, %xmm0 |
| ; SSE41-NEXT: blendvps %xmm0, %xmm9, %xmm6 |
| ; SSE41-NEXT: movaps %xmm3, %xmm0 |
| ; SSE41-NEXT: blendvps %xmm0, %xmm8, %xmm7 |
| ; SSE41-NEXT: movaps %xmm4, %xmm0 |
| ; SSE41-NEXT: movaps %xmm5, %xmm1 |
| ; SSE41-NEXT: movaps %xmm6, %xmm2 |
| ; SSE41-NEXT: movaps %xmm7, %xmm3 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: floor_mask_512_ps: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vcmpeqps %ymm3, %ymm1, %ymm4 |
| ; AVX-NEXT: vcmpeqps %ymm2, %ymm0, %ymm5 |
| ; AVX-NEXT: vroundps $9, %ymm1, %ymm1 |
| ; AVX-NEXT: vroundps $9, %ymm0, %ymm0 |
| ; AVX-NEXT: vblendvps %ymm5, %ymm0, %ymm2, %ymm0 |
| ; AVX-NEXT: vblendvps %ymm4, %ymm1, %ymm3, %ymm1 |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512-LABEL: floor_mask_512_ps: |
| ; AVX512: ## %bb.0: |
| ; AVX512-NEXT: vcmpeqps %zmm1, %zmm0, %k1 |
| ; AVX512-NEXT: vrndscaleps $9, %zmm0, %zmm1 {%k1} |
| ; AVX512-NEXT: vmovaps %zmm1, %zmm0 |
| ; AVX512-NEXT: retq |
| %k = fcmp oeq <16 x float> %x, %y |
| %call = call <16 x float> @llvm.floor.v16f32(<16 x float> %x) |
| %res = select <16 x i1> %k, <16 x float> %call, <16 x float> %y |
| ret <16 x float> %res |
| } |
| |
| define <16 x float> @floor_maskz_512_ps(<16 x float> %x, <16 x float> %y) nounwind { |
| ; SSE41-LABEL: floor_maskz_512_ps: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: cmpeqps %xmm3, %xmm7 |
| ; SSE41-NEXT: cmpeqps %xmm2, %xmm6 |
| ; SSE41-NEXT: cmpeqps %xmm1, %xmm5 |
| ; SSE41-NEXT: cmpeqps %xmm0, %xmm4 |
| ; SSE41-NEXT: roundps $9, %xmm3, %xmm3 |
| ; SSE41-NEXT: andps %xmm7, %xmm3 |
| ; SSE41-NEXT: roundps $9, %xmm2, %xmm2 |
| ; SSE41-NEXT: andps %xmm6, %xmm2 |
| ; SSE41-NEXT: roundps $9, %xmm1, %xmm1 |
| ; SSE41-NEXT: andps %xmm5, %xmm1 |
| ; SSE41-NEXT: roundps $9, %xmm0, %xmm0 |
| ; SSE41-NEXT: andps %xmm4, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: floor_maskz_512_ps: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vcmpeqps %ymm3, %ymm1, %ymm3 |
| ; AVX-NEXT: vcmpeqps %ymm2, %ymm0, %ymm2 |
| ; AVX-NEXT: vroundps $9, %ymm1, %ymm1 |
| ; AVX-NEXT: vandps %ymm1, %ymm3, %ymm1 |
| ; AVX-NEXT: vroundps $9, %ymm0, %ymm0 |
| ; AVX-NEXT: vandps %ymm0, %ymm2, %ymm0 |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512-LABEL: floor_maskz_512_ps: |
| ; AVX512: ## %bb.0: |
| ; AVX512-NEXT: vcmpeqps %zmm1, %zmm0, %k1 |
| ; AVX512-NEXT: vrndscaleps $9, %zmm0, %zmm0 {%k1} {z} |
| ; AVX512-NEXT: retq |
| %k = fcmp oeq <16 x float> %x, %y |
| %call = call <16 x float> @llvm.floor.v16f32(<16 x float> %x) |
| %res = select <16 x i1> %k, <16 x float> %call, <16 x float> zeroinitializer |
| ret <16 x float> %res |
| } |
| |
| define <8 x double> @floor_mask_512_pd(<8 x double> %x, <8 x double> %y) nounwind { |
| ; SSE41-LABEL: floor_mask_512_pd: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: roundpd $9, %xmm3, %xmm8 |
| ; SSE41-NEXT: cmpeqpd %xmm7, %xmm3 |
| ; SSE41-NEXT: roundpd $9, %xmm2, %xmm9 |
| ; SSE41-NEXT: cmpeqpd %xmm6, %xmm2 |
| ; SSE41-NEXT: roundpd $9, %xmm1, %xmm10 |
| ; SSE41-NEXT: cmpeqpd %xmm5, %xmm1 |
| ; SSE41-NEXT: roundpd $9, %xmm0, %xmm11 |
| ; SSE41-NEXT: cmpeqpd %xmm4, %xmm0 |
| ; SSE41-NEXT: blendvpd %xmm0, %xmm11, %xmm4 |
| ; SSE41-NEXT: movapd %xmm1, %xmm0 |
| ; SSE41-NEXT: blendvpd %xmm0, %xmm10, %xmm5 |
| ; SSE41-NEXT: movapd %xmm2, %xmm0 |
| ; SSE41-NEXT: blendvpd %xmm0, %xmm9, %xmm6 |
| ; SSE41-NEXT: movapd %xmm3, %xmm0 |
| ; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm7 |
| ; SSE41-NEXT: movapd %xmm4, %xmm0 |
| ; SSE41-NEXT: movapd %xmm5, %xmm1 |
| ; SSE41-NEXT: movapd %xmm6, %xmm2 |
| ; SSE41-NEXT: movapd %xmm7, %xmm3 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: floor_mask_512_pd: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vcmpeqpd %ymm3, %ymm1, %ymm4 |
| ; AVX-NEXT: vcmpeqpd %ymm2, %ymm0, %ymm5 |
| ; AVX-NEXT: vroundpd $9, %ymm1, %ymm1 |
| ; AVX-NEXT: vroundpd $9, %ymm0, %ymm0 |
| ; AVX-NEXT: vblendvpd %ymm5, %ymm0, %ymm2, %ymm0 |
| ; AVX-NEXT: vblendvpd %ymm4, %ymm1, %ymm3, %ymm1 |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512-LABEL: floor_mask_512_pd: |
| ; AVX512: ## %bb.0: |
| ; AVX512-NEXT: vcmpeqpd %zmm1, %zmm0, %k1 |
| ; AVX512-NEXT: vrndscalepd $9, %zmm0, %zmm1 {%k1} |
| ; AVX512-NEXT: vmovapd %zmm1, %zmm0 |
| ; AVX512-NEXT: retq |
| %k = fcmp oeq <8 x double> %x, %y |
| %call = call <8 x double> @llvm.floor.v8f64(<8 x double> %x) |
| %res = select <8 x i1> %k, <8 x double> %call, <8 x double> %y |
| ret <8 x double> %res |
| } |
| |
| define <8 x double> @floor_maskz_512_pd(<8 x double> %x, <8 x double> %y) nounwind { |
| ; SSE41-LABEL: floor_maskz_512_pd: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: cmpeqpd %xmm3, %xmm7 |
| ; SSE41-NEXT: cmpeqpd %xmm2, %xmm6 |
| ; SSE41-NEXT: cmpeqpd %xmm1, %xmm5 |
| ; SSE41-NEXT: cmpeqpd %xmm0, %xmm4 |
| ; SSE41-NEXT: roundpd $9, %xmm3, %xmm3 |
| ; SSE41-NEXT: andpd %xmm7, %xmm3 |
| ; SSE41-NEXT: roundpd $9, %xmm2, %xmm2 |
| ; SSE41-NEXT: andpd %xmm6, %xmm2 |
| ; SSE41-NEXT: roundpd $9, %xmm1, %xmm1 |
| ; SSE41-NEXT: andpd %xmm5, %xmm1 |
| ; SSE41-NEXT: roundpd $9, %xmm0, %xmm0 |
| ; SSE41-NEXT: andpd %xmm4, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: floor_maskz_512_pd: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vcmpeqpd %ymm3, %ymm1, %ymm3 |
| ; AVX-NEXT: vcmpeqpd %ymm2, %ymm0, %ymm2 |
| ; AVX-NEXT: vroundpd $9, %ymm1, %ymm1 |
| ; AVX-NEXT: vandpd %ymm1, %ymm3, %ymm1 |
| ; AVX-NEXT: vroundpd $9, %ymm0, %ymm0 |
| ; AVX-NEXT: vandpd %ymm0, %ymm2, %ymm0 |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512-LABEL: floor_maskz_512_pd: |
| ; AVX512: ## %bb.0: |
| ; AVX512-NEXT: vcmpeqpd %zmm1, %zmm0, %k1 |
| ; AVX512-NEXT: vrndscalepd $9, %zmm0, %zmm0 {%k1} {z} |
| ; AVX512-NEXT: retq |
| %k = fcmp oeq <8 x double> %x, %y |
| %call = call <8 x double> @llvm.floor.v8f64(<8 x double> %x) |
| %res = select <8 x i1> %k, <8 x double> %call, <8 x double> zeroinitializer |
| ret <8 x double> %res |
| } |
| |
| define <4 x float> @floor_mask_ss(<4 x float> %x, <4 x float> %y, <4 x float> %w, i8 %k) nounwind { |
| ; SSE41-LABEL: floor_mask_ss: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: testb $1, %dil |
| ; SSE41-NEXT: je LBB52_2 |
| ; SSE41-NEXT: ## %bb.1: |
| ; SSE41-NEXT: xorps %xmm2, %xmm2 |
| ; SSE41-NEXT: roundss $9, %xmm0, %xmm2 |
| ; SSE41-NEXT: LBB52_2: |
| ; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm2[0],xmm1[1,2,3] |
| ; SSE41-NEXT: movaps %xmm1, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: floor_mask_ss: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: testb $1, %dil |
| ; AVX-NEXT: je LBB52_2 |
| ; AVX-NEXT: ## %bb.1: |
| ; AVX-NEXT: vroundss $9, %xmm0, %xmm0, %xmm2 |
| ; AVX-NEXT: LBB52_2: |
| ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm2[0],xmm1[1,2,3] |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512-LABEL: floor_mask_ss: |
| ; AVX512: ## %bb.0: |
| ; AVX512-NEXT: kmovw %edi, %k1 |
| ; AVX512-NEXT: vrndscaless $1, %xmm0, %xmm1, %xmm2 {%k1} |
| ; AVX512-NEXT: vmovaps %xmm2, %xmm0 |
| ; AVX512-NEXT: retq |
| %mask = and i8 %k, 1 |
| %nmask = icmp eq i8 %mask, 0 |
| %s = extractelement <4 x float> %x, i64 0 |
| %call = tail call float @llvm.floor.f32(float %s) |
| %dst = extractelement <4 x float> %w, i64 0 |
| %low = select i1 %nmask, float %dst, float %call |
| %res = insertelement <4 x float> %y, float %low, i64 0 |
| ret <4 x float> %res |
| } |
| |
| define <4 x float> @floor_maskz_ss(<4 x float> %x, <4 x float> %y, i8 %k) nounwind { |
| ; SSE41-LABEL: floor_maskz_ss: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: testb $1, %dil |
| ; SSE41-NEXT: xorps %xmm2, %xmm2 |
| ; SSE41-NEXT: je LBB53_2 |
| ; SSE41-NEXT: ## %bb.1: |
| ; SSE41-NEXT: xorps %xmm2, %xmm2 |
| ; SSE41-NEXT: roundss $9, %xmm0, %xmm2 |
| ; SSE41-NEXT: LBB53_2: |
| ; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm2[0],xmm1[1,2,3] |
| ; SSE41-NEXT: movaps %xmm1, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: floor_maskz_ss: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: testb $1, %dil |
| ; AVX-NEXT: vxorps %xmm2, %xmm2, %xmm2 |
| ; AVX-NEXT: je LBB53_2 |
| ; AVX-NEXT: ## %bb.1: |
| ; AVX-NEXT: vroundss $9, %xmm0, %xmm0, %xmm2 |
| ; AVX-NEXT: LBB53_2: |
| ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm2[0],xmm1[1,2,3] |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512-LABEL: floor_maskz_ss: |
| ; AVX512: ## %bb.0: |
| ; AVX512-NEXT: kmovw %edi, %k1 |
| ; AVX512-NEXT: vrndscaless $1, %xmm0, %xmm1, %xmm0 {%k1} {z} |
| ; AVX512-NEXT: retq |
| %mask = and i8 %k, 1 |
| %nmask = icmp eq i8 %mask, 0 |
| %s = extractelement <4 x float> %x, i64 0 |
| %call = tail call float @llvm.floor.f32(float %s) |
| %low = select i1 %nmask, float zeroinitializer, float %call |
| %res = insertelement <4 x float> %y, float %low, i64 0 |
| ret <4 x float> %res |
| } |
| |
| define <2 x double> @floor_mask_sd(<2 x double> %x, <2 x double> %y, <2 x double> %w, i8 %k) nounwind { |
| ; SSE41-LABEL: floor_mask_sd: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: testb $1, %dil |
| ; SSE41-NEXT: je LBB54_2 |
| ; SSE41-NEXT: ## %bb.1: |
| ; SSE41-NEXT: xorps %xmm2, %xmm2 |
| ; SSE41-NEXT: roundsd $9, %xmm0, %xmm2 |
| ; SSE41-NEXT: LBB54_2: |
| ; SSE41-NEXT: blendpd {{.*#+}} xmm1 = xmm2[0],xmm1[1] |
| ; SSE41-NEXT: movapd %xmm1, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: floor_mask_sd: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: testb $1, %dil |
| ; AVX-NEXT: je LBB54_2 |
| ; AVX-NEXT: ## %bb.1: |
| ; AVX-NEXT: vroundsd $9, %xmm0, %xmm0, %xmm2 |
| ; AVX-NEXT: LBB54_2: |
| ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm2[0],xmm1[1] |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512-LABEL: floor_mask_sd: |
| ; AVX512: ## %bb.0: |
| ; AVX512-NEXT: kmovw %edi, %k1 |
| ; AVX512-NEXT: vrndscalesd $1, %xmm0, %xmm1, %xmm2 {%k1} |
| ; AVX512-NEXT: vmovapd %xmm2, %xmm0 |
| ; AVX512-NEXT: retq |
| %mask = and i8 %k, 1 |
| %nmask = icmp eq i8 %mask, 0 |
| %s = extractelement <2 x double> %x, i64 0 |
| %call = tail call double @llvm.floor.f64(double %s) |
| %dst = extractelement <2 x double> %w, i64 0 |
| %low = select i1 %nmask, double %dst, double %call |
| %res = insertelement <2 x double> %y, double %low, i64 0 |
| ret <2 x double> %res |
| } |
| |
| define <2 x double> @floor_maskz_sd(<2 x double> %x, <2 x double> %y, i8 %k) nounwind { |
| ; SSE41-LABEL: floor_maskz_sd: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: testb $1, %dil |
| ; SSE41-NEXT: xorpd %xmm2, %xmm2 |
| ; SSE41-NEXT: je LBB55_2 |
| ; SSE41-NEXT: ## %bb.1: |
| ; SSE41-NEXT: xorps %xmm2, %xmm2 |
| ; SSE41-NEXT: roundsd $9, %xmm0, %xmm2 |
| ; SSE41-NEXT: LBB55_2: |
| ; SSE41-NEXT: blendpd {{.*#+}} xmm1 = xmm2[0],xmm1[1] |
| ; SSE41-NEXT: movapd %xmm1, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: floor_maskz_sd: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: testb $1, %dil |
| ; AVX-NEXT: vxorpd %xmm2, %xmm2, %xmm2 |
| ; AVX-NEXT: je LBB55_2 |
| ; AVX-NEXT: ## %bb.1: |
| ; AVX-NEXT: vroundsd $9, %xmm0, %xmm0, %xmm2 |
| ; AVX-NEXT: LBB55_2: |
| ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm2[0],xmm1[1] |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512-LABEL: floor_maskz_sd: |
| ; AVX512: ## %bb.0: |
| ; AVX512-NEXT: kmovw %edi, %k1 |
| ; AVX512-NEXT: vrndscalesd $1, %xmm0, %xmm1, %xmm0 {%k1} {z} |
| ; AVX512-NEXT: retq |
| %mask = and i8 %k, 1 |
| %nmask = icmp eq i8 %mask, 0 |
| %s = extractelement <2 x double> %x, i64 0 |
| %call = tail call double @llvm.floor.f64(double %s) |
| %low = select i1 %nmask, double zeroinitializer, double %call |
| %res = insertelement <2 x double> %y, double %low, i64 0 |
| ret <2 x double> %res |
| } |
| |
| define <4 x float> @floor_mask_ss_trunc(<4 x float> %x, <4 x float> %y, <4 x float> %w, i16 %k) nounwind { |
| ; SSE41-LABEL: floor_mask_ss_trunc: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: testb $1, %dil |
| ; SSE41-NEXT: je LBB56_2 |
| ; SSE41-NEXT: ## %bb.1: |
| ; SSE41-NEXT: xorps %xmm2, %xmm2 |
| ; SSE41-NEXT: roundss $9, %xmm0, %xmm2 |
| ; SSE41-NEXT: LBB56_2: |
| ; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm2[0],xmm1[1,2,3] |
| ; SSE41-NEXT: movaps %xmm1, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: floor_mask_ss_trunc: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: testb $1, %dil |
| ; AVX-NEXT: je LBB56_2 |
| ; AVX-NEXT: ## %bb.1: |
| ; AVX-NEXT: vroundss $9, %xmm0, %xmm0, %xmm2 |
| ; AVX-NEXT: LBB56_2: |
| ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm2[0],xmm1[1,2,3] |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512-LABEL: floor_mask_ss_trunc: |
| ; AVX512: ## %bb.0: |
| ; AVX512-NEXT: kmovw %edi, %k1 |
| ; AVX512-NEXT: vrndscaless $1, %xmm0, %xmm1, %xmm2 {%k1} |
| ; AVX512-NEXT: vmovaps %xmm2, %xmm0 |
| ; AVX512-NEXT: retq |
| %mask = trunc i16 %k to i1 |
| %s = extractelement <4 x float> %x, i64 0 |
| %call = tail call float @llvm.floor.f32(float %s) |
| %dst = extractelement <4 x float> %w, i64 0 |
| %low = select i1 %mask, float %call, float %dst |
| %res = insertelement <4 x float> %y, float %low, i64 0 |
| ret <4 x float> %res |
| } |
| |
| define <4 x float> @floor_maskz_ss_trunc(<4 x float> %x, <4 x float> %y, i16 %k) nounwind { |
| ; SSE41-LABEL: floor_maskz_ss_trunc: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: testb $1, %dil |
| ; SSE41-NEXT: jne LBB57_1 |
| ; SSE41-NEXT: ## %bb.2: |
| ; SSE41-NEXT: xorps %xmm0, %xmm0 |
| ; SSE41-NEXT: jmp LBB57_3 |
| ; SSE41-NEXT: LBB57_1: |
| ; SSE41-NEXT: roundss $9, %xmm0, %xmm0 |
| ; SSE41-NEXT: LBB57_3: |
| ; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] |
| ; SSE41-NEXT: movaps %xmm1, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: floor_maskz_ss_trunc: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: testb $1, %dil |
| ; AVX-NEXT: jne LBB57_1 |
| ; AVX-NEXT: ## %bb.2: |
| ; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 |
| ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] |
| ; AVX-NEXT: retq |
| ; AVX-NEXT: LBB57_1: |
| ; AVX-NEXT: vroundss $9, %xmm0, %xmm0, %xmm0 |
| ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512-LABEL: floor_maskz_ss_trunc: |
| ; AVX512: ## %bb.0: |
| ; AVX512-NEXT: kmovw %edi, %k1 |
| ; AVX512-NEXT: vrndscaless $1, %xmm0, %xmm1, %xmm0 {%k1} {z} |
| ; AVX512-NEXT: retq |
| %mask = trunc i16 %k to i1 |
| %s = extractelement <4 x float> %x, i64 0 |
| %call = tail call float @llvm.floor.f32(float %s) |
| %low = select i1 %mask, float %call, float zeroinitializer |
| %res = insertelement <4 x float> %y, float %low, i64 0 |
| ret <4 x float> %res |
| } |
| |
| define <2 x double> @floor_mask_sd_trunc(<2 x double> %x, <2 x double> %y, <2 x double> %w, i16 %k) nounwind { |
| ; SSE41-LABEL: floor_mask_sd_trunc: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: testb $1, %dil |
| ; SSE41-NEXT: je LBB58_2 |
| ; SSE41-NEXT: ## %bb.1: |
| ; SSE41-NEXT: xorps %xmm2, %xmm2 |
| ; SSE41-NEXT: roundsd $9, %xmm0, %xmm2 |
| ; SSE41-NEXT: LBB58_2: |
| ; SSE41-NEXT: blendpd {{.*#+}} xmm1 = xmm2[0],xmm1[1] |
| ; SSE41-NEXT: movapd %xmm1, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: floor_mask_sd_trunc: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: testb $1, %dil |
| ; AVX-NEXT: je LBB58_2 |
| ; AVX-NEXT: ## %bb.1: |
| ; AVX-NEXT: vroundsd $9, %xmm0, %xmm0, %xmm2 |
| ; AVX-NEXT: LBB58_2: |
| ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm2[0],xmm1[1] |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512-LABEL: floor_mask_sd_trunc: |
| ; AVX512: ## %bb.0: |
| ; AVX512-NEXT: kmovw %edi, %k1 |
| ; AVX512-NEXT: vrndscalesd $1, %xmm0, %xmm1, %xmm2 {%k1} |
| ; AVX512-NEXT: vmovapd %xmm2, %xmm0 |
| ; AVX512-NEXT: retq |
| %mask = trunc i16 %k to i1 |
| %s = extractelement <2 x double> %x, i64 0 |
| %call = tail call double @llvm.floor.f64(double %s) |
| %dst = extractelement <2 x double> %w, i64 0 |
| %low = select i1 %mask, double %call, double %dst |
| %res = insertelement <2 x double> %y, double %low, i64 0 |
| ret <2 x double> %res |
| } |
| |
| define <2 x double> @floor_maskz_sd_trunc(<2 x double> %x, <2 x double> %y, i16 %k) nounwind { |
| ; SSE41-LABEL: floor_maskz_sd_trunc: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: testb $1, %dil |
| ; SSE41-NEXT: jne LBB59_1 |
| ; SSE41-NEXT: ## %bb.2: |
| ; SSE41-NEXT: xorpd %xmm0, %xmm0 |
| ; SSE41-NEXT: jmp LBB59_3 |
| ; SSE41-NEXT: LBB59_1: |
| ; SSE41-NEXT: roundsd $9, %xmm0, %xmm0 |
| ; SSE41-NEXT: LBB59_3: |
| ; SSE41-NEXT: blendpd {{.*#+}} xmm1 = xmm0[0],xmm1[1] |
| ; SSE41-NEXT: movapd %xmm1, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: floor_maskz_sd_trunc: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: testb $1, %dil |
| ; AVX-NEXT: jne LBB59_1 |
| ; AVX-NEXT: ## %bb.2: |
| ; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 |
| ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] |
| ; AVX-NEXT: retq |
| ; AVX-NEXT: LBB59_1: |
| ; AVX-NEXT: vroundsd $9, %xmm0, %xmm0, %xmm0 |
| ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512-LABEL: floor_maskz_sd_trunc: |
| ; AVX512: ## %bb.0: |
| ; AVX512-NEXT: kmovw %edi, %k1 |
| ; AVX512-NEXT: vrndscalesd $1, %xmm0, %xmm1, %xmm0 {%k1} {z} |
| ; AVX512-NEXT: retq |
| %mask = trunc i16 %k to i1 |
| %s = extractelement <2 x double> %x, i64 0 |
| %call = tail call double @llvm.floor.f64(double %s) |
| %low = select i1 %mask, double %call, double zeroinitializer |
| %res = insertelement <2 x double> %y, double %low, i64 0 |
| ret <2 x double> %res |
| } |
| |
| define <4 x float> @floor_mask_ss_mask8(<4 x float> %x, <4 x float> %y, <4 x float> %w) nounwind { |
| ; SSE41-LABEL: floor_mask_ss_mask8: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: movaps %xmm0, %xmm3 |
| ; SSE41-NEXT: cmpeqps %xmm1, %xmm3 |
| ; SSE41-NEXT: pextrb $0, %xmm3, %eax |
| ; SSE41-NEXT: testb $1, %al |
| ; SSE41-NEXT: je LBB60_2 |
| ; SSE41-NEXT: ## %bb.1: |
| ; SSE41-NEXT: xorps %xmm2, %xmm2 |
| ; SSE41-NEXT: roundss $9, %xmm0, %xmm2 |
| ; SSE41-NEXT: LBB60_2: |
| ; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm2[0],xmm1[1,2,3] |
| ; SSE41-NEXT: movaps %xmm1, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: floor_mask_ss_mask8: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm3 |
| ; AVX-NEXT: vpextrb $0, %xmm3, %eax |
| ; AVX-NEXT: testb $1, %al |
| ; AVX-NEXT: je LBB60_2 |
| ; AVX-NEXT: ## %bb.1: |
| ; AVX-NEXT: vroundss $9, %xmm0, %xmm0, %xmm2 |
| ; AVX-NEXT: LBB60_2: |
| ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm2[0],xmm1[1,2,3] |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512F-LABEL: floor_mask_ss_mask8: |
| ; AVX512F: ## %bb.0: |
| ; AVX512F-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1 |
| ; AVX512F-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0 |
| ; AVX512F-NEXT: vcmpeqps %zmm1, %zmm0, %k1 |
| ; AVX512F-NEXT: vrndscaless $1, %xmm0, %xmm1, %xmm2 {%k1} |
| ; AVX512F-NEXT: vmovaps %xmm2, %xmm0 |
| ; AVX512F-NEXT: vzeroupper |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: floor_mask_ss_mask8: |
| ; AVX512VL: ## %bb.0: |
| ; AVX512VL-NEXT: vcmpeqps %xmm1, %xmm0, %k1 |
| ; AVX512VL-NEXT: vrndscaless $1, %xmm0, %xmm1, %xmm2 {%k1} |
| ; AVX512VL-NEXT: vmovaps %xmm2, %xmm0 |
| ; AVX512VL-NEXT: retq |
| %mask1 = fcmp oeq <4 x float> %x, %y |
| %mask = extractelement <4 x i1> %mask1, i64 0 |
| %s = extractelement <4 x float> %x, i64 0 |
| %call = tail call float @llvm.floor.f32(float %s) |
| %dst = extractelement <4 x float> %w, i64 0 |
| %low = select i1 %mask, float %call, float %dst |
| %res = insertelement <4 x float> %y, float %low, i64 0 |
| ret <4 x float> %res |
| } |
| |
| define <4 x float> @floor_maskz_ss_mask8(<4 x float> %x, <4 x float> %y) nounwind { |
| ; SSE41-LABEL: floor_maskz_ss_mask8: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: movaps %xmm0, %xmm2 |
| ; SSE41-NEXT: cmpeqps %xmm1, %xmm2 |
| ; SSE41-NEXT: pextrb $0, %xmm2, %eax |
| ; SSE41-NEXT: testb $1, %al |
| ; SSE41-NEXT: jne LBB61_1 |
| ; SSE41-NEXT: ## %bb.2: |
| ; SSE41-NEXT: xorps %xmm0, %xmm0 |
| ; SSE41-NEXT: jmp LBB61_3 |
| ; SSE41-NEXT: LBB61_1: |
| ; SSE41-NEXT: roundss $9, %xmm0, %xmm0 |
| ; SSE41-NEXT: LBB61_3: |
| ; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] |
| ; SSE41-NEXT: movaps %xmm1, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: floor_maskz_ss_mask8: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm2 |
| ; AVX-NEXT: vpextrb $0, %xmm2, %eax |
| ; AVX-NEXT: testb $1, %al |
| ; AVX-NEXT: jne LBB61_1 |
| ; AVX-NEXT: ## %bb.2: |
| ; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 |
| ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] |
| ; AVX-NEXT: retq |
| ; AVX-NEXT: LBB61_1: |
| ; AVX-NEXT: vroundss $9, %xmm0, %xmm0, %xmm0 |
| ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512F-LABEL: floor_maskz_ss_mask8: |
| ; AVX512F: ## %bb.0: |
| ; AVX512F-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1 |
| ; AVX512F-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0 |
| ; AVX512F-NEXT: vcmpeqps %zmm1, %zmm0, %k1 |
| ; AVX512F-NEXT: vrndscaless $1, %xmm0, %xmm1, %xmm0 {%k1} {z} |
| ; AVX512F-NEXT: vzeroupper |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: floor_maskz_ss_mask8: |
| ; AVX512VL: ## %bb.0: |
| ; AVX512VL-NEXT: vcmpeqps %xmm1, %xmm0, %k1 |
| ; AVX512VL-NEXT: vrndscaless $1, %xmm0, %xmm1, %xmm0 {%k1} {z} |
| ; AVX512VL-NEXT: retq |
| %mask1 = fcmp oeq <4 x float> %x, %y |
| %mask = extractelement <4 x i1> %mask1, i64 0 |
| %s = extractelement <4 x float> %x, i64 0 |
| %call = tail call float @llvm.floor.f32(float %s) |
| %low = select i1 %mask, float %call, float zeroinitializer |
| %res = insertelement <4 x float> %y, float %low, i64 0 |
| ret <4 x float> %res |
| } |
| |
| define <2 x double> @floor_mask_sd_mask8(<2 x double> %x, <2 x double> %y, <2 x double> %w) nounwind { |
| ; SSE41-LABEL: floor_mask_sd_mask8: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: movapd %xmm0, %xmm3 |
| ; SSE41-NEXT: cmpeqpd %xmm1, %xmm3 |
| ; SSE41-NEXT: pextrb $0, %xmm3, %eax |
| ; SSE41-NEXT: testb $1, %al |
| ; SSE41-NEXT: je LBB62_2 |
| ; SSE41-NEXT: ## %bb.1: |
| ; SSE41-NEXT: xorps %xmm2, %xmm2 |
| ; SSE41-NEXT: roundsd $9, %xmm0, %xmm2 |
| ; SSE41-NEXT: LBB62_2: |
| ; SSE41-NEXT: blendpd {{.*#+}} xmm1 = xmm2[0],xmm1[1] |
| ; SSE41-NEXT: movapd %xmm1, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: floor_mask_sd_mask8: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm3 |
| ; AVX-NEXT: vpextrb $0, %xmm3, %eax |
| ; AVX-NEXT: testb $1, %al |
| ; AVX-NEXT: je LBB62_2 |
| ; AVX-NEXT: ## %bb.1: |
| ; AVX-NEXT: vroundsd $9, %xmm0, %xmm0, %xmm2 |
| ; AVX-NEXT: LBB62_2: |
| ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm2[0],xmm1[1] |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512F-LABEL: floor_mask_sd_mask8: |
| ; AVX512F: ## %bb.0: |
| ; AVX512F-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1 |
| ; AVX512F-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0 |
| ; AVX512F-NEXT: vcmpeqpd %zmm1, %zmm0, %k1 |
| ; AVX512F-NEXT: vrndscalesd $1, %xmm0, %xmm1, %xmm2 {%k1} |
| ; AVX512F-NEXT: vmovapd %xmm2, %xmm0 |
| ; AVX512F-NEXT: vzeroupper |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: floor_mask_sd_mask8: |
| ; AVX512VL: ## %bb.0: |
| ; AVX512VL-NEXT: vcmpeqpd %xmm1, %xmm0, %k1 |
| ; AVX512VL-NEXT: vrndscalesd $1, %xmm0, %xmm1, %xmm2 {%k1} |
| ; AVX512VL-NEXT: vmovapd %xmm2, %xmm0 |
| ; AVX512VL-NEXT: retq |
| %mask1 = fcmp oeq <2 x double> %x, %y |
| %mask = extractelement <2 x i1> %mask1, i64 0 |
| %s = extractelement <2 x double> %x, i64 0 |
| %call = tail call double @llvm.floor.f64(double %s) |
| %dst = extractelement <2 x double> %w, i64 0 |
| %low = select i1 %mask, double %call, double %dst |
| %res = insertelement <2 x double> %y, double %low, i64 0 |
| ret <2 x double> %res |
| } |
| |
| define <2 x double> @floor_maskz_sd_mask8(<2 x double> %x, <2 x double> %y) nounwind { |
| ; SSE41-LABEL: floor_maskz_sd_mask8: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: movapd %xmm0, %xmm2 |
| ; SSE41-NEXT: cmpeqpd %xmm1, %xmm2 |
| ; SSE41-NEXT: pextrb $0, %xmm2, %eax |
| ; SSE41-NEXT: testb $1, %al |
| ; SSE41-NEXT: jne LBB63_1 |
| ; SSE41-NEXT: ## %bb.2: |
| ; SSE41-NEXT: xorpd %xmm0, %xmm0 |
| ; SSE41-NEXT: jmp LBB63_3 |
| ; SSE41-NEXT: LBB63_1: |
| ; SSE41-NEXT: roundsd $9, %xmm0, %xmm0 |
| ; SSE41-NEXT: LBB63_3: |
| ; SSE41-NEXT: blendpd {{.*#+}} xmm1 = xmm0[0],xmm1[1] |
| ; SSE41-NEXT: movapd %xmm1, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: floor_maskz_sd_mask8: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm2 |
| ; AVX-NEXT: vpextrb $0, %xmm2, %eax |
| ; AVX-NEXT: testb $1, %al |
| ; AVX-NEXT: jne LBB63_1 |
| ; AVX-NEXT: ## %bb.2: |
| ; AVX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 |
| ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] |
| ; AVX-NEXT: retq |
| ; AVX-NEXT: LBB63_1: |
| ; AVX-NEXT: vroundsd $9, %xmm0, %xmm0, %xmm0 |
| ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512F-LABEL: floor_maskz_sd_mask8: |
| ; AVX512F: ## %bb.0: |
| ; AVX512F-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1 |
| ; AVX512F-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0 |
| ; AVX512F-NEXT: vcmpeqpd %zmm1, %zmm0, %k1 |
| ; AVX512F-NEXT: vrndscalesd $1, %xmm0, %xmm1, %xmm0 {%k1} {z} |
| ; AVX512F-NEXT: vzeroupper |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: floor_maskz_sd_mask8: |
| ; AVX512VL: ## %bb.0: |
| ; AVX512VL-NEXT: vcmpeqpd %xmm1, %xmm0, %k1 |
| ; AVX512VL-NEXT: vrndscalesd $1, %xmm0, %xmm1, %xmm0 {%k1} {z} |
| ; AVX512VL-NEXT: retq |
| %mask1 = fcmp oeq <2 x double> %x, %y |
| %mask = extractelement <2 x i1> %mask1, i64 0 |
| %s = extractelement <2 x double> %x, i64 0 |
| %call = tail call double @llvm.floor.f64(double %s) |
| %low = select i1 %mask, double %call, double zeroinitializer |
| %res = insertelement <2 x double> %y, double %low, i64 0 |
| ret <2 x double> %res |
| } |
| |
| define <4 x float> @ceil_ss(<4 x float> %x, <4 x float> %y) nounwind { |
| ; SSE41-LABEL: ceil_ss: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: roundss $2, %xmm0, %xmm1 |
| ; SSE41-NEXT: movaps %xmm1, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: ceil_ss: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vroundss $2, %xmm0, %xmm1, %xmm0 |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512-LABEL: ceil_ss: |
| ; AVX512: ## %bb.0: |
| ; AVX512-NEXT: vroundss $2, %xmm0, %xmm1, %xmm0 |
| ; AVX512-NEXT: retq |
| %s = extractelement <4 x float> %x, i32 0 |
| %call = call float @llvm.ceil.f32(float %s) |
| %res = insertelement <4 x float> %y, float %call, i32 0 |
| ret <4 x float> %res |
| } |
| declare float @llvm.ceil.f32(float %s) |
| |
| define <2 x double> @ceil_sd(<2 x double> %x, <2 x double> %y) nounwind { |
| ; SSE41-LABEL: ceil_sd: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: roundsd $2, %xmm0, %xmm1 |
| ; SSE41-NEXT: movapd %xmm1, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: ceil_sd: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vroundsd $2, %xmm0, %xmm1, %xmm0 |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512-LABEL: ceil_sd: |
| ; AVX512: ## %bb.0: |
| ; AVX512-NEXT: vroundsd $2, %xmm0, %xmm1, %xmm0 |
| ; AVX512-NEXT: retq |
| %s = extractelement <2 x double> %x, i32 0 |
| %call = call double @llvm.ceil.f64(double %s) |
| %res = insertelement <2 x double> %y, double %call, i32 0 |
| ret <2 x double> %res |
| } |
| declare double @llvm.ceil.f64(double %s) |
| |
| define <4 x float> @ceil_mask_128_ps(<4 x float> %x, <4 x float> %y) nounwind { |
| ; SSE41-LABEL: ceil_mask_128_ps: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: roundps $10, %xmm0, %xmm2 |
| ; SSE41-NEXT: cmpeqps %xmm1, %xmm0 |
| ; SSE41-NEXT: blendvps %xmm0, %xmm2, %xmm1 |
| ; SSE41-NEXT: movaps %xmm1, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: ceil_mask_128_ps: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm2 |
| ; AVX-NEXT: vroundps $10, %xmm0, %xmm0 |
| ; AVX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512F-LABEL: ceil_mask_128_ps: |
| ; AVX512F: ## %bb.0: |
| ; AVX512F-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1 |
| ; AVX512F-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0 |
| ; AVX512F-NEXT: vcmpeqps %zmm1, %zmm0, %k1 |
| ; AVX512F-NEXT: vroundps $10, %xmm0, %xmm0 |
| ; AVX512F-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} |
| ; AVX512F-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0 |
| ; AVX512F-NEXT: vzeroupper |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: ceil_mask_128_ps: |
| ; AVX512VL: ## %bb.0: |
| ; AVX512VL-NEXT: vcmpeqps %xmm1, %xmm0, %k1 |
| ; AVX512VL-NEXT: vrndscaleps $10, %xmm0, %xmm1 {%k1} |
| ; AVX512VL-NEXT: vmovaps %xmm1, %xmm0 |
| ; AVX512VL-NEXT: retq |
| %k = fcmp oeq <4 x float> %x, %y |
| %call = call <4 x float> @llvm.ceil.v4f32(<4 x float> %x) |
| %res = select <4 x i1> %k, <4 x float> %call, <4 x float> %y |
| ret <4 x float> %res |
| } |
| |
| define <4 x float> @ceil_maskz_128_ps(<4 x float> %x, <4 x float> %y) nounwind { |
| ; SSE41-LABEL: ceil_maskz_128_ps: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: cmpeqps %xmm0, %xmm1 |
| ; SSE41-NEXT: roundps $10, %xmm0, %xmm0 |
| ; SSE41-NEXT: andps %xmm1, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: ceil_maskz_128_ps: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 |
| ; AVX-NEXT: vroundps $10, %xmm0, %xmm0 |
| ; AVX-NEXT: vandps %xmm0, %xmm1, %xmm0 |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512F-LABEL: ceil_maskz_128_ps: |
| ; AVX512F: ## %bb.0: |
| ; AVX512F-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1 |
| ; AVX512F-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0 |
| ; AVX512F-NEXT: vcmpeqps %zmm1, %zmm0, %k1 |
| ; AVX512F-NEXT: vroundps $10, %xmm0, %xmm0 |
| ; AVX512F-NEXT: vmovaps %zmm0, %zmm0 {%k1} {z} |
| ; AVX512F-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0 |
| ; AVX512F-NEXT: vzeroupper |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: ceil_maskz_128_ps: |
| ; AVX512VL: ## %bb.0: |
| ; AVX512VL-NEXT: vcmpeqps %xmm1, %xmm0, %k1 |
| ; AVX512VL-NEXT: vrndscaleps $10, %xmm0, %xmm0 {%k1} {z} |
| ; AVX512VL-NEXT: retq |
| %k = fcmp oeq <4 x float> %x, %y |
| %call = call <4 x float> @llvm.ceil.v4f32(<4 x float> %x) |
| %res = select <4 x i1> %k, <4 x float> %call, <4 x float> zeroinitializer |
| ret <4 x float> %res |
| } |
| |
| define <2 x double> @ceil_mask_128_pd(<2 x double> %x, <2 x double> %y) nounwind { |
| ; SSE41-LABEL: ceil_mask_128_pd: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: roundpd $10, %xmm0, %xmm2 |
| ; SSE41-NEXT: cmpeqpd %xmm1, %xmm0 |
| ; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1 |
| ; SSE41-NEXT: movapd %xmm1, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: ceil_mask_128_pd: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm2 |
| ; AVX-NEXT: vroundpd $10, %xmm0, %xmm0 |
| ; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512F-LABEL: ceil_mask_128_pd: |
| ; AVX512F: ## %bb.0: |
| ; AVX512F-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1 |
| ; AVX512F-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0 |
| ; AVX512F-NEXT: vcmpeqpd %zmm1, %zmm0, %k1 |
| ; AVX512F-NEXT: vroundpd $10, %xmm0, %xmm0 |
| ; AVX512F-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} |
| ; AVX512F-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0 |
| ; AVX512F-NEXT: vzeroupper |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: ceil_mask_128_pd: |
| ; AVX512VL: ## %bb.0: |
| ; AVX512VL-NEXT: vcmpeqpd %xmm1, %xmm0, %k1 |
| ; AVX512VL-NEXT: vrndscalepd $10, %xmm0, %xmm1 {%k1} |
| ; AVX512VL-NEXT: vmovapd %xmm1, %xmm0 |
| ; AVX512VL-NEXT: retq |
| %k = fcmp oeq <2 x double> %x, %y |
| %call = call <2 x double> @llvm.ceil.v2f64(<2 x double> %x) |
| %res = select <2 x i1> %k, <2 x double> %call, <2 x double> %y |
| ret <2 x double> %res |
| } |
| |
| define <2 x double> @ceil_maskz_128_pd(<2 x double> %x, <2 x double> %y) nounwind { |
| ; SSE41-LABEL: ceil_maskz_128_pd: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: cmpeqpd %xmm0, %xmm1 |
| ; SSE41-NEXT: roundpd $10, %xmm0, %xmm0 |
| ; SSE41-NEXT: andpd %xmm1, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: ceil_maskz_128_pd: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 |
| ; AVX-NEXT: vroundpd $10, %xmm0, %xmm0 |
| ; AVX-NEXT: vandpd %xmm0, %xmm1, %xmm0 |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512F-LABEL: ceil_maskz_128_pd: |
| ; AVX512F: ## %bb.0: |
| ; AVX512F-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1 |
| ; AVX512F-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0 |
| ; AVX512F-NEXT: vcmpeqpd %zmm1, %zmm0, %k1 |
| ; AVX512F-NEXT: vroundpd $10, %xmm0, %xmm0 |
| ; AVX512F-NEXT: vmovapd %zmm0, %zmm0 {%k1} {z} |
| ; AVX512F-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0 |
| ; AVX512F-NEXT: vzeroupper |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: ceil_maskz_128_pd: |
| ; AVX512VL: ## %bb.0: |
| ; AVX512VL-NEXT: vcmpeqpd %xmm1, %xmm0, %k1 |
| ; AVX512VL-NEXT: vrndscalepd $10, %xmm0, %xmm0 {%k1} {z} |
| ; AVX512VL-NEXT: retq |
| %k = fcmp oeq <2 x double> %x, %y |
| %call = call <2 x double> @llvm.ceil.v2f64(<2 x double> %x) |
| %res = select <2 x i1> %k, <2 x double> %call, <2 x double> zeroinitializer |
| ret <2 x double> %res |
| } |
| |
| define <8 x float> @ceil_mask_256_ps(<8 x float> %x, <8 x float> %y) nounwind { |
| ; SSE41-LABEL: ceil_mask_256_ps: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: roundps $10, %xmm1, %xmm4 |
| ; SSE41-NEXT: cmpeqps %xmm3, %xmm1 |
| ; SSE41-NEXT: roundps $10, %xmm0, %xmm5 |
| ; SSE41-NEXT: cmpeqps %xmm2, %xmm0 |
| ; SSE41-NEXT: blendvps %xmm0, %xmm5, %xmm2 |
| ; SSE41-NEXT: movaps %xmm1, %xmm0 |
| ; SSE41-NEXT: blendvps %xmm0, %xmm4, %xmm3 |
| ; SSE41-NEXT: movaps %xmm2, %xmm0 |
| ; SSE41-NEXT: movaps %xmm3, %xmm1 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: ceil_mask_256_ps: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vcmpeqps %ymm1, %ymm0, %ymm2 |
| ; AVX-NEXT: vroundps $10, %ymm0, %ymm0 |
| ; AVX-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0 |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512F-LABEL: ceil_mask_256_ps: |
| ; AVX512F: ## %bb.0: |
| ; AVX512F-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1 |
| ; AVX512F-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0 |
| ; AVX512F-NEXT: vcmpeqps %zmm1, %zmm0, %k1 |
| ; AVX512F-NEXT: vroundps $10, %ymm0, %ymm0 |
| ; AVX512F-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} |
| ; AVX512F-NEXT: ## kill: def $ymm0 killed $ymm0 killed $zmm0 |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: ceil_mask_256_ps: |
| ; AVX512VL: ## %bb.0: |
| ; AVX512VL-NEXT: vcmpeqps %ymm1, %ymm0, %k1 |
| ; AVX512VL-NEXT: vrndscaleps $10, %ymm0, %ymm1 {%k1} |
| ; AVX512VL-NEXT: vmovaps %ymm1, %ymm0 |
| ; AVX512VL-NEXT: retq |
| %k = fcmp oeq <8 x float> %x, %y |
| %call = call <8 x float> @llvm.ceil.v8f32(<8 x float> %x) |
| %res = select <8 x i1> %k, <8 x float> %call, <8 x float> %y |
| ret <8 x float> %res |
| } |
| |
| define <8 x float> @ceil_maskz_256_ps(<8 x float> %x, <8 x float> %y) nounwind { |
| ; SSE41-LABEL: ceil_maskz_256_ps: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: cmpeqps %xmm1, %xmm3 |
| ; SSE41-NEXT: cmpeqps %xmm0, %xmm2 |
| ; SSE41-NEXT: roundps $10, %xmm1, %xmm1 |
| ; SSE41-NEXT: andps %xmm3, %xmm1 |
| ; SSE41-NEXT: roundps $10, %xmm0, %xmm0 |
| ; SSE41-NEXT: andps %xmm2, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: ceil_maskz_256_ps: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 |
| ; AVX-NEXT: vroundps $10, %ymm0, %ymm0 |
| ; AVX-NEXT: vandps %ymm0, %ymm1, %ymm0 |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512F-LABEL: ceil_maskz_256_ps: |
| ; AVX512F: ## %bb.0: |
| ; AVX512F-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1 |
| ; AVX512F-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0 |
| ; AVX512F-NEXT: vcmpeqps %zmm1, %zmm0, %k1 |
| ; AVX512F-NEXT: vroundps $10, %ymm0, %ymm0 |
| ; AVX512F-NEXT: vmovaps %zmm0, %zmm0 {%k1} {z} |
| ; AVX512F-NEXT: ## kill: def $ymm0 killed $ymm0 killed $zmm0 |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: ceil_maskz_256_ps: |
| ; AVX512VL: ## %bb.0: |
| ; AVX512VL-NEXT: vcmpeqps %ymm1, %ymm0, %k1 |
| ; AVX512VL-NEXT: vrndscaleps $10, %ymm0, %ymm0 {%k1} {z} |
| ; AVX512VL-NEXT: retq |
| %k = fcmp oeq <8 x float> %x, %y |
| %call = call <8 x float> @llvm.ceil.v8f32(<8 x float> %x) |
| %res = select <8 x i1> %k, <8 x float> %call, <8 x float> zeroinitializer |
| ret <8 x float> %res |
| } |
| |
| define <4 x double> @ceil_mask_256_pd(<4 x double> %x, <4 x double> %y) nounwind { |
| ; SSE41-LABEL: ceil_mask_256_pd: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: roundpd $10, %xmm1, %xmm4 |
| ; SSE41-NEXT: cmpeqpd %xmm3, %xmm1 |
| ; SSE41-NEXT: roundpd $10, %xmm0, %xmm5 |
| ; SSE41-NEXT: cmpeqpd %xmm2, %xmm0 |
| ; SSE41-NEXT: blendvpd %xmm0, %xmm5, %xmm2 |
| ; SSE41-NEXT: movapd %xmm1, %xmm0 |
| ; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm3 |
| ; SSE41-NEXT: movapd %xmm2, %xmm0 |
| ; SSE41-NEXT: movapd %xmm3, %xmm1 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: ceil_mask_256_pd: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm2 |
| ; AVX-NEXT: vroundpd $10, %ymm0, %ymm0 |
| ; AVX-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512F-LABEL: ceil_mask_256_pd: |
| ; AVX512F: ## %bb.0: |
| ; AVX512F-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1 |
| ; AVX512F-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0 |
| ; AVX512F-NEXT: vcmpeqpd %zmm1, %zmm0, %k1 |
| ; AVX512F-NEXT: vroundpd $10, %ymm0, %ymm0 |
| ; AVX512F-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} |
| ; AVX512F-NEXT: ## kill: def $ymm0 killed $ymm0 killed $zmm0 |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: ceil_mask_256_pd: |
| ; AVX512VL: ## %bb.0: |
| ; AVX512VL-NEXT: vcmpeqpd %ymm1, %ymm0, %k1 |
| ; AVX512VL-NEXT: vrndscalepd $10, %ymm0, %ymm1 {%k1} |
| ; AVX512VL-NEXT: vmovapd %ymm1, %ymm0 |
| ; AVX512VL-NEXT: retq |
| %k = fcmp oeq <4 x double> %x, %y |
| %call = call <4 x double> @llvm.ceil.v4f64(<4 x double> %x) |
| %res = select <4 x i1> %k, <4 x double> %call, <4 x double> %y |
| ret <4 x double> %res |
| } |
| |
| define <4 x double> @ceil_maskz_256_pd(<4 x double> %x, <4 x double> %y) nounwind { |
| ; SSE41-LABEL: ceil_maskz_256_pd: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: cmpeqpd %xmm1, %xmm3 |
| ; SSE41-NEXT: cmpeqpd %xmm0, %xmm2 |
| ; SSE41-NEXT: roundpd $10, %xmm1, %xmm1 |
| ; SSE41-NEXT: andpd %xmm3, %xmm1 |
| ; SSE41-NEXT: roundpd $10, %xmm0, %xmm0 |
| ; SSE41-NEXT: andpd %xmm2, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: ceil_maskz_256_pd: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 |
| ; AVX-NEXT: vroundpd $10, %ymm0, %ymm0 |
| ; AVX-NEXT: vandpd %ymm0, %ymm1, %ymm0 |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512F-LABEL: ceil_maskz_256_pd: |
| ; AVX512F: ## %bb.0: |
| ; AVX512F-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1 |
| ; AVX512F-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0 |
| ; AVX512F-NEXT: vcmpeqpd %zmm1, %zmm0, %k1 |
| ; AVX512F-NEXT: vroundpd $10, %ymm0, %ymm0 |
| ; AVX512F-NEXT: vmovapd %zmm0, %zmm0 {%k1} {z} |
| ; AVX512F-NEXT: ## kill: def $ymm0 killed $ymm0 killed $zmm0 |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: ceil_maskz_256_pd: |
| ; AVX512VL: ## %bb.0: |
| ; AVX512VL-NEXT: vcmpeqpd %ymm1, %ymm0, %k1 |
| ; AVX512VL-NEXT: vrndscalepd $10, %ymm0, %ymm0 {%k1} {z} |
| ; AVX512VL-NEXT: retq |
| %k = fcmp oeq <4 x double> %x, %y |
| %call = call <4 x double> @llvm.ceil.v4f64(<4 x double> %x) |
| %res = select <4 x i1> %k, <4 x double> %call, <4 x double> zeroinitializer |
| ret <4 x double> %res |
| } |
| |
| define <16 x float> @ceil_mask_512_ps(<16 x float> %x, <16 x float> %y) nounwind { |
| ; SSE41-LABEL: ceil_mask_512_ps: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: roundps $10, %xmm3, %xmm8 |
| ; SSE41-NEXT: cmpeqps %xmm7, %xmm3 |
| ; SSE41-NEXT: roundps $10, %xmm2, %xmm9 |
| ; SSE41-NEXT: cmpeqps %xmm6, %xmm2 |
| ; SSE41-NEXT: roundps $10, %xmm1, %xmm10 |
| ; SSE41-NEXT: cmpeqps %xmm5, %xmm1 |
| ; SSE41-NEXT: roundps $10, %xmm0, %xmm11 |
| ; SSE41-NEXT: cmpeqps %xmm4, %xmm0 |
| ; SSE41-NEXT: blendvps %xmm0, %xmm11, %xmm4 |
| ; SSE41-NEXT: movaps %xmm1, %xmm0 |
| ; SSE41-NEXT: blendvps %xmm0, %xmm10, %xmm5 |
| ; SSE41-NEXT: movaps %xmm2, %xmm0 |
| ; SSE41-NEXT: blendvps %xmm0, %xmm9, %xmm6 |
| ; SSE41-NEXT: movaps %xmm3, %xmm0 |
| ; SSE41-NEXT: blendvps %xmm0, %xmm8, %xmm7 |
| ; SSE41-NEXT: movaps %xmm4, %xmm0 |
| ; SSE41-NEXT: movaps %xmm5, %xmm1 |
| ; SSE41-NEXT: movaps %xmm6, %xmm2 |
| ; SSE41-NEXT: movaps %xmm7, %xmm3 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: ceil_mask_512_ps: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vcmpeqps %ymm3, %ymm1, %ymm4 |
| ; AVX-NEXT: vcmpeqps %ymm2, %ymm0, %ymm5 |
| ; AVX-NEXT: vroundps $10, %ymm1, %ymm1 |
| ; AVX-NEXT: vroundps $10, %ymm0, %ymm0 |
| ; AVX-NEXT: vblendvps %ymm5, %ymm0, %ymm2, %ymm0 |
| ; AVX-NEXT: vblendvps %ymm4, %ymm1, %ymm3, %ymm1 |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512-LABEL: ceil_mask_512_ps: |
| ; AVX512: ## %bb.0: |
| ; AVX512-NEXT: vcmpeqps %zmm1, %zmm0, %k1 |
| ; AVX512-NEXT: vrndscaleps $10, %zmm0, %zmm1 {%k1} |
| ; AVX512-NEXT: vmovaps %zmm1, %zmm0 |
| ; AVX512-NEXT: retq |
| %k = fcmp oeq <16 x float> %x, %y |
| %call = call <16 x float> @llvm.ceil.v16f32(<16 x float> %x) |
| %res = select <16 x i1> %k, <16 x float> %call, <16 x float> %y |
| ret <16 x float> %res |
| } |
| |
| define <16 x float> @ceil_maskz_512_ps(<16 x float> %x, <16 x float> %y) nounwind { |
| ; SSE41-LABEL: ceil_maskz_512_ps: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: cmpeqps %xmm3, %xmm7 |
| ; SSE41-NEXT: cmpeqps %xmm2, %xmm6 |
| ; SSE41-NEXT: cmpeqps %xmm1, %xmm5 |
| ; SSE41-NEXT: cmpeqps %xmm0, %xmm4 |
| ; SSE41-NEXT: roundps $10, %xmm3, %xmm3 |
| ; SSE41-NEXT: andps %xmm7, %xmm3 |
| ; SSE41-NEXT: roundps $10, %xmm2, %xmm2 |
| ; SSE41-NEXT: andps %xmm6, %xmm2 |
| ; SSE41-NEXT: roundps $10, %xmm1, %xmm1 |
| ; SSE41-NEXT: andps %xmm5, %xmm1 |
| ; SSE41-NEXT: roundps $10, %xmm0, %xmm0 |
| ; SSE41-NEXT: andps %xmm4, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: ceil_maskz_512_ps: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vcmpeqps %ymm3, %ymm1, %ymm3 |
| ; AVX-NEXT: vcmpeqps %ymm2, %ymm0, %ymm2 |
| ; AVX-NEXT: vroundps $10, %ymm1, %ymm1 |
| ; AVX-NEXT: vandps %ymm1, %ymm3, %ymm1 |
| ; AVX-NEXT: vroundps $10, %ymm0, %ymm0 |
| ; AVX-NEXT: vandps %ymm0, %ymm2, %ymm0 |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512-LABEL: ceil_maskz_512_ps: |
| ; AVX512: ## %bb.0: |
| ; AVX512-NEXT: vcmpeqps %zmm1, %zmm0, %k1 |
| ; AVX512-NEXT: vrndscaleps $10, %zmm0, %zmm0 {%k1} {z} |
| ; AVX512-NEXT: retq |
| %k = fcmp oeq <16 x float> %x, %y |
| %call = call <16 x float> @llvm.ceil.v16f32(<16 x float> %x) |
| %res = select <16 x i1> %k, <16 x float> %call, <16 x float> zeroinitializer |
| ret <16 x float> %res |
| } |
| |
| define <8 x double> @ceil_mask_512_pd(<8 x double> %x, <8 x double> %y) nounwind { |
| ; SSE41-LABEL: ceil_mask_512_pd: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: roundpd $10, %xmm3, %xmm8 |
| ; SSE41-NEXT: cmpeqpd %xmm7, %xmm3 |
| ; SSE41-NEXT: roundpd $10, %xmm2, %xmm9 |
| ; SSE41-NEXT: cmpeqpd %xmm6, %xmm2 |
| ; SSE41-NEXT: roundpd $10, %xmm1, %xmm10 |
| ; SSE41-NEXT: cmpeqpd %xmm5, %xmm1 |
| ; SSE41-NEXT: roundpd $10, %xmm0, %xmm11 |
| ; SSE41-NEXT: cmpeqpd %xmm4, %xmm0 |
| ; SSE41-NEXT: blendvpd %xmm0, %xmm11, %xmm4 |
| ; SSE41-NEXT: movapd %xmm1, %xmm0 |
| ; SSE41-NEXT: blendvpd %xmm0, %xmm10, %xmm5 |
| ; SSE41-NEXT: movapd %xmm2, %xmm0 |
| ; SSE41-NEXT: blendvpd %xmm0, %xmm9, %xmm6 |
| ; SSE41-NEXT: movapd %xmm3, %xmm0 |
| ; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm7 |
| ; SSE41-NEXT: movapd %xmm4, %xmm0 |
| ; SSE41-NEXT: movapd %xmm5, %xmm1 |
| ; SSE41-NEXT: movapd %xmm6, %xmm2 |
| ; SSE41-NEXT: movapd %xmm7, %xmm3 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: ceil_mask_512_pd: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vcmpeqpd %ymm3, %ymm1, %ymm4 |
| ; AVX-NEXT: vcmpeqpd %ymm2, %ymm0, %ymm5 |
| ; AVX-NEXT: vroundpd $10, %ymm1, %ymm1 |
| ; AVX-NEXT: vroundpd $10, %ymm0, %ymm0 |
| ; AVX-NEXT: vblendvpd %ymm5, %ymm0, %ymm2, %ymm0 |
| ; AVX-NEXT: vblendvpd %ymm4, %ymm1, %ymm3, %ymm1 |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512-LABEL: ceil_mask_512_pd: |
| ; AVX512: ## %bb.0: |
| ; AVX512-NEXT: vcmpeqpd %zmm1, %zmm0, %k1 |
| ; AVX512-NEXT: vrndscalepd $10, %zmm0, %zmm1 {%k1} |
| ; AVX512-NEXT: vmovapd %zmm1, %zmm0 |
| ; AVX512-NEXT: retq |
| %k = fcmp oeq <8 x double> %x, %y |
| %call = call <8 x double> @llvm.ceil.v8f64(<8 x double> %x) |
| %res = select <8 x i1> %k, <8 x double> %call, <8 x double> %y |
| ret <8 x double> %res |
| } |
| |
| define <8 x double> @ceil_maskz_512_pd(<8 x double> %x, <8 x double> %y) nounwind { |
| ; SSE41-LABEL: ceil_maskz_512_pd: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: cmpeqpd %xmm3, %xmm7 |
| ; SSE41-NEXT: cmpeqpd %xmm2, %xmm6 |
| ; SSE41-NEXT: cmpeqpd %xmm1, %xmm5 |
| ; SSE41-NEXT: cmpeqpd %xmm0, %xmm4 |
| ; SSE41-NEXT: roundpd $10, %xmm3, %xmm3 |
| ; SSE41-NEXT: andpd %xmm7, %xmm3 |
| ; SSE41-NEXT: roundpd $10, %xmm2, %xmm2 |
| ; SSE41-NEXT: andpd %xmm6, %xmm2 |
| ; SSE41-NEXT: roundpd $10, %xmm1, %xmm1 |
| ; SSE41-NEXT: andpd %xmm5, %xmm1 |
| ; SSE41-NEXT: roundpd $10, %xmm0, %xmm0 |
| ; SSE41-NEXT: andpd %xmm4, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: ceil_maskz_512_pd: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vcmpeqpd %ymm3, %ymm1, %ymm3 |
| ; AVX-NEXT: vcmpeqpd %ymm2, %ymm0, %ymm2 |
| ; AVX-NEXT: vroundpd $10, %ymm1, %ymm1 |
| ; AVX-NEXT: vandpd %ymm1, %ymm3, %ymm1 |
| ; AVX-NEXT: vroundpd $10, %ymm0, %ymm0 |
| ; AVX-NEXT: vandpd %ymm0, %ymm2, %ymm0 |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512-LABEL: ceil_maskz_512_pd: |
| ; AVX512: ## %bb.0: |
| ; AVX512-NEXT: vcmpeqpd %zmm1, %zmm0, %k1 |
| ; AVX512-NEXT: vrndscalepd $10, %zmm0, %zmm0 {%k1} {z} |
| ; AVX512-NEXT: retq |
| %k = fcmp oeq <8 x double> %x, %y |
| %call = call <8 x double> @llvm.ceil.v8f64(<8 x double> %x) |
| %res = select <8 x i1> %k, <8 x double> %call, <8 x double> zeroinitializer |
| ret <8 x double> %res |
| } |
| |
| define <4 x float> @ceil_mask_ss(<4 x float> %x, <4 x float> %y, <4 x float> %w, i8 %k) nounwind { |
| ; SSE41-LABEL: ceil_mask_ss: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: testb $1, %dil |
| ; SSE41-NEXT: je LBB78_2 |
| ; SSE41-NEXT: ## %bb.1: |
| ; SSE41-NEXT: xorps %xmm2, %xmm2 |
| ; SSE41-NEXT: roundss $10, %xmm0, %xmm2 |
| ; SSE41-NEXT: LBB78_2: |
| ; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm2[0],xmm1[1,2,3] |
| ; SSE41-NEXT: movaps %xmm1, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: ceil_mask_ss: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: testb $1, %dil |
| ; AVX-NEXT: je LBB78_2 |
| ; AVX-NEXT: ## %bb.1: |
| ; AVX-NEXT: vroundss $10, %xmm0, %xmm0, %xmm2 |
| ; AVX-NEXT: LBB78_2: |
| ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm2[0],xmm1[1,2,3] |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512-LABEL: ceil_mask_ss: |
| ; AVX512: ## %bb.0: |
| ; AVX512-NEXT: kmovw %edi, %k1 |
| ; AVX512-NEXT: vrndscaless $2, %xmm0, %xmm1, %xmm2 {%k1} |
| ; AVX512-NEXT: vmovaps %xmm2, %xmm0 |
| ; AVX512-NEXT: retq |
| %mask = and i8 %k, 1 |
| %nmask = icmp eq i8 %mask, 0 |
| %s = extractelement <4 x float> %x, i64 0 |
| %call = tail call float @llvm.ceil.f32(float %s) |
| %dst = extractelement <4 x float> %w, i64 0 |
| %low = select i1 %nmask, float %dst, float %call |
| %res = insertelement <4 x float> %y, float %low, i64 0 |
| ret <4 x float> %res |
| } |
| |
| define <4 x float> @ceil_maskz_ss(<4 x float> %x, <4 x float> %y, i8 %k) nounwind { |
| ; SSE41-LABEL: ceil_maskz_ss: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: testb $1, %dil |
| ; SSE41-NEXT: xorps %xmm2, %xmm2 |
| ; SSE41-NEXT: je LBB79_2 |
| ; SSE41-NEXT: ## %bb.1: |
| ; SSE41-NEXT: xorps %xmm2, %xmm2 |
| ; SSE41-NEXT: roundss $10, %xmm0, %xmm2 |
| ; SSE41-NEXT: LBB79_2: |
| ; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm2[0],xmm1[1,2,3] |
| ; SSE41-NEXT: movaps %xmm1, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: ceil_maskz_ss: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: testb $1, %dil |
| ; AVX-NEXT: vxorps %xmm2, %xmm2, %xmm2 |
| ; AVX-NEXT: je LBB79_2 |
| ; AVX-NEXT: ## %bb.1: |
| ; AVX-NEXT: vroundss $10, %xmm0, %xmm0, %xmm2 |
| ; AVX-NEXT: LBB79_2: |
| ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm2[0],xmm1[1,2,3] |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512-LABEL: ceil_maskz_ss: |
| ; AVX512: ## %bb.0: |
| ; AVX512-NEXT: kmovw %edi, %k1 |
| ; AVX512-NEXT: vrndscaless $2, %xmm0, %xmm1, %xmm0 {%k1} {z} |
| ; AVX512-NEXT: retq |
| %mask = and i8 %k, 1 |
| %nmask = icmp eq i8 %mask, 0 |
| %s = extractelement <4 x float> %x, i64 0 |
| %call = tail call float @llvm.ceil.f32(float %s) |
| %low = select i1 %nmask, float zeroinitializer, float %call |
| %res = insertelement <4 x float> %y, float %low, i64 0 |
| ret <4 x float> %res |
| } |
| |
| define <2 x double> @ceil_mask_sd(<2 x double> %x, <2 x double> %y, <2 x double> %w, i8 %k) nounwind { |
| ; SSE41-LABEL: ceil_mask_sd: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: testb $1, %dil |
| ; SSE41-NEXT: je LBB80_2 |
| ; SSE41-NEXT: ## %bb.1: |
| ; SSE41-NEXT: xorps %xmm2, %xmm2 |
| ; SSE41-NEXT: roundsd $10, %xmm0, %xmm2 |
| ; SSE41-NEXT: LBB80_2: |
| ; SSE41-NEXT: blendpd {{.*#+}} xmm1 = xmm2[0],xmm1[1] |
| ; SSE41-NEXT: movapd %xmm1, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: ceil_mask_sd: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: testb $1, %dil |
| ; AVX-NEXT: je LBB80_2 |
| ; AVX-NEXT: ## %bb.1: |
| ; AVX-NEXT: vroundsd $10, %xmm0, %xmm0, %xmm2 |
| ; AVX-NEXT: LBB80_2: |
| ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm2[0],xmm1[1] |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512-LABEL: ceil_mask_sd: |
| ; AVX512: ## %bb.0: |
| ; AVX512-NEXT: kmovw %edi, %k1 |
| ; AVX512-NEXT: vrndscalesd $2, %xmm0, %xmm1, %xmm2 {%k1} |
| ; AVX512-NEXT: vmovapd %xmm2, %xmm0 |
| ; AVX512-NEXT: retq |
| %mask = and i8 %k, 1 |
| %nmask = icmp eq i8 %mask, 0 |
| %s = extractelement <2 x double> %x, i64 0 |
| %call = tail call double @llvm.ceil.f64(double %s) |
| %dst = extractelement <2 x double> %w, i64 0 |
| %low = select i1 %nmask, double %dst, double %call |
| %res = insertelement <2 x double> %y, double %low, i64 0 |
| ret <2 x double> %res |
| } |
| |
| define <2 x double> @ceil_maskz_sd(<2 x double> %x, <2 x double> %y, i8 %k) nounwind { |
| ; SSE41-LABEL: ceil_maskz_sd: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: testb $1, %dil |
| ; SSE41-NEXT: xorpd %xmm2, %xmm2 |
| ; SSE41-NEXT: je LBB81_2 |
| ; SSE41-NEXT: ## %bb.1: |
| ; SSE41-NEXT: xorps %xmm2, %xmm2 |
| ; SSE41-NEXT: roundsd $10, %xmm0, %xmm2 |
| ; SSE41-NEXT: LBB81_2: |
| ; SSE41-NEXT: blendpd {{.*#+}} xmm1 = xmm2[0],xmm1[1] |
| ; SSE41-NEXT: movapd %xmm1, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: ceil_maskz_sd: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: testb $1, %dil |
| ; AVX-NEXT: vxorpd %xmm2, %xmm2, %xmm2 |
| ; AVX-NEXT: je LBB81_2 |
| ; AVX-NEXT: ## %bb.1: |
| ; AVX-NEXT: vroundsd $10, %xmm0, %xmm0, %xmm2 |
| ; AVX-NEXT: LBB81_2: |
| ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm2[0],xmm1[1] |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512-LABEL: ceil_maskz_sd: |
| ; AVX512: ## %bb.0: |
| ; AVX512-NEXT: kmovw %edi, %k1 |
| ; AVX512-NEXT: vrndscalesd $2, %xmm0, %xmm1, %xmm0 {%k1} {z} |
| ; AVX512-NEXT: retq |
| %mask = and i8 %k, 1 |
| %nmask = icmp eq i8 %mask, 0 |
| %s = extractelement <2 x double> %x, i64 0 |
| %call = tail call double @llvm.ceil.f64(double %s) |
| %low = select i1 %nmask, double zeroinitializer, double %call |
| %res = insertelement <2 x double> %y, double %low, i64 0 |
| ret <2 x double> %res |
| } |
| |
| define <4 x float> @ceil_mask_ss_trunc(<4 x float> %x, <4 x float> %y, <4 x float> %w, i16 %k) nounwind { |
| ; SSE41-LABEL: ceil_mask_ss_trunc: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: testb $1, %dil |
| ; SSE41-NEXT: je LBB82_2 |
| ; SSE41-NEXT: ## %bb.1: |
| ; SSE41-NEXT: xorps %xmm2, %xmm2 |
| ; SSE41-NEXT: roundss $10, %xmm0, %xmm2 |
| ; SSE41-NEXT: LBB82_2: |
| ; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm2[0],xmm1[1,2,3] |
| ; SSE41-NEXT: movaps %xmm1, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: ceil_mask_ss_trunc: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: testb $1, %dil |
| ; AVX-NEXT: je LBB82_2 |
| ; AVX-NEXT: ## %bb.1: |
| ; AVX-NEXT: vroundss $10, %xmm0, %xmm0, %xmm2 |
| ; AVX-NEXT: LBB82_2: |
| ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm2[0],xmm1[1,2,3] |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512-LABEL: ceil_mask_ss_trunc: |
| ; AVX512: ## %bb.0: |
| ; AVX512-NEXT: kmovw %edi, %k1 |
| ; AVX512-NEXT: vrndscaless $2, %xmm0, %xmm1, %xmm2 {%k1} |
| ; AVX512-NEXT: vmovaps %xmm2, %xmm0 |
| ; AVX512-NEXT: retq |
| %mask = trunc i16 %k to i1 |
| %s = extractelement <4 x float> %x, i64 0 |
| %call = tail call float @llvm.ceil.f32(float %s) |
| %dst = extractelement <4 x float> %w, i64 0 |
| %low = select i1 %mask, float %call, float %dst |
| %res = insertelement <4 x float> %y, float %low, i64 0 |
| ret <4 x float> %res |
| } |
| |
| define <4 x float> @ceil_maskz_ss_trunc(<4 x float> %x, <4 x float> %y, i16 %k) nounwind { |
| ; SSE41-LABEL: ceil_maskz_ss_trunc: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: testb $1, %dil |
| ; SSE41-NEXT: jne LBB83_1 |
| ; SSE41-NEXT: ## %bb.2: |
| ; SSE41-NEXT: xorps %xmm0, %xmm0 |
| ; SSE41-NEXT: jmp LBB83_3 |
| ; SSE41-NEXT: LBB83_1: |
| ; SSE41-NEXT: roundss $10, %xmm0, %xmm0 |
| ; SSE41-NEXT: LBB83_3: |
| ; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] |
| ; SSE41-NEXT: movaps %xmm1, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: ceil_maskz_ss_trunc: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: testb $1, %dil |
| ; AVX-NEXT: jne LBB83_1 |
| ; AVX-NEXT: ## %bb.2: |
| ; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 |
| ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] |
| ; AVX-NEXT: retq |
| ; AVX-NEXT: LBB83_1: |
| ; AVX-NEXT: vroundss $10, %xmm0, %xmm0, %xmm0 |
| ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512-LABEL: ceil_maskz_ss_trunc: |
| ; AVX512: ## %bb.0: |
| ; AVX512-NEXT: kmovw %edi, %k1 |
| ; AVX512-NEXT: vrndscaless $2, %xmm0, %xmm1, %xmm0 {%k1} {z} |
| ; AVX512-NEXT: retq |
| %mask = trunc i16 %k to i1 |
| %s = extractelement <4 x float> %x, i64 0 |
| %call = tail call float @llvm.ceil.f32(float %s) |
| %low = select i1 %mask, float %call, float zeroinitializer |
| %res = insertelement <4 x float> %y, float %low, i64 0 |
| ret <4 x float> %res |
| } |
| |
| define <2 x double> @ceil_mask_sd_trunc(<2 x double> %x, <2 x double> %y, <2 x double> %w, i16 %k) nounwind { |
| ; SSE41-LABEL: ceil_mask_sd_trunc: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: testb $1, %dil |
| ; SSE41-NEXT: je LBB84_2 |
| ; SSE41-NEXT: ## %bb.1: |
| ; SSE41-NEXT: xorps %xmm2, %xmm2 |
| ; SSE41-NEXT: roundsd $10, %xmm0, %xmm2 |
| ; SSE41-NEXT: LBB84_2: |
| ; SSE41-NEXT: blendpd {{.*#+}} xmm1 = xmm2[0],xmm1[1] |
| ; SSE41-NEXT: movapd %xmm1, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: ceil_mask_sd_trunc: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: testb $1, %dil |
| ; AVX-NEXT: je LBB84_2 |
| ; AVX-NEXT: ## %bb.1: |
| ; AVX-NEXT: vroundsd $10, %xmm0, %xmm0, %xmm2 |
| ; AVX-NEXT: LBB84_2: |
| ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm2[0],xmm1[1] |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512-LABEL: ceil_mask_sd_trunc: |
| ; AVX512: ## %bb.0: |
| ; AVX512-NEXT: kmovw %edi, %k1 |
| ; AVX512-NEXT: vrndscalesd $2, %xmm0, %xmm1, %xmm2 {%k1} |
| ; AVX512-NEXT: vmovapd %xmm2, %xmm0 |
| ; AVX512-NEXT: retq |
| %mask = trunc i16 %k to i1 |
| %s = extractelement <2 x double> %x, i64 0 |
| %call = tail call double @llvm.ceil.f64(double %s) |
| %dst = extractelement <2 x double> %w, i64 0 |
| %low = select i1 %mask, double %call, double %dst |
| %res = insertelement <2 x double> %y, double %low, i64 0 |
| ret <2 x double> %res |
| } |
| |
| define <2 x double> @ceil_maskz_sd_trunc(<2 x double> %x, <2 x double> %y, i16 %k) nounwind { |
| ; SSE41-LABEL: ceil_maskz_sd_trunc: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: testb $1, %dil |
| ; SSE41-NEXT: jne LBB85_1 |
| ; SSE41-NEXT: ## %bb.2: |
| ; SSE41-NEXT: xorpd %xmm0, %xmm0 |
| ; SSE41-NEXT: jmp LBB85_3 |
| ; SSE41-NEXT: LBB85_1: |
| ; SSE41-NEXT: roundsd $10, %xmm0, %xmm0 |
| ; SSE41-NEXT: LBB85_3: |
| ; SSE41-NEXT: blendpd {{.*#+}} xmm1 = xmm0[0],xmm1[1] |
| ; SSE41-NEXT: movapd %xmm1, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: ceil_maskz_sd_trunc: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: testb $1, %dil |
| ; AVX-NEXT: jne LBB85_1 |
| ; AVX-NEXT: ## %bb.2: |
| ; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 |
| ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] |
| ; AVX-NEXT: retq |
| ; AVX-NEXT: LBB85_1: |
| ; AVX-NEXT: vroundsd $10, %xmm0, %xmm0, %xmm0 |
| ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512-LABEL: ceil_maskz_sd_trunc: |
| ; AVX512: ## %bb.0: |
| ; AVX512-NEXT: kmovw %edi, %k1 |
| ; AVX512-NEXT: vrndscalesd $2, %xmm0, %xmm1, %xmm0 {%k1} {z} |
| ; AVX512-NEXT: retq |
| %mask = trunc i16 %k to i1 |
| %s = extractelement <2 x double> %x, i64 0 |
| %call = tail call double @llvm.ceil.f64(double %s) |
| %low = select i1 %mask, double %call, double zeroinitializer |
| %res = insertelement <2 x double> %y, double %low, i64 0 |
| ret <2 x double> %res |
| } |
| |
| ; ceil_mask_ss_mask8: merge-masked scalar f32 ceil with an fcmp-derived |
| ; predicate.  Element 0 of (%x == %y) selects between ceil(%x[0]) and the |
| ; passthrough %w[0]; the result lands in element 0 of %y.  AVX512VL folds |
| ; this into vcmpeqps-to-mask plus a merge-masked vrndscaless; AVX512F (no |
| ; VL) must widen the compare to zmm (the "kill" lines) and add vzeroupper. |
| define <4 x float> @ceil_mask_ss_mask8(<4 x float> %x, <4 x float> %y, <4 x float> %w) nounwind { |
| ; SSE41-LABEL: ceil_mask_ss_mask8: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: movaps %xmm0, %xmm3 |
| ; SSE41-NEXT: cmpeqps %xmm1, %xmm3 |
| ; SSE41-NEXT: pextrb $0, %xmm3, %eax |
| ; SSE41-NEXT: testb $1, %al |
| ; SSE41-NEXT: je LBB86_2 |
| ; SSE41-NEXT: ## %bb.1: |
| ; SSE41-NEXT: xorps %xmm2, %xmm2 |
| ; SSE41-NEXT: roundss $10, %xmm0, %xmm2 |
| ; SSE41-NEXT: LBB86_2: |
| ; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm2[0],xmm1[1,2,3] |
| ; SSE41-NEXT: movaps %xmm1, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: ceil_mask_ss_mask8: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm3 |
| ; AVX-NEXT: vpextrb $0, %xmm3, %eax |
| ; AVX-NEXT: testb $1, %al |
| ; AVX-NEXT: je LBB86_2 |
| ; AVX-NEXT: ## %bb.1: |
| ; AVX-NEXT: vroundss $10, %xmm0, %xmm0, %xmm2 |
| ; AVX-NEXT: LBB86_2: |
| ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm2[0],xmm1[1,2,3] |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512F-LABEL: ceil_mask_ss_mask8: |
| ; AVX512F: ## %bb.0: |
| ; AVX512F-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1 |
| ; AVX512F-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0 |
| ; AVX512F-NEXT: vcmpeqps %zmm1, %zmm0, %k1 |
| ; AVX512F-NEXT: vrndscaless $2, %xmm0, %xmm1, %xmm2 {%k1} |
| ; AVX512F-NEXT: vmovaps %xmm2, %xmm0 |
| ; AVX512F-NEXT: vzeroupper |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: ceil_mask_ss_mask8: |
| ; AVX512VL: ## %bb.0: |
| ; AVX512VL-NEXT: vcmpeqps %xmm1, %xmm0, %k1 |
| ; AVX512VL-NEXT: vrndscaless $2, %xmm0, %xmm1, %xmm2 {%k1} |
| ; AVX512VL-NEXT: vmovaps %xmm2, %xmm0 |
| ; AVX512VL-NEXT: retq |
| %mask1 = fcmp oeq <4 x float> %x, %y |
| %mask = extractelement <4 x i1> %mask1, i64 0 |
| %s = extractelement <4 x float> %x, i64 0 |
| %call = tail call float @llvm.ceil.f32(float %s) |
| %dst = extractelement <4 x float> %w, i64 0 |
| %low = select i1 %mask, float %call, float %dst |
| %res = insertelement <4 x float> %y, float %low, i64 0 |
| ret <4 x float> %res |
| } |
| |
| ; ceil_maskz_ss_mask8: zero-masked scalar f32 ceil with an fcmp-derived |
| ; predicate.  Element 0 of (%x == %y) selects between ceil(%x[0]) and +0.0; |
| ; the result replaces element 0 of %y.  AVX512 targets should emit a single |
| ; {z}-masked vrndscaless (AVX512F widening the compare to zmm); SSE41/AVX |
| ; lower to an explicit compare, extract, branch, and blend. |
| define <4 x float> @ceil_maskz_ss_mask8(<4 x float> %x, <4 x float> %y) nounwind { |
| ; SSE41-LABEL: ceil_maskz_ss_mask8: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: movaps %xmm0, %xmm2 |
| ; SSE41-NEXT: cmpeqps %xmm1, %xmm2 |
| ; SSE41-NEXT: pextrb $0, %xmm2, %eax |
| ; SSE41-NEXT: testb $1, %al |
| ; SSE41-NEXT: jne LBB87_1 |
| ; SSE41-NEXT: ## %bb.2: |
| ; SSE41-NEXT: xorps %xmm0, %xmm0 |
| ; SSE41-NEXT: jmp LBB87_3 |
| ; SSE41-NEXT: LBB87_1: |
| ; SSE41-NEXT: roundss $10, %xmm0, %xmm0 |
| ; SSE41-NEXT: LBB87_3: |
| ; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] |
| ; SSE41-NEXT: movaps %xmm1, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: ceil_maskz_ss_mask8: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm2 |
| ; AVX-NEXT: vpextrb $0, %xmm2, %eax |
| ; AVX-NEXT: testb $1, %al |
| ; AVX-NEXT: jne LBB87_1 |
| ; AVX-NEXT: ## %bb.2: |
| ; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 |
| ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] |
| ; AVX-NEXT: retq |
| ; AVX-NEXT: LBB87_1: |
| ; AVX-NEXT: vroundss $10, %xmm0, %xmm0, %xmm0 |
| ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512F-LABEL: ceil_maskz_ss_mask8: |
| ; AVX512F: ## %bb.0: |
| ; AVX512F-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1 |
| ; AVX512F-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0 |
| ; AVX512F-NEXT: vcmpeqps %zmm1, %zmm0, %k1 |
| ; AVX512F-NEXT: vrndscaless $2, %xmm0, %xmm1, %xmm0 {%k1} {z} |
| ; AVX512F-NEXT: vzeroupper |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: ceil_maskz_ss_mask8: |
| ; AVX512VL: ## %bb.0: |
| ; AVX512VL-NEXT: vcmpeqps %xmm1, %xmm0, %k1 |
| ; AVX512VL-NEXT: vrndscaless $2, %xmm0, %xmm1, %xmm0 {%k1} {z} |
| ; AVX512VL-NEXT: retq |
| %mask1 = fcmp oeq <4 x float> %x, %y |
| %mask = extractelement <4 x i1> %mask1, i64 0 |
| %s = extractelement <4 x float> %x, i64 0 |
| %call = tail call float @llvm.ceil.f32(float %s) |
| %low = select i1 %mask, float %call, float zeroinitializer |
| %res = insertelement <4 x float> %y, float %low, i64 0 |
| ret <4 x float> %res |
| } |
| |
| ; ceil_mask_sd_mask8: merge-masked scalar f64 ceil, the double-precision |
| ; counterpart of ceil_mask_ss_mask8.  Element 0 of (%x == %y) selects |
| ; between ceil(%x[0]) and the passthrough %w[0]; the result is inserted |
| ; into element 0 of %y.  Expected codegen: a merge-masked vrndscalesd on |
| ; AVX512 (zmm-widened compare on AVX512F), compare/branch/blend otherwise. |
| define <2 x double> @ceil_mask_sd_mask8(<2 x double> %x, <2 x double> %y, <2 x double> %w) nounwind { |
| ; SSE41-LABEL: ceil_mask_sd_mask8: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: movapd %xmm0, %xmm3 |
| ; SSE41-NEXT: cmpeqpd %xmm1, %xmm3 |
| ; SSE41-NEXT: pextrb $0, %xmm3, %eax |
| ; SSE41-NEXT: testb $1, %al |
| ; SSE41-NEXT: je LBB88_2 |
| ; SSE41-NEXT: ## %bb.1: |
| ; SSE41-NEXT: xorps %xmm2, %xmm2 |
| ; SSE41-NEXT: roundsd $10, %xmm0, %xmm2 |
| ; SSE41-NEXT: LBB88_2: |
| ; SSE41-NEXT: blendpd {{.*#+}} xmm1 = xmm2[0],xmm1[1] |
| ; SSE41-NEXT: movapd %xmm1, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: ceil_mask_sd_mask8: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm3 |
| ; AVX-NEXT: vpextrb $0, %xmm3, %eax |
| ; AVX-NEXT: testb $1, %al |
| ; AVX-NEXT: je LBB88_2 |
| ; AVX-NEXT: ## %bb.1: |
| ; AVX-NEXT: vroundsd $10, %xmm0, %xmm0, %xmm2 |
| ; AVX-NEXT: LBB88_2: |
| ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm2[0],xmm1[1] |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512F-LABEL: ceil_mask_sd_mask8: |
| ; AVX512F: ## %bb.0: |
| ; AVX512F-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1 |
| ; AVX512F-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0 |
| ; AVX512F-NEXT: vcmpeqpd %zmm1, %zmm0, %k1 |
| ; AVX512F-NEXT: vrndscalesd $2, %xmm0, %xmm1, %xmm2 {%k1} |
| ; AVX512F-NEXT: vmovapd %xmm2, %xmm0 |
| ; AVX512F-NEXT: vzeroupper |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: ceil_mask_sd_mask8: |
| ; AVX512VL: ## %bb.0: |
| ; AVX512VL-NEXT: vcmpeqpd %xmm1, %xmm0, %k1 |
| ; AVX512VL-NEXT: vrndscalesd $2, %xmm0, %xmm1, %xmm2 {%k1} |
| ; AVX512VL-NEXT: vmovapd %xmm2, %xmm0 |
| ; AVX512VL-NEXT: retq |
| %mask1 = fcmp oeq <2 x double> %x, %y |
| %mask = extractelement <2 x i1> %mask1, i64 0 |
| %s = extractelement <2 x double> %x, i64 0 |
| %call = tail call double @llvm.ceil.f64(double %s) |
| %dst = extractelement <2 x double> %w, i64 0 |
| %low = select i1 %mask, double %call, double %dst |
| %res = insertelement <2 x double> %y, double %low, i64 0 |
| ret <2 x double> %res |
| } |
| |
| ; ceil_maskz_sd_mask8: zero-masked scalar f64 ceil, the double-precision |
| ; counterpart of ceil_maskz_ss_mask8.  Element 0 of (%x == %y) selects |
| ; between ceil(%x[0]) and +0.0; the result replaces element 0 of %y. |
| ; Expected codegen: one {z}-masked vrndscalesd on AVX512 (zmm-widened |
| ; compare on AVX512F), or a compare/branch/blend sequence otherwise. |
| define <2 x double> @ceil_maskz_sd_mask8(<2 x double> %x, <2 x double> %y) nounwind { |
| ; SSE41-LABEL: ceil_maskz_sd_mask8: |
| ; SSE41: ## %bb.0: |
| ; SSE41-NEXT: movapd %xmm0, %xmm2 |
| ; SSE41-NEXT: cmpeqpd %xmm1, %xmm2 |
| ; SSE41-NEXT: pextrb $0, %xmm2, %eax |
| ; SSE41-NEXT: testb $1, %al |
| ; SSE41-NEXT: jne LBB89_1 |
| ; SSE41-NEXT: ## %bb.2: |
| ; SSE41-NEXT: xorpd %xmm0, %xmm0 |
| ; SSE41-NEXT: jmp LBB89_3 |
| ; SSE41-NEXT: LBB89_1: |
| ; SSE41-NEXT: roundsd $10, %xmm0, %xmm0 |
| ; SSE41-NEXT: LBB89_3: |
| ; SSE41-NEXT: blendpd {{.*#+}} xmm1 = xmm0[0],xmm1[1] |
| ; SSE41-NEXT: movapd %xmm1, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; AVX-LABEL: ceil_maskz_sd_mask8: |
| ; AVX: ## %bb.0: |
| ; AVX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm2 |
| ; AVX-NEXT: vpextrb $0, %xmm2, %eax |
| ; AVX-NEXT: testb $1, %al |
| ; AVX-NEXT: jne LBB89_1 |
| ; AVX-NEXT: ## %bb.2: |
| ; AVX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 |
| ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] |
| ; AVX-NEXT: retq |
| ; AVX-NEXT: LBB89_1: |
| ; AVX-NEXT: vroundsd $10, %xmm0, %xmm0, %xmm0 |
| ; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] |
| ; AVX-NEXT: retq |
| ; |
| ; AVX512F-LABEL: ceil_maskz_sd_mask8: |
| ; AVX512F: ## %bb.0: |
| ; AVX512F-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1 |
| ; AVX512F-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0 |
| ; AVX512F-NEXT: vcmpeqpd %zmm1, %zmm0, %k1 |
| ; AVX512F-NEXT: vrndscalesd $2, %xmm0, %xmm1, %xmm0 {%k1} {z} |
| ; AVX512F-NEXT: vzeroupper |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: ceil_maskz_sd_mask8: |
| ; AVX512VL: ## %bb.0: |
| ; AVX512VL-NEXT: vcmpeqpd %xmm1, %xmm0, %k1 |
| ; AVX512VL-NEXT: vrndscalesd $2, %xmm0, %xmm1, %xmm0 {%k1} {z} |
| ; AVX512VL-NEXT: retq |
| %mask1 = fcmp oeq <2 x double> %x, %y |
| %mask = extractelement <2 x i1> %mask1, i64 0 |
| %s = extractelement <2 x double> %x, i64 0 |
| %call = tail call double @llvm.ceil.f64(double %s) |
| %low = select i1 %mask, double %call, double zeroinitializer |
| %res = insertelement <2 x double> %y, double %low, i64 0 |
| ret <2 x double> %res |
| } |