; third_party/llvm-7.0/llvm/test/CodeGen/AArch64/trunc-v1i64.ll - SwiftShader - Git at Google

 ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon -verify-machineinstrs < %s | FileCheck %s

 ; An optimization in the DAG Combiner that folds
 ;     (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)
 ; will generate nodes like:
 ;     v1i32 trunc v1i64, v1i16 trunc v1i64, v1i8 trunc v1i64.
 ; By default such nodes are scalarized during type legalization, but that
 ; scalarization causes an assertion failure because v1i64 is a legal type on
 ; AArch64. We therefore change the default behaviour from scalarizing to widening.

 ; FIXME: XTN is currently generated for the v1i32 cases, but it could be
 ; optimized away, just as no XTN is generated for the v1i16 and v1i8 cases.

 ; Place the single i64 of %in0 in lane 0 of a <2 x i64> (lane 1 is undef) and
 ; truncate every lane to i32. The checks expect the truncation to show up as XTN.
 define <2 x i32> @test_v1i32_0(<1 x i64> %in0) {
 ; CHECK-LABEL: test_v1i32_0:
 ; CHECK: xtn v0.2s, v0.2d
   %wide = shufflevector <1 x i64> %in0, <1 x i64> undef, <2 x i32> <i32 0, i32 undef>
   %narrow = trunc <2 x i64> %wide to <2 x i32>
   ret <2 x i32> %narrow
 }

 ; Same as the lane-0 variant, but the shuffle mask puts the i64 of %in0 in
 ; lane 1 and leaves lane 0 undef. The checks expect XTN immediately followed
 ; by a DUP of lane 0.
 define <2 x i32> @test_v1i32_1(<1 x i64> %in0) {
 ; CHECK-LABEL: test_v1i32_1:
 ; CHECK: xtn v0.2s, v0.2d
 ; CHECK-NEXT: dup v0.2s, v0.s[0]
   %wide = shufflevector <1 x i64> %in0, <1 x i64> undef, <2 x i32> <i32 undef, i32 0>
   %narrow = trunc <2 x i64> %wide to <2 x i32>
   ret <2 x i32> %narrow
 }

 ; Place the single i64 of %in0 in lane 0 of a <4 x i64> (remaining lanes are
 ; undef) and truncate every lane to i16. The checks require that no XTN is
 ; emitted for this function.
 define <4 x i16> @test_v1i16_0(<1 x i64> %in0) {
 ; CHECK-LABEL: test_v1i16_0:
 ; CHECK-NOT: xtn
   %wide = shufflevector <1 x i64> %in0, <1 x i64> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
   %narrow = trunc <4 x i64> %wide to <4 x i16>
   ret <4 x i16> %narrow
 }

 ; The shuffle mask puts the i64 of %in0 in lane 2 of a <4 x i64> (all other
 ; lanes undef) before truncating to i16. The checks expect a DUP of lane 0
 ; with no XTN ahead of it.
 define <4 x i16> @test_v1i16_1(<1 x i64> %in0) {
 ; CHECK-LABEL: test_v1i16_1:
 ; CHECK-NOT: xtn
 ; CHECK: dup v0.4h, v0.h[0]
   %wide = shufflevector <1 x i64> %in0, <1 x i64> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 undef>
   %narrow = trunc <4 x i64> %wide to <4 x i16>
   ret <4 x i16> %narrow
 }

 ; Place the single i64 of %in0 in lane 0 of an <8 x i64> (remaining lanes are
 ; undef) and truncate every lane to i8. The checks require that no XTN is
 ; emitted for this function.
 define <8 x i8> @test_v1i8_0(<1 x i64> %in0) {
 ; CHECK-LABEL: test_v1i8_0:
 ; CHECK-NOT: xtn
   %wide = shufflevector <1 x i64> %in0, <1 x i64> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
   %narrow = trunc <8 x i64> %wide to <8 x i8>
   ret <8 x i8> %narrow
 }

 ; The shuffle mask puts the i64 of %in0 in lane 2 of an <8 x i64> (all other
 ; lanes undef) before truncating to i8. The checks expect a DUP of lane 0
 ; with no XTN ahead of it.
 define <8 x i8> @test_v1i8_1(<1 x i64> %in0) {
 ; CHECK-LABEL: test_v1i8_1:
 ; CHECK-NOT: xtn
 ; CHECK: dup v0.8b, v0.b[0]
   %wide = shufflevector <1 x i64> %in0, <1 x i64> undef, <8 x i32> <i32 undef, i32 undef, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
   %narrow = trunc <8 x i64> %wide to <8 x i8>
   ret <8 x i8> %narrow
 }

 ; PR20777: v1i1 is also problematic, but we can't widen it, so we extract_elt
 ; the i64 out of the v1i64 operand, and truncate that scalar instead.

 ; Truncate the single i64 lane of %in0 to i1 and return the <1 x i1> vector.
 ; The check expects the value to be moved from s0 into w0 with FMOV.
 define <1 x i1> @test_v1i1_0(<1 x i64> %in0) {
 ; CHECK-LABEL: test_v1i1_0:
 ; CHECK: fmov w0, s0
   %bit = trunc <1 x i64> %in0 to <1 x i1>
   ret <1 x i1> %bit
 }

 ; Truncate the single i64 lane of %in0 to i1, then extract lane 0 and return
 ; it as a scalar i1. The checks expect an FMOV from s0 into some w register,
 ; followed by an AND with #0x1 that writes w0.
 define i1 @test_v1i1_1(<1 x i64> %in0) {
 ; CHECK-LABEL: test_v1i1_1:
 ; CHECK: fmov [[REG:w[0-9]+]], s0
 ; CHECK: and w0, [[REG]], #0x1
   %bitvec = trunc <1 x i64> %in0 to <1 x i1>
   %bit = extractelement <1 x i1> %bitvec, i32 0
   ret i1 %bit
 }
	; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon -verify-machineinstrs < %s | FileCheck %s

	; An optimization in the DAG Combiner that folds
	;     (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)
	; will generate nodes like:
	;     v1i32 trunc v1i64, v1i16 trunc v1i64, v1i8 trunc v1i64.
	; By default such nodes are scalarized during type legalization, but that
	; scalarization causes an assertion failure because v1i64 is a legal type on
	; AArch64. We therefore change the default behaviour from scalarizing to widening.

	; FIXME: XTN is currently generated for the v1i32 cases, but it could be
	; optimized away, just as no XTN is generated for the v1i16 and v1i8 cases.

	; Place the single i64 of %in0 in lane 0 of a <2 x i64> (lane 1 is undef) and
	; truncate every lane to i32. The checks expect the truncation to show up as XTN.
	define <2 x i32> @test_v1i32_0(<1 x i64> %in0) {
	; CHECK-LABEL: test_v1i32_0:
	; CHECK: xtn v0.2s, v0.2d
	  %wide = shufflevector <1 x i64> %in0, <1 x i64> undef, <2 x i32> <i32 0, i32 undef>
	  %narrow = trunc <2 x i64> %wide to <2 x i32>
	  ret <2 x i32> %narrow
	}

	; Same as the lane-0 variant, but the shuffle mask puts the i64 of %in0 in
	; lane 1 and leaves lane 0 undef. The checks expect XTN immediately followed
	; by a DUP of lane 0.
	define <2 x i32> @test_v1i32_1(<1 x i64> %in0) {
	; CHECK-LABEL: test_v1i32_1:
	; CHECK: xtn v0.2s, v0.2d
	; CHECK-NEXT: dup v0.2s, v0.s[0]
	  %wide = shufflevector <1 x i64> %in0, <1 x i64> undef, <2 x i32> <i32 undef, i32 0>
	  %narrow = trunc <2 x i64> %wide to <2 x i32>
	  ret <2 x i32> %narrow
	}

	; Place the single i64 of %in0 in lane 0 of a <4 x i64> (remaining lanes are
	; undef) and truncate every lane to i16. The checks require that no XTN is
	; emitted for this function.
	define <4 x i16> @test_v1i16_0(<1 x i64> %in0) {
	; CHECK-LABEL: test_v1i16_0:
	; CHECK-NOT: xtn
	  %wide = shufflevector <1 x i64> %in0, <1 x i64> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
	  %narrow = trunc <4 x i64> %wide to <4 x i16>
	  ret <4 x i16> %narrow
	}

	; The shuffle mask puts the i64 of %in0 in lane 2 of a <4 x i64> (all other
	; lanes undef) before truncating to i16. The checks expect a DUP of lane 0
	; with no XTN ahead of it.
	define <4 x i16> @test_v1i16_1(<1 x i64> %in0) {
	; CHECK-LABEL: test_v1i16_1:
	; CHECK-NOT: xtn
	; CHECK: dup v0.4h, v0.h[0]
	  %wide = shufflevector <1 x i64> %in0, <1 x i64> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 undef>
	  %narrow = trunc <4 x i64> %wide to <4 x i16>
	  ret <4 x i16> %narrow
	}

	; Place the single i64 of %in0 in lane 0 of an <8 x i64> (remaining lanes are
	; undef) and truncate every lane to i8. The checks require that no XTN is
	; emitted for this function.
	define <8 x i8> @test_v1i8_0(<1 x i64> %in0) {
	; CHECK-LABEL: test_v1i8_0:
	; CHECK-NOT: xtn
	  %wide = shufflevector <1 x i64> %in0, <1 x i64> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
	  %narrow = trunc <8 x i64> %wide to <8 x i8>
	  ret <8 x i8> %narrow
	}

	; The shuffle mask puts the i64 of %in0 in lane 2 of an <8 x i64> (all other
	; lanes undef) before truncating to i8. The checks expect a DUP of lane 0
	; with no XTN ahead of it.
	define <8 x i8> @test_v1i8_1(<1 x i64> %in0) {
	; CHECK-LABEL: test_v1i8_1:
	; CHECK-NOT: xtn
	; CHECK: dup v0.8b, v0.b[0]
	  %wide = shufflevector <1 x i64> %in0, <1 x i64> undef, <8 x i32> <i32 undef, i32 undef, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
	  %narrow = trunc <8 x i64> %wide to <8 x i8>
	  ret <8 x i8> %narrow
	}

	; PR20777: v1i1 is also problematic, but we can't widen it, so we extract_elt
	; the i64 out of the v1i64 operand, and truncate that scalar instead.

	; Truncate the single i64 lane of %in0 to i1 and return the <1 x i1> vector.
	; The check expects the value to be moved from s0 into w0 with FMOV.
	define <1 x i1> @test_v1i1_0(<1 x i64> %in0) {
	; CHECK-LABEL: test_v1i1_0:
	; CHECK: fmov w0, s0
	  %bit = trunc <1 x i64> %in0 to <1 x i1>
	  ret <1 x i1> %bit
	}

	; Truncate the single i64 lane of %in0 to i1, then extract lane 0 and return
	; it as a scalar i1. The checks expect an FMOV from s0 into some w register,
	; followed by an AND with #0x1 that writes w0.
	define i1 @test_v1i1_1(<1 x i64> %in0) {
	; CHECK-LABEL: test_v1i1_1:
	; CHECK: fmov [[REG:w[0-9]+]], s0
	; CHECK: and w0, [[REG]], #0x1
	  %bitvec = trunc <1 x i64> %in0 to <1 x i1>
	  %bit = extractelement <1 x i1> %bitvec, i32 0
	  ret i1 %bit
	}