Subzero: Use scalar arithmetic when no vector instruction exists.

Implement scalarizeArithmetic() which extracts the components of the
input vectors, performs the operation with scalar instructions, and
builds the output vector component by component.

Fix the lowering of sdiv and srem.  These previously sign-extended the
dividend with cdq even for i8 and i16 inputs, which need cbw and cwd.

In the test_arith crosstest, mask the inputs to vector shift
operations to ensure that the shifts are in range.  Otherwise the
Subzero output is not identical to the llc output in some (undefined)
cases.

BUG=none
R=stichnot@chromium.org

Review URL: https://codereview.chromium.org/443203003
diff --git a/tests_lit/llvm2ice_tests/sdiv.ll b/tests_lit/llvm2ice_tests/sdiv.ll
new file mode 100644
index 0000000..ec43d6a
--- /dev/null
+++ b/tests_lit/llvm2ice_tests/sdiv.ll
@@ -0,0 +1,82 @@
+; This checks that the lowering of sdiv and srem sign-extends the dividend
+; with cbw (i8), cwd (i16), or cdq (i32) before the idiv.
+
+; RUN: %llvm2ice --verbose none %s | FileCheck  %s
+; RUN: %llvm2ice -O2 --verbose none %s | FileCheck  %s
+; RUN: %llvm2ice -O2 --verbose none %s \
+; RUN:               | llvm-mc -arch=x86 -x86-asm-syntax=intel -filetype=obj
+; RUN: %llvm2ice -Om1 --verbose none %s \
+; RUN:               | llvm-mc -arch=x86 -x86-asm-syntax=intel -filetype=obj
+; RUN: %llvm2ice --verbose none %s | FileCheck --check-prefix=ERRORS %s
+; RUN: %llvm2iceinsts %s | %szdiff %s | FileCheck --check-prefix=DUMP %s
+; RUN: %llvm2iceinsts --pnacl %s | %szdiff %s \
+; RUN:                           | FileCheck --check-prefix=DUMP %s
+
+define i32 @sdiv_i8(i32 %a.i32, i32 %b.i32) { ; i8 sdiv: cbw must precede idiv
+entry:
+  %a = trunc i32 %a.i32 to i8 ; dividend, narrowed to i8
+  %b = trunc i32 %b.i32 to i8 ; divisor, narrowed to i8
+  %res = sdiv i8 %a, %b ; operation under test
+  %res.i32 = zext i8 %res to i32 ; widen the quotient to the i32 return type
+  ret i32 %res.i32
+; CHECK-LABEL: sdiv_i8:
+; CHECK: cbw
+; CHECK: idiv
+}
+
+define i32 @sdiv_i16(i32 %a.i32, i32 %b.i32) { ; i16 sdiv: cwd must precede idiv
+entry:
+  %a = trunc i32 %a.i32 to i16 ; dividend, narrowed to i16
+  %b = trunc i32 %b.i32 to i16 ; divisor, narrowed to i16
+  %res = sdiv i16 %a, %b ; operation under test
+  %res.i32 = zext i16 %res to i32 ; widen the quotient to the i32 return type
+  ret i32 %res.i32
+; CHECK-LABEL: sdiv_i16:
+; CHECK: cwd
+; CHECK: idiv
+}
+
+define i32 @sdiv_i32(i32 %a, i32 %b) { ; i32 sdiv: cdq must precede idiv
+entry:
+  %res = sdiv i32 %a, %b ; operation under test, applied to the arguments as-is
+  ret i32 %res
+; CHECK-LABEL: sdiv_i32:
+; CHECK: cdq
+; CHECK: idiv
+}
+
+define i32 @srem_i8(i32 %a.i32, i32 %b.i32) { ; i8 srem: cbw must precede idiv
+entry:
+  %a = trunc i32 %a.i32 to i8 ; dividend, narrowed to i8
+  %b = trunc i32 %b.i32 to i8 ; divisor, narrowed to i8
+  %res = srem i8 %a, %b ; operation under test
+  %res.i32 = zext i8 %res to i32 ; widen the remainder to the i32 return type
+  ret i32 %res.i32
+; CHECK-LABEL: srem_i8:
+; CHECK: cbw
+; CHECK: idiv
+}
+
+define i32 @srem_i16(i32 %a.i32, i32 %b.i32) { ; i16 srem: cwd must precede idiv
+entry:
+  %a = trunc i32 %a.i32 to i16 ; dividend, narrowed to i16
+  %b = trunc i32 %b.i32 to i16 ; divisor, narrowed to i16
+  %res = srem i16 %a, %b ; operation under test
+  %res.i32 = zext i16 %res to i32 ; widen the remainder to the i32 return type
+  ret i32 %res.i32
+; CHECK-LABEL: srem_i16:
+; CHECK: cwd
+; CHECK: idiv
+}
+
+define i32 @srem_i32(i32 %a, i32 %b) { ; i32 srem: cdq must precede idiv
+entry:
+  %res = srem i32 %a, %b ; operation under test, applied to the arguments as-is
+  ret i32 %res
+; CHECK-LABEL: srem_i32:
+; CHECK: cdq
+; CHECK: idiv
+}
+
+; ERRORS-NOT: ICE translation error
+; DUMP-NOT: SZ
diff --git a/tests_lit/llvm2ice_tests/vector-arith.ll b/tests_lit/llvm2ice_tests/vector-arith.ll
index 94acfe0..e093f97 100644
--- a/tests_lit/llvm2ice_tests/vector-arith.ll
+++ b/tests_lit/llvm2ice_tests/vector-arith.ll
@@ -56,7 +56,10 @@
   %res = frem <4 x float> %arg0, %arg1
   ret <4 x float> %res
 ; CHECK-LABEL: test_frem:
-; CHECK: Sz_frem_v4f32
+; CHECK: fmodf
+; CHECK: fmodf
+; CHECK: fmodf
+; CHECK: fmodf
 }
 
 define <16 x i8> @test_add_v16i8(<16 x i8> %arg0, <16 x i8> %arg1) {
@@ -104,7 +107,22 @@
   %res = mul <16 x i8> %arg0, %arg1
   ret <16 x i8> %res
 ; CHECK-LABEL: test_mul_v16i8:
-; CHECK: Sz_mul_v16i8
+; CHECK: imul
+; CHECK: imul
+; CHECK: imul
+; CHECK: imul
+; CHECK: imul
+; CHECK: imul
+; CHECK: imul
+; CHECK: imul
+; CHECK: imul
+; CHECK: imul
+; CHECK: imul
+; CHECK: imul
+; CHECK: imul
+; CHECK: imul
+; CHECK: imul
+; CHECK: imul
 }
 
 define <16 x i8> @test_shl_v16i8(<16 x i8> %arg0, <16 x i8> %arg1) {
@@ -112,7 +130,22 @@
   %res = shl <16 x i8> %arg0, %arg1
   ret <16 x i8> %res
 ; CHECK-LABEL: test_shl_v16i8:
-; CHECK: Sz_shl_v16i8
+; CHECK: shl
+; CHECK: shl
+; CHECK: shl
+; CHECK: shl
+; CHECK: shl
+; CHECK: shl
+; CHECK: shl
+; CHECK: shl
+; CHECK: shl
+; CHECK: shl
+; CHECK: shl
+; CHECK: shl
+; CHECK: shl
+; CHECK: shl
+; CHECK: shl
+; CHECK: shl
 }
 
 define <16 x i8> @test_lshr_v16i8(<16 x i8> %arg0, <16 x i8> %arg1) {
@@ -120,7 +153,22 @@
   %res = lshr <16 x i8> %arg0, %arg1
   ret <16 x i8> %res
 ; CHECK-LABEL: test_lshr_v16i8:
-; CHECK: Sz_lshr_v16i8
+; CHECK: shr
+; CHECK: shr
+; CHECK: shr
+; CHECK: shr
+; CHECK: shr
+; CHECK: shr
+; CHECK: shr
+; CHECK: shr
+; CHECK: shr
+; CHECK: shr
+; CHECK: shr
+; CHECK: shr
+; CHECK: shr
+; CHECK: shr
+; CHECK: shr
+; CHECK: shr
 }
 
 define <16 x i8> @test_ashr_v16i8(<16 x i8> %arg0, <16 x i8> %arg1) {
@@ -128,7 +176,22 @@
   %res = ashr <16 x i8> %arg0, %arg1
   ret <16 x i8> %res
 ; CHECK-LABEL: test_ashr_v16i8:
-; CHECK: Sz_ashr_v16i8
+; CHECK: sar
+; CHECK: sar
+; CHECK: sar
+; CHECK: sar
+; CHECK: sar
+; CHECK: sar
+; CHECK: sar
+; CHECK: sar
+; CHECK: sar
+; CHECK: sar
+; CHECK: sar
+; CHECK: sar
+; CHECK: sar
+; CHECK: sar
+; CHECK: sar
+; CHECK: sar
 }
 
 define <16 x i8> @test_udiv_v16i8(<16 x i8> %arg0, <16 x i8> %arg1) {
@@ -136,7 +199,22 @@
   %res = udiv <16 x i8> %arg0, %arg1
   ret <16 x i8> %res
 ; CHECK-LABEL: test_udiv_v16i8:
-; CHECK: Sz_udiv_v16i8
+; CHECK: {{(^|[^i])div}}
+; CHECK: {{(^|[^i])div}}
+; CHECK: {{(^|[^i])div}}
+; CHECK: {{(^|[^i])div}}
+; CHECK: {{(^|[^i])div}}
+; CHECK: {{(^|[^i])div}}
+; CHECK: {{(^|[^i])div}}
+; CHECK: {{(^|[^i])div}}
+; CHECK: {{(^|[^i])div}}
+; CHECK: {{(^|[^i])div}}
+; CHECK: {{(^|[^i])div}}
+; CHECK: {{(^|[^i])div}}
+; CHECK: {{(^|[^i])div}}
+; CHECK: {{(^|[^i])div}}
+; CHECK: {{(^|[^i])div}}
+; CHECK: {{(^|[^i])div}}
 }
 
 define <16 x i8> @test_sdiv_v16i8(<16 x i8> %arg0, <16 x i8> %arg1) {
@@ -144,7 +222,22 @@
   %res = sdiv <16 x i8> %arg0, %arg1
   ret <16 x i8> %res
 ; CHECK-LABEL: test_sdiv_v16i8:
-; CHECK: Sz_sdiv_v16i8
+; CHECK: idiv
+; CHECK: idiv
+; CHECK: idiv
+; CHECK: idiv
+; CHECK: idiv
+; CHECK: idiv
+; CHECK: idiv
+; CHECK: idiv
+; CHECK: idiv
+; CHECK: idiv
+; CHECK: idiv
+; CHECK: idiv
+; CHECK: idiv
+; CHECK: idiv
+; CHECK: idiv
+; CHECK: idiv
 }
 
 define <16 x i8> @test_urem_v16i8(<16 x i8> %arg0, <16 x i8> %arg1) {
@@ -152,7 +245,22 @@
   %res = urem <16 x i8> %arg0, %arg1
   ret <16 x i8> %res
 ; CHECK-LABEL: test_urem_v16i8:
-; CHECK: Sz_urem_v16i8
+; CHECK: {{(^|[^i])div}}
+; CHECK: {{(^|[^i])div}}
+; CHECK: {{(^|[^i])div}}
+; CHECK: {{(^|[^i])div}}
+; CHECK: {{(^|[^i])div}}
+; CHECK: {{(^|[^i])div}}
+; CHECK: {{(^|[^i])div}}
+; CHECK: {{(^|[^i])div}}
+; CHECK: {{(^|[^i])div}}
+; CHECK: {{(^|[^i])div}}
+; CHECK: {{(^|[^i])div}}
+; CHECK: {{(^|[^i])div}}
+; CHECK: {{(^|[^i])div}}
+; CHECK: {{(^|[^i])div}}
+; CHECK: {{(^|[^i])div}}
+; CHECK: {{(^|[^i])div}}
 }
 
 define <16 x i8> @test_srem_v16i8(<16 x i8> %arg0, <16 x i8> %arg1) {
@@ -160,7 +268,22 @@
   %res = srem <16 x i8> %arg0, %arg1
   ret <16 x i8> %res
 ; CHECK-LABEL: test_srem_v16i8:
-; CHECK: Sz_srem_v16i8
+; CHECK: idiv
+; CHECK: idiv
+; CHECK: idiv
+; CHECK: idiv
+; CHECK: idiv
+; CHECK: idiv
+; CHECK: idiv
+; CHECK: idiv
+; CHECK: idiv
+; CHECK: idiv
+; CHECK: idiv
+; CHECK: idiv
+; CHECK: idiv
+; CHECK: idiv
+; CHECK: idiv
+; CHECK: idiv
 }
 
 define <8 x i16> @test_add_v8i16(<8 x i16> %arg0, <8 x i16> %arg1) {
@@ -216,7 +339,14 @@
   %res = shl <8 x i16> %arg0, %arg1
   ret <8 x i16> %res
 ; CHECK-LABEL: test_shl_v8i16:
-; CHECK: Sz_shl_v8i16
+; CHECK: shl
+; CHECK: shl
+; CHECK: shl
+; CHECK: shl
+; CHECK: shl
+; CHECK: shl
+; CHECK: shl
+; CHECK: shl
 }
 
 define <8 x i16> @test_lshr_v8i16(<8 x i16> %arg0, <8 x i16> %arg1) {
@@ -224,7 +354,14 @@
   %res = lshr <8 x i16> %arg0, %arg1
   ret <8 x i16> %res
 ; CHECK-LABEL: test_lshr_v8i16:
-; CHECK: Sz_lshr_v8i16
+; CHECK: shr
+; CHECK: shr
+; CHECK: shr
+; CHECK: shr
+; CHECK: shr
+; CHECK: shr
+; CHECK: shr
+; CHECK: shr
 }
 
 define <8 x i16> @test_ashr_v8i16(<8 x i16> %arg0, <8 x i16> %arg1) {
@@ -232,7 +369,14 @@
   %res = ashr <8 x i16> %arg0, %arg1
   ret <8 x i16> %res
 ; CHECK-LABEL: test_ashr_v8i16:
-; CHECK: Sz_ashr_v8i16
+; CHECK: sar
+; CHECK: sar
+; CHECK: sar
+; CHECK: sar
+; CHECK: sar
+; CHECK: sar
+; CHECK: sar
+; CHECK: sar
 }
 
 define <8 x i16> @test_udiv_v8i16(<8 x i16> %arg0, <8 x i16> %arg1) {
@@ -240,7 +384,14 @@
   %res = udiv <8 x i16> %arg0, %arg1
   ret <8 x i16> %res
 ; CHECK-LABEL: test_udiv_v8i16:
-; CHECK: Sz_udiv_v8i16
+; CHECK: {{(^|[^i])div}}
+; CHECK: {{(^|[^i])div}}
+; CHECK: {{(^|[^i])div}}
+; CHECK: {{(^|[^i])div}}
+; CHECK: {{(^|[^i])div}}
+; CHECK: {{(^|[^i])div}}
+; CHECK: {{(^|[^i])div}}
+; CHECK: {{(^|[^i])div}}
 }
 
 define <8 x i16> @test_sdiv_v8i16(<8 x i16> %arg0, <8 x i16> %arg1) {
@@ -248,7 +399,14 @@
   %res = sdiv <8 x i16> %arg0, %arg1
   ret <8 x i16> %res
 ; CHECK-LABEL: test_sdiv_v8i16:
-; CHECK: Sz_sdiv_v8i16
+; CHECK: idiv
+; CHECK: idiv
+; CHECK: idiv
+; CHECK: idiv
+; CHECK: idiv
+; CHECK: idiv
+; CHECK: idiv
+; CHECK: idiv
 }
 
 define <8 x i16> @test_urem_v8i16(<8 x i16> %arg0, <8 x i16> %arg1) {
@@ -256,7 +414,14 @@
   %res = urem <8 x i16> %arg0, %arg1
   ret <8 x i16> %res
 ; CHECK-LABEL: test_urem_v8i16:
-; CHECK: Sz_urem_v8i16
+; CHECK: {{(^|[^i])div}}
+; CHECK: {{(^|[^i])div}}
+; CHECK: {{(^|[^i])div}}
+; CHECK: {{(^|[^i])div}}
+; CHECK: {{(^|[^i])div}}
+; CHECK: {{(^|[^i])div}}
+; CHECK: {{(^|[^i])div}}
+; CHECK: {{(^|[^i])div}}
 }
 
 define <8 x i16> @test_srem_v8i16(<8 x i16> %arg0, <8 x i16> %arg1) {
@@ -264,7 +429,14 @@
   %res = srem <8 x i16> %arg0, %arg1
   ret <8 x i16> %res
 ; CHECK-LABEL: test_srem_v8i16:
-; CHECK: Sz_srem_v8i16
+; CHECK: idiv
+; CHECK: idiv
+; CHECK: idiv
+; CHECK: idiv
+; CHECK: idiv
+; CHECK: idiv
+; CHECK: idiv
+; CHECK: idiv
 }
 
 define <4 x i32> @test_add_v4i32(<4 x i32> %arg0, <4 x i32> %arg1) {
@@ -324,7 +496,10 @@
   %res = shl <4 x i32> %arg0, %arg1
   ret <4 x i32> %res
 ; CHECK-LABEL: test_shl_v4i32:
-; CHECK: Sz_shl_v4i32
+; CHECK: shl
+; CHECK: shl
+; CHECK: shl
+; CHECK: shl
 
 ; This line is to ensure that pmulld is generated in test_mul_v4i32 above.
 ; SSE41-LABEL: test_shl_v4i32:
@@ -335,7 +510,10 @@
   %res = lshr <4 x i32> %arg0, %arg1
   ret <4 x i32> %res
 ; CHECK-LABEL: test_lshr_v4i32:
-; CHECK: Sz_lshr_v4i32
+; CHECK: shr
+; CHECK: shr
+; CHECK: shr
+; CHECK: shr
 }
 
 define <4 x i32> @test_ashr_v4i32(<4 x i32> %arg0, <4 x i32> %arg1) {
@@ -343,7 +521,10 @@
   %res = ashr <4 x i32> %arg0, %arg1
   ret <4 x i32> %res
 ; CHECK-LABEL: test_ashr_v4i32:
-; CHECK: Sz_ashr_v4i32
+; CHECK: sar
+; CHECK: sar
+; CHECK: sar
+; CHECK: sar
 }
 
 define <4 x i32> @test_udiv_v4i32(<4 x i32> %arg0, <4 x i32> %arg1) {
@@ -351,7 +532,10 @@
   %res = udiv <4 x i32> %arg0, %arg1
   ret <4 x i32> %res
 ; CHECK-LABEL: test_udiv_v4i32:
-; CHECK: Sz_udiv_v4i32
+; CHECK: {{(^|[^i])div}}
+; CHECK: {{(^|[^i])div}}
+; CHECK: {{(^|[^i])div}}
+; CHECK: {{(^|[^i])div}}
 }
 
 define <4 x i32> @test_sdiv_v4i32(<4 x i32> %arg0, <4 x i32> %arg1) {
@@ -359,7 +543,10 @@
   %res = sdiv <4 x i32> %arg0, %arg1
   ret <4 x i32> %res
 ; CHECK-LABEL: test_sdiv_v4i32:
-; CHECK: Sz_sdiv_v4i32
+; CHECK: idiv
+; CHECK: idiv
+; CHECK: idiv
+; CHECK: idiv
 }
 
 define <4 x i32> @test_urem_v4i32(<4 x i32> %arg0, <4 x i32> %arg1) {
@@ -367,7 +554,10 @@
   %res = urem <4 x i32> %arg0, %arg1
   ret <4 x i32> %res
 ; CHECK-LABEL: test_urem_v4i32:
-; CHECK: Sz_urem_v4i32
+; CHECK: {{(^|[^i])div}}
+; CHECK: {{(^|[^i])div}}
+; CHECK: {{(^|[^i])div}}
+; CHECK: {{(^|[^i])div}}
 }
 
 define <4 x i32> @test_srem_v4i32(<4 x i32> %arg0, <4 x i32> %arg1) {
@@ -375,7 +565,10 @@
   %res = srem <4 x i32> %arg0, %arg1
   ret <4 x i32> %res
 ; CHECK-LABEL: test_srem_v4i32:
-; CHECK: Sz_srem_v4i32
+; CHECK: idiv
+; CHECK: idiv
+; CHECK: idiv
+; CHECK: idiv
 }
 
 ; ERRORS-NOT: ICE translation error