Subzero: Strength-reduce mul by certain constants.

Multiplies by these constants all appear to some degree in spec2k.

This is implemented for i8/i16/i32 types.  It is done as part of core lowering, so in theory all optimization levels could benefit, but it is explicitly disabled for Om1/O0 to keep things simple there.

While clang appears to strength-reduce udiv/urem by a constant power of 2, it evidently does not always strength-reduce multiplies by a constant, since such multiplies still appear in the spec2k bitcode.

For multiplies by 3, 5, or 9, we can make use of the lea instruction.  We can do combinations of shift and lea to multiply by other constants, e.g. 100=5*5*4 (two lea instructions for the factors of 5, plus a shift left by 2 for the factor of 4).  If too many operations would be required, just give up and use the mul instruction.

BUG= https://code.google.com/p/nativeclient/issues/detail?id=4095
R=jpp@chromium.org, jvoung@chromium.org

Review URL: https://codereview.chromium.org/1146803002
diff --git a/tests_lit/assembler/x86/immediate_encodings.ll b/tests_lit/assembler/x86/immediate_encodings.ll
index c829f1b..0c96720 100644
--- a/tests_lit/assembler/x86/immediate_encodings.ll
+++ b/tests_lit/assembler/x86/immediate_encodings.ll
@@ -197,25 +197,25 @@
 define internal i32 @testMul16Imm16(i32 %arg) {
 entry:
   %arg_i16 = trunc i32 %arg to i16
-  %tmp = mul i16 %arg_i16, 1024
+  %tmp = mul i16 %arg_i16, 1025
   %result_i16 = add i16 %tmp, 1
   %result = zext i16 %result_i16 to i32
   ret i32 %result
 }
 ; CHECK-LABEL: testMul16Imm16
-; CHECK: 66 69 c0 00 04  imul ax,ax
+; CHECK: 66 69 c0 01 04  imul ax,ax
 ; CHECK-NEXT: add ax,0x1
 
 define internal i32 @testMul16Imm16Neg(i32 %arg) {
 entry:
   %arg_i16 = trunc i32 %arg to i16
-  %tmp = mul i16 %arg_i16, -256
+  %tmp = mul i16 %arg_i16, -255
   %result_i16 = add i16 %tmp, 1
   %result = zext i16 %result_i16 to i32
   ret i32 %result
 }
 ; CHECK-LABEL: testMul16Imm16Neg
-; CHECK: 66 69 c0 00 ff  imul ax,ax
+; CHECK: 66 69 c0 01 ff  imul ax,ax,0xff01
 ; CHECK-NEXT: add ax,0x1
 
 define internal i32 @testMul32Imm8(i32 %arg) {
@@ -236,19 +236,19 @@
 
 define internal i32 @testMul32Imm16(i32 %arg) {
 entry:
-  %result = mul i32 %arg, 1024
+  %result = mul i32 %arg, 1025
   ret i32 %result
 }
 ; CHECK-LABEL: testMul32Imm16
-; CHECK: 69 c0 00 04 00 00  imul eax,eax
+; CHECK: 69 c0 01 04 00 00  imul eax,eax
 
 define internal i32 @testMul32Imm16Neg(i32 %arg) {
 entry:
-  %result = mul i32 %arg, -256
+  %result = mul i32 %arg, -255
   ret i32 %result
 }
 ; CHECK-LABEL: testMul32Imm16Neg
-; CHECK: 69 c0 00 ff ff ff  imul eax,eax
+; CHECK: 69 c0 01 ff ff ff  imul eax,eax,0xffffff01
 
 ; The GPR shift instructions either allow an 8-bit immediate or
 ; have a special encoding for "1".
diff --git a/tests_lit/llvm2ice_tests/strength-reduce.ll b/tests_lit/llvm2ice_tests/strength-reduce.ll
new file mode 100644
index 0000000..50ca6e8
--- /dev/null
+++ b/tests_lit/llvm2ice_tests/strength-reduce.ll
@@ -0,0 +1,67 @@
+; This tests various strength reduction operations.
+
+; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \
+; RUN:   --target x8632 -i %s --args -O2 \
+; RUN:   | %if --need=target_X8632 --command FileCheck %s
+
+define internal i32 @mul_i32_arg_5(i32 %arg) {
+  %result = mul i32 %arg, 5
+  ret i32 %result
+}
+; CHECK-LABEL: mul_i32_arg_5
+; CHECK: lea [[REG:e..]],{{\[}}[[REG]]+[[REG]]*4]
+
+define internal i32 @mul_i32_5_arg(i32 %arg) {
+  %result = mul i32 5, %arg
+  ret i32 %result
+}
+; CHECK-LABEL: mul_i32_5_arg
+; CHECK: lea [[REG:e..]],{{\[}}[[REG]]+[[REG]]*4]
+
+define internal i32 @mul_i32_arg_18(i32 %arg) {
+  %result = mul i32 %arg, 18
+  ret i32 %result
+}
+; CHECK-LABEL: mul_i32_arg_18
+; CHECK-DAG: lea [[REG:e..]],{{\[}}[[REG]]+[[REG]]*8]
+; CHECK-DAG: shl [[REG]],1
+
+define internal i32 @mul_i32_arg_27(i32 %arg) {
+  %result = mul i32 %arg, 27
+  ret i32 %result
+}
+; CHECK-LABEL: mul_i32_arg_27
+; CHECK-DAG: lea [[REG:e..]],{{\[}}[[REG]]+[[REG]]*2]
+; CHECK-DAG: lea [[REG]],{{\[}}[[REG]]+[[REG]]*8]
+
+define internal i32 @mul_i32_arg_m45(i32 %arg) {
+  %result = mul i32 %arg, -45
+  ret i32 %result
+}
+; CHECK-LABEL: mul_i32_arg_m45
+; CHECK-DAG: lea [[REG:e..]],{{\[}}[[REG]]+[[REG]]*8]
+; CHECK-DAG: lea [[REG]],{{\[}}[[REG]]+[[REG]]*4]
+; CHECK: neg [[REG]]
+
+define internal i16 @mul_i16_arg_18(i16 %arg) {
+  %result = mul i16 %arg, 18
+  ret i16 %result
+}
+; Disassembly will look like "lea ax,[eax+eax*8]".
+; CHECK-LABEL: mul_i16_arg_18
+; CHECK-DAG: lea [[REG:..]],{{\[}}e[[REG]]+e[[REG]]*8]
+; CHECK-DAG: shl [[REG]],1
+
+define internal i8 @mul_i8_arg_16(i8 %arg) {
+  %result = mul i8 %arg, 16
+  ret i8 %result
+}
+; CHECK-LABEL: mul_i8_arg_16
+; CHECK: shl {{.*}},0x4
+
+define internal i8 @mul_i8_arg_18(i8 %arg) {
+  %result = mul i8 %arg, 18
+  ret i8 %result
+}
+; CHECK-LABEL: mul_i8_arg_18
+; CHECK: imul