Handle mul instructions with two immediates on x86-32 and x86-64

We were hitting an assert in the assembler because a mul instruction
with two immediate operands had not been properly legalized. Now the
first source operand is legalized to a register or memory operand, so
mul instructions take at most one immediate.

It is not clear why upstream optimizations did not fold these
instructions away. Possibly the operand values only become known at
link time and LTO somehow failed to take advantage of them.

This CL includes some minor cleanup as well:
* Removes the Subzero runtime from the lit tests' dependencies list,
  since running those tests does not actually require it.
* Removes unused lambda captures that were causing warnings treated as
  errors on newer compilers.

Reviewed-on: https://chromium-review.googlesource.com/896962
Reviewed-by: Jim Stichnoth <stichnot@chromium.org>
Reviewed-by: Karl Schimpf <kschimpf@chromium.org>
Signed-off-by: Antonio Maiorano <amaiorano@google.com>

Bug: b/120208871
Change-Id: Ibf33b6961ee99311ca18c7179c56df28af6faf17
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/49068
Kokoro-Result: kokoro <noreply+kokoro@google.com>
Tested-by: Antonio Maiorano <amaiorano@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/third_party/subzero/src/IceCompileServer.cpp b/third_party/subzero/src/IceCompileServer.cpp
index f4dacb8..570ccfa 100644
--- a/third_party/subzero/src/IceCompileServer.cpp
+++ b/third_party/subzero/src/IceCompileServer.cpp
@@ -255,7 +255,7 @@
   }
 
   if (getFlags().getNumTranslationThreads() != 0) {
-    std::thread CompileThread([this, &Flags, &InputStream]() {
+    std::thread CompileThread([this, &InputStream]() {
       Ctx->initParserThread();
       getCompiler().run(Flags, *Ctx.get(), std::move(InputStream));
     });
diff --git a/third_party/subzero/src/IceTargetLoweringARM32.cpp b/third_party/subzero/src/IceTargetLoweringARM32.cpp
index af41375..bf0a5ce 100644
--- a/third_party/subzero/src/IceTargetLoweringARM32.cpp
+++ b/third_party/subzero/src/IceTargetLoweringARM32.cpp
@@ -5140,7 +5140,7 @@
       _dmb();
       lowerLoadLinkedStoreExclusive(
           DestTy, Instr->getArg(0),
-          [this, Expected, New, Instr, DestTy, &LoadedValue](Variable *Tmp) {
+          [this, Expected, New, &LoadedValue](Variable *Tmp) {
             auto *ExpectedLoR = llvm::cast<Variable>(loOperand(Expected));
             auto *ExpectedHiR = llvm::cast<Variable>(hiOperand(Expected));
             auto *TmpLoR = llvm::cast<Variable>(loOperand(Tmp));
@@ -5171,7 +5171,7 @@
     _dmb();
     lowerLoadLinkedStoreExclusive(
         DestTy, Instr->getArg(0),
-        [this, Expected, New, Instr, DestTy, &LoadedValue](Variable *Tmp) {
+        [this, Expected, New, &LoadedValue](Variable *Tmp) {
           lowerIcmpCond(InstIcmp::Eq, Tmp, Expected);
           LoadedValue = Tmp;
           return New;
diff --git a/third_party/subzero/src/IceTargetLoweringX86BaseImpl.h b/third_party/subzero/src/IceTargetLoweringX86BaseImpl.h
index 41173f5..e231ce9 100644
--- a/third_party/subzero/src/IceTargetLoweringX86BaseImpl.h
+++ b/third_party/subzero/src/IceTargetLoweringX86BaseImpl.h
@@ -2098,6 +2098,7 @@
       _mov(DestLo, T_4Lo);
       _add(T_4Hi, T_1);
       _mov(T_2, Src1Hi);
+      Src0Lo = legalize(Src0Lo, Legal_Reg | Legal_Mem);
       _imul(T_2, Src0Lo);
       _add(T_4Hi, T_2);
       _mov(DestHi, T_4Hi);
@@ -2332,10 +2333,13 @@
       _mov(Dest, T);
     } else if (auto *ImmConst = llvm::dyn_cast<ConstantInteger32>(Src1)) {
       T = makeReg(Ty);
+      Src0 = legalize(Src0, Legal_Reg | Legal_Mem);
       _imul_imm(T, Src0, ImmConst);
       _mov(Dest, T);
     } else {
       _mov(T, Src0);
+      // No need to legalize Src1 to Reg | Mem because the Imm case is handled
+      // already by the ConstantInteger32 case above.
       _imul(T, Src0 == Src1 ? T : Src1);
       _mov(Dest, T);
     }