[SubZero] Fix code generation issues that occurred in Cross-test and PNaCl smoke-tests

This patch fixes various code generation issues found while testing with the Cross-test and PNaCl smoke-test frameworks.

     1)	To keep track of branches to the same label, the position of each branch relative to the previous branch is used.
     2)	Fixed encoding of conditional mov instructions
     3)	Added MovFP64ToI64 instruction for f64 to i64 move
     4)	Handled vector-types in Phi nodes
     5)	Fixed alignment of spilled vector arguments on stack
     6)	Save-restore FP registers
     7)	Fixed code generation for Zext and Sext operations
     8)	Fixed InsertElement for vi16x8 type

R=stichnot@chromium.org

Patch from Jaydeep Patil <jaydeep.patil@imgtec.com>.

Review-Url: https://codereview.chromium.org/2619943003 .
diff --git a/src/IceAssemblerMIPS32.cpp b/src/IceAssemblerMIPS32.cpp
index a212457..f1a656d 100644
--- a/src/IceAssemblerMIPS32.cpp
+++ b/src/IceAssemblerMIPS32.cpp
@@ -196,7 +196,10 @@
     IOffsetT Dest = BoundPc - Position;
     IValueT Inst = Buffer.load<IValueT>(Position);
     Buffer.store<IValueT>(Position, encodeBranchOffset(Dest, Inst));
-    L->setPosition(decodeBranchOffset(Inst));
+    IOffsetT NextBrPc = decodeBranchOffset(Inst);
+    if (NextBrPc != 0)
+      NextBrPc = Position - NextBrPc;
+    L->setPosition(NextBrPc);
   }
   L->bindTo(BoundPc);
 }
@@ -428,7 +431,10 @@
     return;
   }
   const IOffsetT Position = Buffer.size();
-  emitBr(CondMIPS32::AL, OpRsNone, OpRtNone, TargetLabel->getEncodedPosition());
+  IOffsetT PrevPosition = TargetLabel->getEncodedPosition();
+  if (PrevPosition != 0)
+    PrevPosition = Position - PrevPosition;
+  emitBr(CondMIPS32::AL, OpRsNone, OpRtNone, PrevPosition);
   TargetLabel->linkTo(*this, Position);
 }
 
@@ -850,7 +856,7 @@
 void AssemblerMIPS32::movn_d(const Operand *OpFd, const Operand *OpFs,
                              const Operand *OpFt) {
   static constexpr IValueT Opcode = 0x44000013;
-  emitCOP1FmtRtFsFd(Opcode, SinglePrecision, OpFd, OpFs, OpFt, "movn.d");
+  emitCOP1FmtRtFsFd(Opcode, DoublePrecision, OpFd, OpFs, OpFt, "movn.d");
 }
 
 void AssemblerMIPS32::movn_s(const Operand *OpFd, const Operand *OpFs,
@@ -879,7 +885,7 @@
 void AssemblerMIPS32::movz_d(const Operand *OpFd, const Operand *OpFs,
                              const Operand *OpFt) {
   static constexpr IValueT Opcode = 0x44000012;
-  emitCOP1FmtFtFsFd(Opcode, SinglePrecision, OpFd, OpFs, OpFt, "movz.d");
+  emitCOP1FmtFtFsFd(Opcode, DoublePrecision, OpFd, OpFs, OpFt, "movz.d");
 }
 
 void AssemblerMIPS32::movz(const Operand *OpRd, const Operand *OpRs,
@@ -1239,7 +1245,10 @@
     return;
   }
   const IOffsetT Position = Buffer.size();
-  emitBr(Cond, OpRs, OpRt, TargetLabel->getEncodedPosition());
+  IOffsetT PrevPosition = TargetLabel->getEncodedPosition();
+  if (PrevPosition != 0)
+    PrevPosition = Position - PrevPosition;
+  emitBr(Cond, OpRs, OpRt, PrevPosition);
   TargetLabel->linkTo(*this, Position);
 }
 
@@ -1252,7 +1261,10 @@
     return;
   }
   const IOffsetT Position = Buffer.size();
-  emitBr(Cond, OpRs, OpRtNone, TargetLabel->getEncodedPosition());
+  IOffsetT PrevPosition = TargetLabel->getEncodedPosition();
+  if (PrevPosition)
+    PrevPosition = Position - PrevPosition;
+  emitBr(Cond, OpRs, OpRtNone, PrevPosition);
   TargetLabel->linkTo(*this, Position);
 }
 
diff --git a/src/IceInstMIPS32.cpp b/src/IceInstMIPS32.cpp
index b6eefe9..1506e02 100644
--- a/src/IceInstMIPS32.cpp
+++ b/src/IceInstMIPS32.cpp
@@ -315,6 +315,13 @@
   }
 }
 
+InstMIPS32MovFP64ToI64::InstMIPS32MovFP64ToI64(Cfg *Func, Variable *Dst,
+                                               Operand *Src,
+                                               Int64Part Int64HiLo)
+    : InstMIPS32(Func, InstMIPS32::Mov_fp, 1, Dst), Int64HiLo(Int64HiLo) {
+  addSource(Src);
+}
+
 InstMIPS32Ret::InstMIPS32Ret(Cfg *Func, Variable *RA, Variable *Source)
     : InstMIPS32(Func, InstMIPS32::Ret, Source ? 2 : 1, nullptr) {
   addSource(RA);
diff --git a/src/IceInstMIPS32.h b/src/IceInstMIPS32.h
index 96e6cb7..aaffbcf 100644
--- a/src/IceInstMIPS32.h
+++ b/src/IceInstMIPS32.h
@@ -29,6 +29,7 @@
 namespace MIPS32 {
 
 enum RelocOp { RO_No, RO_Hi, RO_Lo, RO_Jal };
+enum Int64Part { Int64_Hi, Int64_Lo };
 
 inline void emitRelocOp(Ostream &Str, RelocOp Reloc) {
   switch (Reloc) {
@@ -232,6 +233,7 @@
     Mfhi,
     Mflo,
     Mov, // actually a pseudo op for addi rd, rs, 0
+    Mov_fp,
     Mov_d,
     Mov_s,
     Movf,
@@ -1320,6 +1322,44 @@
   Variable *DestHi = nullptr;
 };
 
+/// Moves one 32-bit half of a double source to one half of an i64 destination.
+class InstMIPS32MovFP64ToI64 final : public InstMIPS32 {
+  InstMIPS32MovFP64ToI64() = delete;
+  InstMIPS32MovFP64ToI64(const InstMIPS32MovFP64ToI64 &) = delete;
+  InstMIPS32MovFP64ToI64 &operator=(const InstMIPS32MovFP64ToI64 &) = delete;
+
+public:
+  static InstMIPS32MovFP64ToI64 *create(Cfg *Func, Variable *Dest, Operand *Src,
+                                        Int64Part Int64HiLo) {
+    return new (Func->allocate<InstMIPS32MovFP64ToI64>())
+        InstMIPS32MovFP64ToI64(Func, Dest, Src, Int64HiLo);
+  }
+
+  bool isRedundantAssign() const override {
+    return checkForRedundantAssign(getDest(), getSrc(0));
+  }
+
+  void dump(const Cfg *Func) const override {
+    if (!BuildDefs::dump())
+      return;
+    Ostream &Str = Func->getContext()->getStrDump();
+    getDest()->dump(Func);
+    Str << " = ";
+    dumpOpcode(Str, "mov_fp", getDest()->getType());
+    Str << " ";
+    getSrc(0)->dump(Func);
+  }
+
+  Int64Part getInt64Part() const { return Int64HiLo; }
+
+  static bool classof(const Inst *Inst) { return isClassof(Inst, Mov_fp); }
+
+private:
+  InstMIPS32MovFP64ToI64(Cfg *Func, Variable *Dest, Operand *Src,
+                         Int64Part Int64HiLo);
+  const Int64Part Int64HiLo; // Which half of the f64 source is moved.
+};
+
 // Declare partial template specializations of emit() methods that already have
 // default implementations. Without this, there is the possibility of ODR
 // violations and link errors.
diff --git a/src/IcePhiLoweringImpl.h b/src/IcePhiLoweringImpl.h
index d6a12b2..b602e81 100644
--- a/src/IcePhiLoweringImpl.h
+++ b/src/IcePhiLoweringImpl.h
@@ -36,7 +36,8 @@
     if (Phi->isDeleted())
       continue;
     Variable *Dest = Phi->getDest();
-    if (Dest->getType() == IceType_i64) {
+    Type DestTy = Dest->getType();
+    if (DestTy == IceType_i64) {
       auto *DestLo = llvm::cast<Variable>(Target->loOperand(Dest));
       auto *DestHi = llvm::cast<Variable>(Target->hiOperand(Dest));
       auto *PhiLo = InstPhi::create(Func, Phi->getSrcSize(), DestLo);
@@ -51,6 +52,23 @@
       Node->getPhis().push_back(PhiLo);
       Node->getPhis().push_back(PhiHi);
       Phi->setDeleted();
+    } else if (isVectorType(DestTy) &&
+               Target->shouldSplitToVariableVecOn32(DestTy)) {
+      auto *DstVec = llvm::cast<VariableVecOn32>(Dest);
+      SizeT Idx = 0;
+      for (Variable *DestElem : DstVec->getContainers()) {
+        auto *PhiElem = InstPhi::create(Func, Phi->getSrcSize(), DestElem);
+        for (SizeT I = 0; I < Phi->getSrcSize(); ++I) {
+          Operand *Src = Phi->getSrc(I);
+          CfgNode *Label = Phi->getLabel(I);
+          Src = Target->legalizeUndef(Src);
+          auto *SrcVec = llvm::cast<VariableVecOn32>(Src);
+          PhiElem->addArgument(SrcVec->getContainers()[Idx], Label);
+        }
+        ++Idx;
+        Node->getPhis().push_back(PhiElem);
+      }
+      Phi->setDeleted();
     }
   }
 }
diff --git a/src/IceTargetLoweringMIPS32.cpp b/src/IceTargetLoweringMIPS32.cpp
index 36650bd..acbe422 100644
--- a/src/IceTargetLoweringMIPS32.cpp
+++ b/src/IceTargetLoweringMIPS32.cpp
@@ -97,7 +97,7 @@
   size_t typeAlignInBytes = typeWidthInBytes(Ty);
   // Vectors are stored on stack with the same alignment as that of int type
   if (isVectorType(Ty))
-    typeAlignInBytes = typeWidthInBytes(IceType_i32);
+    typeAlignInBytes = typeWidthInBytes(IceType_i64);
   return Utils::applyAlignment(Value, typeAlignInBytes);
 }
 
@@ -240,7 +240,7 @@
       // If PartialOnStack is true and if this is a vector type then last two
       // elements are on stack
       if (PartialOnStack && isVectorType(Ty)) {
-        OutArgsSizeBytes = applyStackAlignmentTy(OutArgsSizeBytes, IceType_i32);
+        OutArgsSizeBytes = applyStackAlignmentTy(OutArgsSizeBytes, IceType_i64);
         OutArgsSizeBytes += typeWidthInBytesOnStack(IceType_i32) * 2;
       }
       continue;
@@ -987,7 +987,6 @@
       auto *Reg = llvm::cast<VariableVecOn32>(Var);
       Reg->initVecElement(Func);
       auto *Zero = getZero();
-      Context.insert<InstFakeDef>(Zero);
       for (Variable *Var : Reg->getContainers()) {
         _mov(Var, Zero);
       }
@@ -1475,7 +1474,7 @@
   // prolog/epilog.
   using RegClassType = std::tuple<uint32_t, uint32_t, VarList *>;
   const RegClassType RegClass = RegClassType(
-      RegMIPS32::Reg_GPR_First, RegMIPS32::Reg_GPR_Last, &PreservedGPRs);
+      RegMIPS32::Reg_GPR_First, RegMIPS32::Reg_FPR_Last, &PreservedGPRs);
   const uint32_t FirstRegInClass = std::get<0>(RegClass);
   const uint32_t LastRegInClass = std::get<1>(RegClass);
   VarList *const PreservedRegsInClass = std::get<2>(RegClass);
@@ -1520,7 +1519,8 @@
   // Combine fixed alloca with SpillAreaSize.
   SpillAreaSizeBytes += FixedAllocaSizeBytes;
 
-  TotalStackSizeBytes = PreservedRegsSizeBytes + SpillAreaSizeBytes;
+  TotalStackSizeBytes =
+      applyStackAlignment(PreservedRegsSizeBytes + SpillAreaSizeBytes);
 
   // Generate "addiu sp, sp, -TotalStackSizeBytes"
   if (TotalStackSizeBytes) {
@@ -1533,11 +1533,16 @@
   if (!PreservedGPRs.empty()) {
     uint32_t StackOffset = TotalStackSizeBytes;
     for (Variable *Var : *PreservedRegsInClass) {
-      Variable *PhysicalRegister = getPhysicalRegister(Var->getRegNum());
-      StackOffset -= typeWidthInBytesOnStack(PhysicalRegister->getType());
+      Type RegType;
+      if (RegMIPS32::isFPRReg(Var->getRegNum()))
+        RegType = IceType_f32;
+      else
+        RegType = IceType_i32;
+      auto *PhysicalRegister = makeReg(RegType, Var->getRegNum());
+      StackOffset -= typeWidthInBytesOnStack(RegType);
       Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
       OperandMIPS32Mem *MemoryLocation = OperandMIPS32Mem::create(
-          Func, IceType_i32, SP,
+          Func, RegType, SP,
           llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackOffset)));
       Sandboxer(this).sw(PhysicalRegister, MemoryLocation);
     }
@@ -1652,10 +1657,15 @@
     uint32_t StackOffset = TotalStackSizeBytes - PreservedRegsSizeBytes;
     for (RIter = PreservedGPRs.rbegin(), END = PreservedGPRs.rend();
          RIter != END; ++RIter) {
-      Variable *PhysicalRegister = getPhysicalRegister((*RIter)->getRegNum());
+      Type RegType;
+      if (RegMIPS32::isFPRReg((*RIter)->getRegNum()))
+        RegType = IceType_f32;
+      else
+        RegType = IceType_i32;
+      auto *PhysicalRegister = makeReg(RegType, (*RIter)->getRegNum());
       Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
       OperandMIPS32Mem *MemoryLocation = OperandMIPS32Mem::create(
-          Func, IceType_i32, SP,
+          Func, RegType, SP,
           llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackOffset)));
       _lw(PhysicalRegister, MemoryLocation);
       StackOffset += typeWidthInBytesOnStack(PhysicalRegister->getType());
@@ -1703,6 +1713,39 @@
   return ScratchReg;
 }
 
+void TargetMIPS32::PostLoweringLegalizer::legalizeMovFp(
+    InstMIPS32MovFP64ToI64 *MovInstr) { // Lowers the pseudo-move to a store.
+  Variable *Dest = MovInstr->getDest();
+  Operand *Src = MovInstr->getSrc(0);
+  const Type SrcTy = Src->getType();
+
+  if (Dest != nullptr && SrcTy == IceType_f64) {
+    int32_t Offset = Dest->getStackOffset(); // Dest is addressed on the stack.
+    auto *Base = Target->getPhysicalRegister(Target->getFrameOrStackReg());
+    OperandMIPS32Mem *TAddr = OperandMIPS32Mem::create(
+        Target->Func, IceType_f32, Base,
+        llvm::cast<ConstantInteger32>(Target->Ctx->getConstantInt32(Offset)));
+    OperandMIPS32Mem *Addr = legalizeMemOperand(TAddr);
+    auto *SrcV = llvm::cast<Variable>(Src);
+    Variable *SrcR;
+    if (MovInstr->getInt64Part() == Int64_Lo) { // Lo: first reg of the pair.
+      SrcR = Target->makeReg(
+          IceType_f32, RegMIPS32::get64PairFirstRegNum(SrcV->getRegNum()));
+    } else { // Hi: second reg of the pair.
+      SrcR = Target->makeReg(
+          IceType_f32, RegMIPS32::get64PairSecondRegNum(SrcV->getRegNum()));
+    }
+    Sandboxer(Target).sw(SrcR, Addr); // Store the selected half at Dest's slot.
+    if (MovInstr->isDestRedefined()) {
+      Target->_set_dest_redefined();
+    }
+    MovInstr->setDeleted(); // The store above replaces this pseudo-op.
+    return;
+  }
+
+  llvm::report_fatal_error("legalizeMovFp: Invalid operands");
+}
+
 void TargetMIPS32::PostLoweringLegalizer::legalizeMov(InstMIPS32Mov *MovInstr) {
   Variable *Dest = MovInstr->getDest();
   assert(Dest != nullptr);
@@ -1747,8 +1790,8 @@
             DstFPRLo = Target->makeReg(
                 IceType_i32, RegMIPS32::get64PairSecondRegNum(DRegNum));
           }
-          Target->_mov(DstFPRHi, SrcGPRLo);
-          Target->_mov(DstFPRLo, SrcGPRHi);
+          Target->_mov(DstFPRHi, SrcGPRHi);
+          Target->_mov(DstFPRLo, SrcGPRLo);
           Legalized = true;
         } else {
           Variable *SrcGPR = Target->makeReg(IceType_f32, SRegNum);
@@ -1860,9 +1903,10 @@
 
       // ExtraOffset is only needed for stack-pointer based frames as we have
       // to account for spill storage.
-      const int32_t ExtraOffset = (Var->getRegNum() == Target->getStackReg())
-                                      ? Target->getFrameFixedAllocaOffset()
-                                      : 0;
+      const int32_t ExtraOffset =
+          (Var->getRegNum() == Target->getFrameOrStackReg())
+              ? Target->getFrameFixedAllocaOffset()
+              : 0;
 
       const int32_t Offset = Var->getStackOffset() + ExtraOffset;
       Variable *Base = Target->getPhysicalRegister(Var->getRegNum());
@@ -2018,6 +2062,10 @@
         Legalizer.legalizeMov(MovInstr);
         continue;
       }
+      if (auto *MovInstr = llvm::dyn_cast<InstMIPS32MovFP64ToI64>(CurInstr)) {
+        Legalizer.legalizeMovFp(MovInstr);
+        continue;
+      }
       if (llvm::isa<InstMIPS32Sw>(CurInstr)) {
         if (auto *LegalMem = Legalizer.legalizeMemOperand(Src1M)) {
           Sandboxer(this).sw(Src0V, LegalMem);
@@ -2608,15 +2656,24 @@
 
   switch (Instr->getOp()) {
   case InstArithmetic::Add:
+  case InstArithmetic::Sub: {
+    auto *Const32 = llvm::dyn_cast<ConstantInteger32>(Src1);
+    if (Const32 != nullptr && isInt<16>(int32_t(Const32->getValue()))) {
+      IsSrc1Imm16 = true;
+      Value = Const32->getValue();
+    } else {
+      Src1R = legalizeToReg(Src1);
+    }
+    break;
+  }
   case InstArithmetic::And:
   case InstArithmetic::Or:
   case InstArithmetic::Xor:
-  case InstArithmetic::Sub:
   case InstArithmetic::Shl:
   case InstArithmetic::Lshr:
   case InstArithmetic::Ashr: {
     auto *Const32 = llvm::dyn_cast<ConstantInteger32>(Src1);
-    if (Const32 != nullptr && isInt<16>(int32_t(Const32->getValue()))) {
+    if (Const32 != nullptr && llvm::isUInt<16>(uint32_t(Const32->getValue()))) {
       IsSrc1Imm16 = true;
       Value = Const32->getValue();
     } else {
@@ -2633,14 +2690,25 @@
   switch (Instr->getOp()) {
   case InstArithmetic::_num:
     break;
-  case InstArithmetic::Add:
+  case InstArithmetic::Add: {
+    auto *T0R = Src0R;
+    auto *T1R = Src1R;
+    if (Dest->getType() != IceType_i32) {
+      T0R = makeReg(IceType_i32);
+      lowerCast(InstCast::create(Func, InstCast::Sext, T0R, Src0R));
+      if (!IsSrc1Imm16) {
+        T1R = makeReg(IceType_i32);
+        lowerCast(InstCast::create(Func, InstCast::Sext, T1R, Src1R));
+      }
+    }
     if (IsSrc1Imm16) {
-      _addiu(T, Src0R, Value);
+      _addiu(T, T0R, Value);
     } else {
-      _addu(T, Src0R, Src1R);
+      _addu(T, T0R, T1R);
     }
     _mov(Dest, T);
     return;
+  }
   case InstArithmetic::And:
     if (IsSrc1Imm16) {
       _andi(T, Src0R, Value);
@@ -2665,14 +2733,25 @@
     }
     _mov(Dest, T);
     return;
-  case InstArithmetic::Sub:
+  case InstArithmetic::Sub: {
+    auto *T0R = Src0R;
+    auto *T1R = Src1R;
+    if (Dest->getType() != IceType_i32) {
+      T0R = makeReg(IceType_i32);
+      lowerCast(InstCast::create(Func, InstCast::Sext, T0R, Src0R));
+      if (!IsSrc1Imm16) {
+        T1R = makeReg(IceType_i32);
+        lowerCast(InstCast::create(Func, InstCast::Sext, T1R, Src1R));
+      }
+    }
     if (IsSrc1Imm16) {
-      _addiu(T, Src0R, -Value);
+      _addiu(T, T0R, -Value);
     } else {
-      _subu(T, Src0R, Src1R);
+      _subu(T, T0R, T1R);
     }
     _mov(Dest, T);
     return;
+  }
   case InstArithmetic::Mul: {
     _mul(T, Src0R, Src1R);
     _mov(Dest, T);
@@ -2707,10 +2786,20 @@
     return;
   }
   case InstArithmetic::Ashr: {
+    auto *T0R = Src0R;
+    auto *T1R = Src1R;
+    if (Dest->getType() != IceType_i32) {
+      T0R = makeReg(IceType_i32);
+      lowerCast(InstCast::create(Func, InstCast::Sext, T0R, Src0R));
+      if (!IsSrc1Imm16) {
+        T1R = makeReg(IceType_i32);
+        lowerCast(InstCast::create(Func, InstCast::Sext, T1R, Src1R));
+      }
+    }
     if (IsSrc1Imm16) {
-      _sra(T, Src0R, Value);
+      _sra(T, T0R, Value);
     } else {
-      _srav(T, Src0R, Src1R);
+      _srav(T, T0R, T1R);
     }
     _mov(Dest, T);
     return;
@@ -2733,8 +2822,16 @@
   }
   case InstArithmetic::Sdiv: {
     auto *T_Zero = I32Reg(RegMIPS32::Reg_ZERO);
-    _div(T_Zero, Src0R, Src1R);
-    _teq(Src1R, T_Zero, DivideByZeroTrapCode); // Trap if divide-by-zero
+    auto *T0R = Src0R;
+    auto *T1R = Src1R;
+    if (Dest->getType() != IceType_i32) {
+      T0R = makeReg(IceType_i32);
+      lowerCast(InstCast::create(Func, InstCast::Sext, T0R, Src0R));
+      T1R = makeReg(IceType_i32);
+      lowerCast(InstCast::create(Func, InstCast::Sext, T1R, Src1R));
+    }
+    _div(T_Zero, T0R, T1R);
+    _teq(T1R, T_Zero, DivideByZeroTrapCode); // Trap if divide-by-zero
     _mflo(T, T_Zero);
     _mov(Dest, T);
     return;
@@ -2757,8 +2854,16 @@
   }
   case InstArithmetic::Srem: {
     auto *T_Zero = I32Reg(RegMIPS32::Reg_ZERO);
-    _div(T_Zero, Src0R, Src1R);
-    _teq(Src1R, T_Zero, DivideByZeroTrapCode); // Trap if divide-by-zero
+    auto *T0R = Src0R;
+    auto *T1R = Src1R;
+    if (Dest->getType() != IceType_i32) {
+      T0R = makeReg(IceType_i32);
+      lowerCast(InstCast::create(Func, InstCast::Sext, T0R, Src0R));
+      T1R = makeReg(IceType_i32);
+      lowerCast(InstCast::create(Func, InstCast::Sext, T1R, Src1R));
+    }
+    _div(T_Zero, T0R, T1R);
+    _teq(T1R, T_Zero, DivideByZeroTrapCode); // Trap if divide-by-zero
     _mfhi(T, T_Zero);
     _mov(Dest, T);
     return;
@@ -2905,8 +3010,32 @@
       Src0HiR = legalizeToReg(hiOperand(Src0));
       Src1HiR = legalizeToReg(hiOperand(Src1));
     } else {
-      Src0R = legalizeToReg(Src0);
-      Src1R = legalizeToReg(Src1);
+      auto *Src0RT = legalizeToReg(Src0);
+      auto *Src1RT = legalizeToReg(Src1);
+      // Sign/Zero extend the source operands
+      if (Src0Ty != IceType_i32) {
+        InstCast::OpKind CastKind;
+        switch (CompareInst->getCondition()) {
+        case InstIcmp::Eq:
+        case InstIcmp::Ne:
+        case InstIcmp::Sgt:
+        case InstIcmp::Sge:
+        case InstIcmp::Slt:
+        case InstIcmp::Sle:
+          CastKind = InstCast::Sext;
+          break;
+        default:
+          CastKind = InstCast::Zext;
+          break;
+        }
+        Src0R = makeReg(IceType_i32);
+        Src1R = makeReg(IceType_i32);
+        lowerCast(InstCast::create(Func, CastKind, Src0R, Src0RT));
+        lowerCast(InstCast::create(Func, CastKind, Src1R, Src1RT));
+      } else {
+        Src0R = Src0RT;
+        Src1R = Src1RT;
+      }
     }
     auto *DestT = makeReg(IceType_i32);
 
@@ -3144,9 +3273,9 @@
     if (!InReg) {
       if (isVectorType(Ty)) {
         auto *ArgVec = llvm::cast<VariableVecOn32>(Arg);
+        ParameterAreaSizeBytes =
+            applyStackAlignmentTy(ParameterAreaSizeBytes, IceType_i64);
         for (Variable *Elem : ArgVec->getContainers()) {
-          ParameterAreaSizeBytes =
-              applyStackAlignmentTy(ParameterAreaSizeBytes, IceType_i32);
           StackArgs.push_back(std::make_pair(Elem, ParameterAreaSizeBytes));
           ParameterAreaSizeBytes += typeWidthInBytesOnStack(IceType_i32);
         }
@@ -3179,11 +3308,9 @@
             std::make_pair(Elem3, RegNumT::fixme((unsigned)Reg + 3)));
       } else {
         ParameterAreaSizeBytes =
-            applyStackAlignmentTy(ParameterAreaSizeBytes, IceType_i32);
+            applyStackAlignmentTy(ParameterAreaSizeBytes, IceType_i64);
         StackArgs.push_back(std::make_pair(Elem2, ParameterAreaSizeBytes));
         ParameterAreaSizeBytes += typeWidthInBytesOnStack(IceType_i32);
-        ParameterAreaSizeBytes =
-            applyStackAlignmentTy(ParameterAreaSizeBytes, IceType_i32);
         StackArgs.push_back(std::make_pair(Elem3, ParameterAreaSizeBytes));
         ParameterAreaSizeBytes += typeWidthInBytesOnStack(IceType_i32);
       }
@@ -3484,7 +3611,20 @@
       Src0 = loOperand(Src0);
     Variable *Src0R = legalizeToReg(Src0);
     Variable *T = makeReg(DestTy);
-    _mov(T, Src0R);
+    switch (DestTy) {
+    case IceType_i1:
+      _andi(T, Src0R, 0x1);
+      break;
+    case IceType_i8:
+      _andi(T, Src0R, 0xff);
+      break;
+    case IceType_i16:
+      _andi(T, Src0R, 0xffff);
+      break;
+    default:
+      _mov(T, Src0R);
+      break;
+    }
     _mov(Dest, T);
     break;
   }
@@ -3540,9 +3680,12 @@
     if (Src0Ty != IceType_i64) {
       Variable *Src0R = legalizeToReg(Src0);
       auto *T0R = Src0R;
-      if (Src0Ty != IceType_i32 && CastKind == InstCast::Uitofp) {
+      if (Src0Ty != IceType_i32) {
         T0R = makeReg(IceType_i32);
-        lowerCast(InstCast::create(Func, InstCast::Zext, T0R, Src0R));
+        if (CastKind == InstCast::Uitofp)
+          lowerCast(InstCast::create(Func, InstCast::Zext, T0R, Src0R));
+        else
+          lowerCast(InstCast::create(Func, InstCast::Sext, T0R, Src0R));
       }
       if (isScalarIntegerType(Src0Ty) && DestTy == IceType_f32) {
         Variable *FTmp1 = makeReg(IceType_f32);
@@ -3604,9 +3747,12 @@
       Variable *Src0R = legalizeToReg(Src0);
       auto *T = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
       T->initHiLo(Func);
-      T->getHi()->setMustHaveReg();
-      T->getLo()->setMustHaveReg();
-      _mov(T, Src0R);
+      T->getHi()->setMustNotHaveReg();
+      T->getLo()->setMustNotHaveReg();
+      Context.insert<InstFakeDef>(T->getHi());
+      Context.insert<InstFakeDef>(T->getLo());
+      _mov_fp64_to_i64(T->getHi(), Src0R, Int64_Hi);
+      _mov_fp64_to_i64(T->getLo(), Src0R, Int64_Lo);
       lowerAssign(InstAssign::create(Func, Dest, T));
       break;
     }
@@ -4178,12 +4324,12 @@
     auto *DstE = TVDest->getContainers()[Index / ElemPerCont];
     // Element to insert
     auto *Src1R = legalizeToReg(Instr->getSrc(1));
-    auto *TReg1 = makeReg(Src1R->getType());
-    auto *TReg2 = makeReg(Src1R->getType());
-    auto *TReg3 = makeReg(Src1R->getType());
-    auto *TReg4 = makeReg(Src1R->getType());
-    auto *TReg5 = makeReg(Src1R->getType());
-    auto *TDReg = makeReg(Src1R->getType());
+    auto *TReg1 = makeReg(IceType_i32);
+    auto *TReg2 = makeReg(IceType_i32);
+    auto *TReg3 = makeReg(IceType_i32);
+    auto *TReg4 = makeReg(IceType_i32);
+    auto *TReg5 = makeReg(IceType_i32);
+    auto *TDReg = makeReg(IceType_i32);
     // Position of the element in the container
     uint32_t PosInCont = Index % ElemPerCont;
     // Load source vector in a temporary vector
@@ -4248,7 +4394,7 @@
         _mov(DstE, TDReg);
         break;
       case 3:
-        _srl(TReg1, Src1R, 24); // Position in the destination
+        _sll(TReg1, Src1R, 24); // Position in the destination
         _sll(TReg2, SrcE, 8);
         _srl(TReg3, TReg2, 8); // Clear bits[31:24] of element
         _or(TDReg, TReg1, TReg3);
@@ -5729,8 +5875,8 @@
       } else {
         uint32_t UpperBits = (Value >> 16) & 0xFFFF;
         uint32_t LowerBits = Value & 0xFFFF;
-        Variable *TReg = makeReg(Ty, RegNum);
         if (LowerBits) {
+          Variable *TReg = makeReg(Ty, RegNum);
           _lui(TReg, Ctx->getConstantInt32(UpperBits));
           _ori(Reg, TReg, LowerBits);
         } else {
diff --git a/src/IceTargetLoweringMIPS32.h b/src/IceTargetLoweringMIPS32.h
index 6583464..6f47e21 100644
--- a/src/IceTargetLoweringMIPS32.h
+++ b/src/IceTargetLoweringMIPS32.h
@@ -392,6 +392,11 @@
     }
   }
 
+  void _mov_fp64_to_i64(Variable *Dest, Operand *Src, Int64Part Int64HiLo) {
+    assert(Dest != nullptr);
+    Context.insert<InstMIPS32MovFP64ToI64>(Dest, Src, Int64HiLo);
+  }
+
   void _mov_d(Variable *Dest, Variable *Src) {
     Context.insert<InstMIPS32Mov_d>(Dest, Src);
   }
@@ -659,7 +664,9 @@
   Variable *makeReg(Type Ty, RegNumT RegNum = RegNumT());
 
   Variable *getZero() {
-    return getPhysicalRegister(RegMIPS32::Reg_ZERO, IceType_i32);
+    auto *Zero = makeReg(IceType_i32, RegMIPS32::Reg_ZERO);
+    Context.insert<InstFakeDef>(Zero);
+    return Zero;
   }
 
   Variable *I32Reg(RegNumT RegNum = RegNumT()) {
@@ -809,6 +816,7 @@
     ///
     /// Moves to memory become store instructions, and moves from memory, loads.
     void legalizeMov(InstMIPS32Mov *Mov);
+    void legalizeMovFp(InstMIPS32MovFP64ToI64 *MovInstr);
 
   private:
     /// Creates a new Base register centered around [Base, +/- Offset].
diff --git a/tests_lit/assembler/mips32/encoding_intrinsics.ll b/tests_lit/assembler/mips32/encoding_intrinsics.ll
index 503826e..7d18f3e 100644
--- a/tests_lit/assembler/mips32/encoding_intrinsics.ll
+++ b/tests_lit/assembler/mips32/encoding_intrinsics.ll
@@ -41,6 +41,7 @@
 
 ; ASM-LABEL: encBswap16
 ; ASM-NEXT: .LencBswap16$entry:
+; ASM-NEXT: 	andi	$a0, $a0, 65535
 ; ASM-NEXT: 	sll	$v0, $a0, 8
 ; ASM-NEXT: 	lui	$v1, 255
 ; ASM-NEXT: 	and	$v0, $v0, $v1
@@ -51,6 +52,7 @@
 ; ASM-NEXT: 	jr	$ra
 
 ; DIS-LABEL: <encBswap16>:
+; DIS-NEXT:  3084ffff 	andi	a0,a0,0xffff
 ; DIS-NEXT:  00041200 	sll	v0,a0,0x8
 ; DIS-NEXT:  3c0300ff 	lui	v1,0xff
 ; DIS-NEXT:  00431024 	and	v0,v0,v1
@@ -62,6 +64,10 @@
 
 ; IASM-LABEL: encBswap16
 ; IASM-NEXT: .LencBswap16$entry:
+; IASM-NEXT: 	.byte 0xff
+; IASM-NEXT: 	.byte 0xff
+; IASM-NEXT: 	.byte 0x84
+; IASM-NEXT: 	.byte 0x30
 ; IASM-NEXT: 	.byte 0x0
 ; IASM-NEXT: 	.byte 0x12
 ; IASM-NEXT: 	.byte 0x4
@@ -576,7 +582,7 @@
 ; ASM-NEXT: 	clz	$a0, $a0
 ; ASM-NEXT: 	addiu	$a0, $a0, 32
 ; ASM-NEXT: 	movn	$a0, $v0, $a1
-; ASM-NEXT: 	addiu	$v0, $zero, 0
+; ASM:      	addiu	$v0, $zero, 0
 ; ASM-NEXT: 	move	$v1, $v0
 ; ASM-NEXT: 	move	$v0, $a0
 ; ASM-NEXT: 	jr	$ra
@@ -705,10 +711,10 @@
 ; ASM-LABEL: encCttz32
 ; ASM-NEXT: .LencCttz32$entry:
 ; ASM-NEXT: 	addiu	$v0, $a0, -1
-; ASM-NEXT: 	nor	$a0, $a0, $zero
+; ASM: 	nor	$a0, $a0, $zero
 ; ASM-NEXT: 	and	$a0, $a0, $v0
 ; ASM-NEXT: 	clz	$a0, $a0
-; ASM-NEXT: 	addiu	$v0, $zero, 32
+; ASM: 	addiu	$v0, $zero, 32
 ; ASM-NEXT: 	subu	$v0, $v0, $a0
 ; ASM-NEXT: 	jr	$ra
 
@@ -763,10 +769,10 @@
 ; ASM-NEXT: 	lui     $v0, 1
 ; ASM-NEXT: 	ori     $v0, $v0, 57920
 ; ASM-NEXT: 	addiu   $v1, $v0, -1
-; ASM-NEXT: 	nor     $v0, $v0, $zero
+; ASM: 	nor     $v0, $v0, $zero
 ; ASM-NEXT: 	and     $v0, $v0, $v1
 ; ASM-NEXT: 	clz     $v0, $v0
-; ASM-NEXT: 	addiu   $v1, $zero, 32
+; ASM: 	addiu   $v1, $zero, 32
 ; ASM-NEXT: 	subu    $v1, $v1, $v0
 ; ASM-NEXT: 	move    $v0, $v1
 ; ASM-NEXT: 	jr      $ra
@@ -835,19 +841,19 @@
 ; ASM-LABEL: encCttz64
 ; ASM-NEXT: .LencCttz64$entry:
 ; ASM-NEXT: 	addiu   $v0, $a1, -1
-; ASM-NEXT: 	nor     $a1, $a1, $zero
+; ASM: 	nor     $a1, $a1, $zero
 ; ASM-NEXT: 	and     $a1, $a1, $v0
 ; ASM-NEXT: 	clz     $a1, $a1
-; ASM-NEXT: 	addiu   $v0, $zero, 64
+; ASM: 	addiu   $v0, $zero, 64
 ; ASM-NEXT: 	subu    $v0, $v0, $a1
 ; ASM-NEXT: 	addiu   $v1, $a0, -1
-; ASM-NEXT: 	nor     $a1, $a0, $zero
+; ASM: 	nor     $a1, $a0, $zero
 ; ASM-NEXT: 	and     $a1, $a1, $v1
 ; ASM-NEXT: 	clz     $a1, $a1
-; ASM-NEXT: 	addiu   $v1, $zero, 32
+; ASM: 	addiu   $v1, $zero, 32
 ; ASM-NEXT: 	subu    $v1, $v1, $a1
 ; ASM-NEXT: 	movn    $v0, $v1, $a0
-; ASM-NEXT: 	addiu   $v1, $zero, 0
+; ASM: 	addiu   $v1, $zero, 0
 ; ASM-NEXT: 	jr      $ra
 
 ; DIS-LABEL:  <encCttz64>:
@@ -944,19 +950,19 @@
 ; ASM-NEXT: 	lui     $v1, 48793
 ; ASM-NEXT: 	ori     $v1, $v1, 6676
 ; ASM-NEXT: 	addiu   $a0, $v0, -1
-; ASM-NEXT: 	nor     $v0, $v0, $zero
+; ASM: 	nor     $v0, $v0, $zero
 ; ASM-NEXT: 	and     $v0, $v0, $a0
 ; ASM-NEXT: 	clz     $v0, $v0
-; ASM-NEXT: 	addiu   $a0, $zero, 64
+; ASM: 	addiu   $a0, $zero, 64
 ; ASM-NEXT: 	subu    $a0, $a0, $v0
 ; ASM-NEXT: 	addiu   $v0, $v1, -1
-; ASM-NEXT: 	nor     $a1, $v1, $zero
+; ASM: 	nor     $a1, $v1, $zero
 ; ASM-NEXT: 	and     $a1, $a1, $v0
 ; ASM-NEXT: 	clz     $a1, $a1
-; ASM-NEXT: 	addiu   $v0, $zero, 32
+; ASM: 	addiu   $v0, $zero, 32
 ; ASM-NEXT: 	subu    $v0, $v0, $a1
 ; ASM-NEXT: 	movn    $a0, $v0, $v1
-; ASM-NEXT: 	addiu   $v0, $zero, 0
+; ASM: 	addiu   $v0, $zero, 0
 ; ASM-NEXT: 	move    $v1, $v0
 ; ASM-NEXT: 	move    $v0, $a0
 ; ASM-NEXT: 	jr      $ra
@@ -1072,7 +1078,7 @@
 
 ; ASM-LABEL: encTrap
 ; ASM-NEXT: .LencTrap$__0:
-; ASM-NEXT: 	teq	$zero, $zero, 0
+; ASM: 	teq	$zero, $zero, 0
 
 ; DIS-LABEL: <encTrap>:
 ; DIS-NEXT:  00000034 	teq	zero,zero
diff --git a/tests_lit/assembler/mips32/encoding_test_arith_fp.ll b/tests_lit/assembler/mips32/encoding_test_arith_fp.ll
index 91fb8da..4e2ea84 100644
--- a/tests_lit/assembler/mips32/encoding_test_arith_fp.ll
+++ b/tests_lit/assembler/mips32/encoding_test_arith_fp.ll
@@ -478,25 +478,41 @@
 }
 ; ASM-LABEL: cast_d2ll_const
 ; ASM-LABEL: .Lcast_d2ll_const$entry:
-; ASM-NEXT:	lui $[[REG:.*]], %hi({{.*}})
+; ASM:	lui $[[REG:.*]], %hi({{.*}})
 ; ASM-NEXT:	ldc1 $[[FREG:.*]], %lo({{.*}})($[[REG]])
 
-; DIS-LABEL: 000000c0 <cast_d2ll_const>:
-; DIS-NEXT:  c0: 3c020000  lui v0,0x0
-; DIS-NEXT:  c4: d4400000  ldc1 $f0,0(v0)
+; DIS-LABEL: <cast_d2ll_const>:
+; DIS:  3c020000  lui v0,0x0
+; DIS-NEXT:  d4400000  ldc1 $f0,0(v0)
 
 ; IASM-LABEL: cast_d2ll_const:
 ; IASM-LABEL: .Lcast_d2ll_const$entry:
+; IASM-NEXT:	.byte 0xf0
+; IASM-NEXT:	.byte 0xff
+; IASM-NEXT:	.byte 0xbd
+; IASM-NEXT:	.byte 0x27
 ; IASM-NEXT:	.word 0x3c020000 # R_MIPS_HI16 [[LAB:.*]]
 ; IASM-NEXT:	.word 0xd4400000 # R_MIPS_LO16 [[LAB]]
 ; IASM-NEXT:	.byte 0x0
-; IASM-NEXT:	.byte 0x8
-; IASM-NEXT:	.byte 0x3
-; IASM-NEXT:	.byte 0x44
+; IASM-NEXT:	.byte 0x0
+; IASM-NEXT:	.byte 0xa1
+; IASM-NEXT:	.byte 0xe7
+; IASM-NEXT:	.byte 0x4
+; IASM-NEXT:	.byte 0x0
+; IASM-NEXT:	.byte 0xa0
+; IASM-NEXT:	.byte 0xe7
+; IASM-NEXT:	.byte 0x4
+; IASM-NEXT:	.byte 0x0
+; IASM-NEXT:	.byte 0xa2
+; IASM-NEXT:	.byte 0x8f
 ; IASM-NEXT:	.byte 0x0
 ; IASM-NEXT:	.byte 0x0
-; IASM-NEXT:	.byte 0x2
-; IASM-NEXT:	.byte 0x44
+; IASM-NEXT:	.byte 0xa3
+; IASM-NEXT:	.byte 0x8f
+; IASM-NEXT:	.byte 0x10
+; IASM-NEXT:	.byte 0x0
+; IASM-NEXT:	.byte 0xbd
+; IASM-NEXT:	.byte 0x27
 ; IASM-NEXT:	.byte 0x8
 ; IASM-NEXT:	.byte 0x0
 ; IASM-NEXT:	.byte 0xe0
@@ -505,7 +521,14 @@
 ; IASM-NEXT:	.byte 0x0
 ; IASM-NEXT:	.byte 0x0
 ; IASM-NEXT:	.byte 0x0
-
+; IASM-NEXT:	.byte 0x34
+; IASM-NEXT:	.byte 0x0
+; IASM-NEXT:	.byte 0x0
+; IASM-NEXT:	.byte 0x0
+; IASM-NEXT:	.byte 0x34
+; IASM-NEXT:	.byte 0x0
+; IASM-NEXT:	.byte 0x0
+; IASM-NEXT:	.byte 0x0
 
 declare void @bar(i32 %a1, i32 %a2)
 define internal void @Call() {
@@ -515,8 +538,8 @@
 ; ASM-LABEL: Call
 ; ASM: jal	bar
 
-; DIS-LABEL: 000000e0 <Call>:
-; DIS: f0: 0c000000  jal     0
+; DIS-LABEL: 000000f0 <Call>:
+; DIS: 100:	0c000000  jal     0
 
 ; IASM-LABEL: Call:
 ; IASM:	.word 0xc000000 # R_MIPS_26 bar
diff --git a/tests_lit/assembler/mips32/encoding_test_fcmp.ll b/tests_lit/assembler/mips32/encoding_test_fcmp.ll
index f03d7f9..4a2bc81 100644
--- a/tests_lit/assembler/mips32/encoding_test_fcmp.ll
+++ b/tests_lit/assembler/mips32/encoding_test_fcmp.ll
@@ -31,7 +31,7 @@
 
 ; ASM-LABEL: fcmpFalseFloat:
 ; ASM-NEXT: .LfcmpFalseFloat$entry:
-; ASM-NEXT: 	addiu	$v0, $zero, 0
+; ASM: 	addiu	$v0, $zero, 0
 ; ASM-NEXT: 	andi	$v0, $v0, 1
 ; ASM-NEXT: 	jr	$ra
 
@@ -64,7 +64,7 @@
 
 ; ASM-LABEL: fcmpFalseDouble:
 ; ASM-NEXT: .LfcmpFalseDouble$entry:
-; ASM-NEXT: 	addiu	$v0, $zero, 0
+; ASM: 	addiu	$v0, $zero, 0
 ; ASM-NEXT: 	andi	$v0, $v0, 1
 ; ASM-NEXT: 	jr	$ra
 
@@ -97,9 +97,9 @@
 
 ; ASM-LABEL: fcmpOeqFloat
 ; ASM-NEXT: .LfcmpOeqFloat$entry:
-; ASM-NEXT: 	c.eq.s	$f12, $f14
-; ASM-NEXT: 	addiu	$v0, $zero, 1
-; ASM-NEXT: 	movf	$v0, $zero, $fcc0
+; ASM: 	c.eq.s	$f12, $f14
+; ASM: 	addiu	$v0, $zero, 1
+; ASM: 	movf	$v0, $zero, $fcc0
 ; ASM-NEXT: 	andi	$v0, $v0, 1
 ; ASM-NEXT: 	jr	$ra
 
@@ -142,9 +142,9 @@
 
 ; ASM-LABEL: fcmpOeqDouble
 ; ASM-NEXT: .LfcmpOeqDouble$entry:
-; ASM-NEXT: 	c.eq.d	$f12, $f14
-; ASM-NEXT: 	addiu	$v0, $zero, 1
-; ASM-NEXT: 	movf	$v0, $zero, $fcc0
+; ASM: 	c.eq.d	$f12, $f14
+; ASM: 	addiu	$v0, $zero, 1
+; ASM: 	movf	$v0, $zero, $fcc0
 ; ASM-NEXT: 	andi	$v0, $v0, 1
 ; ASM-NEXT: 	jr	$ra
 
@@ -187,9 +187,9 @@
 
 ; ASM-LABEL: fcmpOgtFloat
 ; ASM-NEXT: .LfcmpOgtFloat$entry:
-; ASM-NEXT: 	c.ule.s	$f12, $f14
-; ASM-NEXT: 	addiu	$v0, $zero, 1
-; ASM-NEXT: 	movt	$v0, $zero, $fcc0
+; ASM: 	c.ule.s	$f12, $f14
+; ASM: 	addiu	$v0, $zero, 1
+; ASM: 	movt	$v0, $zero, $fcc0
 ; ASM-NEXT: 	andi	$v0, $v0, 1
 ; ASM-NEXT: 	jr	$ra
 
@@ -232,9 +232,9 @@
 
 ; ASM-LABEL: fcmpOgtDouble
 ; ASM-NEXT: .LfcmpOgtDouble$entry:
-; ASM-NEXT: 	c.ule.d	$f12, $f14
-; ASM-NEXT: 	addiu	$v0, $zero, 1
-; ASM-NEXT: 	movt	$v0, $zero, $fcc0
+; ASM: 	c.ule.d	$f12, $f14
+; ASM: 	addiu	$v0, $zero, 1
+; ASM: 	movt	$v0, $zero, $fcc0
 ; ASM-NEXT: 	andi	$v0, $v0, 1
 ; ASM-NEXT: 	jr	$ra
 
@@ -277,9 +277,9 @@
 
 ; ASM-LABEL: fcmpOgeFloat
 ; ASM-NEXT: .LfcmpOgeFloat$entry:
-; ASM-NEXT: 	c.ult.s	$f12, $f14
-; ASM-NEXT: 	addiu	$v0, $zero, 1
-; ASM-NEXT: 	movt	$v0, $zero, $fcc0
+; ASM: 	c.ult.s	$f12, $f14
+; ASM: 	addiu	$v0, $zero, 1
+; ASM: 	movt	$v0, $zero, $fcc0
 ; ASM-NEXT: 	andi	$v0, $v0, 1
 ; ASM-NEXT: 	jr	$ra
 
@@ -322,9 +322,9 @@
 
 ; ASM-LABEL: fcmpOgeDouble
 ; ASM-NEXT: .LfcmpOgeDouble$entry:
-; ASM-NEXT: 	c.ult.d	$f12, $f14
-; ASM-NEXT: 	addiu	$v0, $zero, 1
-; ASM-NEXT: 	movt	$v0, $zero, $fcc0
+; ASM: 	c.ult.d	$f12, $f14
+; ASM: 	addiu	$v0, $zero, 1
+; ASM: 	movt	$v0, $zero, $fcc0
 ; ASM-NEXT: 	andi	$v0, $v0, 1
 ; ASM-NEXT: 	jr	$ra
 
@@ -367,9 +367,9 @@
 
 ; ASM-LABEL: fcmpOltFloat
 ; ASM-NEXT: .LfcmpOltFloat$entry:
-; ASM-NEXT: 	c.olt.s	$f12, $f14
-; ASM-NEXT: 	addiu	$v0, $zero, 1
-; ASM-NEXT: 	movf	$v0, $zero, $fcc0
+; ASM: 	c.olt.s	$f12, $f14
+; ASM: 	addiu	$v0, $zero, 1
+; ASM: 	movf	$v0, $zero, $fcc0
 ; ASM-NEXT: 	andi	$v0, $v0, 1
 ; ASM-NEXT: 	jr	$ra
 
@@ -412,9 +412,9 @@
 
 ; ASM-LABEL: fcmpOltDouble
 ; ASM-NEXT: .LfcmpOltDouble$entry:
-; ASM-NEXT: 	c.olt.d	$f12, $f14
-; ASM-NEXT: 	addiu	$v0, $zero, 1
-; ASM-NEXT: 	movf	$v0, $zero, $fcc0
+; ASM: 	c.olt.d	$f12, $f14
+; ASM: 	addiu	$v0, $zero, 1
+; ASM: 	movf	$v0, $zero, $fcc0
 ; ASM-NEXT: 	andi	$v0, $v0, 1
 ; ASM-NEXT: 	jr	$ra
 
@@ -457,9 +457,9 @@
 
 ; ASM-LABEL: fcmpOleFloat
 ; ASM-NEXT: .LfcmpOleFloat$entry:
-; ASM-NEXT: 	c.ole.s	$f12, $f14
-; ASM-NEXT: 	addiu	$v0, $zero, 1
-; ASM-NEXT: 	movf	$v0, $zero, $fcc0
+; ASM: 	c.ole.s	$f12, $f14
+; ASM: 	addiu	$v0, $zero, 1
+; ASM: 	movf	$v0, $zero, $fcc0
 ; ASM-NEXT: 	andi	$v0, $v0, 1
 ; ASM-NEXT: 	jr	$ra
 
@@ -502,9 +502,9 @@
 
 ; ASM-LABEL: fcmpOleDouble
 ; ASM-NEXT: .LfcmpOleDouble$entry:
-; ASM-NEXT: 	c.ole.d	$f12, $f14
-; ASM-NEXT: 	addiu	$v0, $zero, 1
-; ASM-NEXT: 	movf	$v0, $zero, $fcc0
+; ASM: 	c.ole.d	$f12, $f14
+; ASM: 	addiu	$v0, $zero, 1
+; ASM: 	movf	$v0, $zero, $fcc0
 ; ASM-NEXT: 	andi	$v0, $v0, 1
 ; ASM-NEXT: 	jr	$ra
 
@@ -547,9 +547,9 @@
 
 ; ASM-LABEL: fcmpOneFloat
 ; ASM-NEXT: .LfcmpOneFloat$entry:
-; ASM-NEXT: 	c.ueq.s	$f12, $f14
-; ASM-NEXT: 	addiu	$v0, $zero, 1
-; ASM-NEXT: 	movt	$v0, $zero, $fcc0
+; ASM: 	c.ueq.s	$f12, $f14
+; ASM: 	addiu	$v0, $zero, 1
+; ASM: 	movt	$v0, $zero, $fcc0
 ; ASM-NEXT: 	andi	$v0, $v0, 1
 ; ASM-NEXT: 	jr	$ra
 
@@ -592,9 +592,9 @@
 
 ; ASM-LABEL: fcmpOneDouble
 ; ASM-NEXT: .LfcmpOneDouble$entry:
-; ASM-NEXT: 	c.ueq.d	$f12, $f14
-; ASM-NEXT: 	addiu	$v0, $zero, 1
-; ASM-NEXT: 	movt	$v0, $zero, $fcc0
+; ASM: 	c.ueq.d	$f12, $f14
+; ASM: 	addiu	$v0, $zero, 1
+; ASM: 	movt	$v0, $zero, $fcc0
 ; ASM-NEXT: 	andi	$v0, $v0, 1
 ; ASM-NEXT: 	jr	$ra
 
@@ -637,9 +637,9 @@
 
 ; ASM-LABEL: fcmpOrdFloat:
 ; ASM-NEXT: .LfcmpOrdFloat$entry:
-; ASM-NEXT: 	c.un.s	$f12, $f14
-; ASM-NEXT: 	addiu	$v0, $zero, 1
-; ASM-NEXT: 	movt	$v0, $zero, $fcc0
+; ASM: 	c.un.s	$f12, $f14
+; ASM: 	addiu	$v0, $zero, 1
+; ASM: 	movt	$v0, $zero, $fcc0
 ; ASM-NEXT: 	andi	$v0, $v0, 1
 ; ASM-NEXT: 	jr	$ra
 
@@ -682,9 +682,9 @@
 
 ; ASM-LABEL: fcmpOrdDouble:
 ; ASM-NEXT: .LfcmpOrdDouble$entry:
-; ASM-NEXT: 	c.un.d	$f12, $f14
-; ASM-NEXT: 	addiu	$v0, $zero, 1
-; ASM-NEXT: 	movt	$v0, $zero, $fcc0
+; ASM: 	c.un.d	$f12, $f14
+; ASM: 	addiu	$v0, $zero, 1
+; ASM: 	movt	$v0, $zero, $fcc0
 ; ASM-NEXT: 	andi	$v0, $v0, 1
 ; ASM-NEXT: 	jr	$ra
 
@@ -727,9 +727,9 @@
 
 ; ASM-LABEL: fcmpUeqFloat
 ; ASM-NEXT: .LfcmpUeqFloat$entry:
-; ASM-NEXT: 	c.ueq.s	$f12, $f14
-; ASM-NEXT: 	addiu	$v0, $zero, 1
-; ASM-NEXT: 	movf	$v0, $zero, $fcc0
+; ASM: 	c.ueq.s	$f12, $f14
+; ASM: 	addiu	$v0, $zero, 1
+; ASM: 	movf	$v0, $zero, $fcc0
 ; ASM-NEXT: 	andi	$v0, $v0, 1
 ; ASM-NEXT: 	jr	$ra
 
@@ -772,9 +772,9 @@
 
 ; ASM-LABEL: fcmpUeqDouble
 ; ASM-NEXT: .LfcmpUeqDouble$entry:
-; ASM-NEXT: 	c.ueq.d	$f12, $f14
-; ASM-NEXT: 	addiu	$v0, $zero, 1
-; ASM-NEXT: 	movf	$v0, $zero, $fcc0
+; ASM: 	c.ueq.d	$f12, $f14
+; ASM: 	addiu	$v0, $zero, 1
+; ASM: 	movf	$v0, $zero, $fcc0
 ; ASM-NEXT: 	andi	$v0, $v0, 1
 ; ASM-NEXT: 	jr	$ra
 
@@ -817,9 +817,9 @@
 
 ; ASM-LABEL: fcmpUgtFloat
 ; ASM-NEXT: .LfcmpUgtFloat$entry:
-; ASM-NEXT: 	c.ole.s	$f12, $f14
-; ASM-NEXT: 	addiu	$v0, $zero, 1
-; ASM-NEXT: 	movt	$v0, $zero, $fcc0
+; ASM: 	c.ole.s	$f12, $f14
+; ASM: 	addiu	$v0, $zero, 1
+; ASM: 	movt	$v0, $zero, $fcc0
 ; ASM-NEXT: 	andi	$v0, $v0, 1
 ; ASM-NEXT: 	jr	$ra
 
@@ -862,9 +862,9 @@
 
 ; ASM-LABEL: fcmpUgtDouble
 ; ASM-NEXT: .LfcmpUgtDouble$entry:
-; ASM-NEXT: 	c.ole.d	$f12, $f14
-; ASM-NEXT: 	addiu	$v0, $zero, 1
-; ASM-NEXT: 	movt	$v0, $zero, $fcc0
+; ASM: 	c.ole.d	$f12, $f14
+; ASM: 	addiu	$v0, $zero, 1
+; ASM: 	movt	$v0, $zero, $fcc0
 ; ASM-NEXT: 	andi	$v0, $v0, 1
 ; ASM-NEXT: 	jr	$ra
 
@@ -907,9 +907,9 @@
 
 ; ASM-LABEL: fcmpUgeFloat
 ; ASM-NEXT: .LfcmpUgeFloat$entry:
-; ASM-NEXT: 	c.olt.s	$f12, $f14
-; ASM-NEXT: 	addiu	$v0, $zero, 1
-; ASM-NEXT: 	movt	$v0, $zero, $fcc0
+; ASM: 	c.olt.s	$f12, $f14
+; ASM: 	addiu	$v0, $zero, 1
+; ASM: 	movt	$v0, $zero, $fcc0
 ; ASM-NEXT: 	andi	$v0, $v0, 1
 ; ASM-NEXT: 	jr	$ra
 
@@ -952,9 +952,9 @@
 
 ; ASM-LABEL: fcmpUgeDouble
 ; ASM-NEXT: .LfcmpUgeDouble$entry:
-; ASM-NEXT: 	c.olt.d	$f12, $f14
-; ASM-NEXT: 	addiu	$v0, $zero, 1
-; ASM-NEXT: 	movt	$v0, $zero, $fcc0
+; ASM: 	c.olt.d	$f12, $f14
+; ASM: 	addiu	$v0, $zero, 1
+; ASM: 	movt	$v0, $zero, $fcc0
 ; ASM-NEXT: 	andi	$v0, $v0, 1
 ; ASM-NEXT: 	jr	$ra
 
@@ -997,9 +997,9 @@
 
 ; ASM-LABEL: fcmpUltFloat
 ; ASM-NEXT: .LfcmpUltFloat$entry:
-; ASM-NEXT: 	c.ult.s	$f12, $f14
-; ASM-NEXT: 	addiu	$v0, $zero, 1
-; ASM-NEXT: 	movf	$v0, $zero, $fcc0
+; ASM: 	c.ult.s	$f12, $f14
+; ASM: 	addiu	$v0, $zero, 1
+; ASM: 	movf	$v0, $zero, $fcc0
 ; ASM-NEXT: 	andi	$v0, $v0, 1
 ; ASM-NEXT: 	jr	$ra
 
@@ -1042,9 +1042,9 @@
 
 ; ASM-LABEL: fcmpUltDouble
 ; ASM-NEXT: .LfcmpUltDouble$entry:
-; ASM-NEXT: 	c.ult.d	$f12, $f14
-; ASM-NEXT: 	addiu	$v0, $zero, 1
-; ASM-NEXT: 	movf	$v0, $zero, $fcc0
+; ASM: 	c.ult.d	$f12, $f14
+; ASM: 	addiu	$v0, $zero, 1
+; ASM: 	movf	$v0, $zero, $fcc0
 ; ASM-NEXT: 	andi	$v0, $v0, 1
 ; ASM-NEXT: 	jr	$ra
 
@@ -1087,9 +1087,9 @@
 
 ; ASM-LABEL: fcmpUleFloat
 ; ASM-NEXT: .LfcmpUleFloat$entry:
-; ASM-NEXT: 	c.ule.s	$f12, $f14
-; ASM-NEXT: 	addiu	$v0, $zero, 1
-; ASM-NEXT: 	movf	$v0, $zero, $fcc0
+; ASM: 	c.ule.s	$f12, $f14
+; ASM: 	addiu	$v0, $zero, 1
+; ASM: 	movf	$v0, $zero, $fcc0
 ; ASM-NEXT: 	andi	$v0, $v0, 1
 ; ASM-NEXT: 	jr	$ra
 
@@ -1132,9 +1132,9 @@
 
 ; ASM-LABEL: fcmpUleDouble
 ; ASM-NEXT: .LfcmpUleDouble$entry:
-; ASM-NEXT: 	c.ule.d	$f12, $f14
-; ASM-NEXT: 	addiu	$v0, $zero, 1
-; ASM-NEXT: 	movf	$v0, $zero, $fcc0
+; ASM: 	c.ule.d	$f12, $f14
+; ASM: 	addiu	$v0, $zero, 1
+; ASM: 	movf	$v0, $zero, $fcc0
 ; ASM-NEXT: 	andi	$v0, $v0, 1
 ; ASM-NEXT: 	jr	$ra
 
@@ -1177,9 +1177,9 @@
 
 ; ASM-LABEL: fcmpUneFloat
 ; ASM-NEXT: .LfcmpUneFloat$entry:
-; ASM-NEXT: 	c.eq.s	$f12, $f14
-; ASM-NEXT: 	addiu	$v0, $zero, 1
-; ASM-NEXT: 	movt	$v0, $zero, $fcc0
+; ASM: 	c.eq.s	$f12, $f14
+; ASM: 	addiu	$v0, $zero, 1
+; ASM: 	movt	$v0, $zero, $fcc0
 ; ASM-NEXT: 	andi	$v0, $v0, 1
 ; ASM-NEXT: 	jr	$ra
 
@@ -1222,9 +1222,9 @@
 
 ; ASM-LABEL: fcmpUneDouble
 ; ASM-NEXT: .LfcmpUneDouble$entry:
-; ASM-NEXT: 	c.eq.d	$f12, $f14
-; ASM-NEXT: 	addiu	$v0, $zero, 1
-; ASM-NEXT: 	movt	$v0, $zero, $fcc0
+; ASM: 	c.eq.d	$f12, $f14
+; ASM: 	addiu	$v0, $zero, 1
+; ASM: 	movt	$v0, $zero, $fcc0
 ; ASM-NEXT: 	andi	$v0, $v0, 1
 ; ASM-NEXT: 	jr	$ra
 
@@ -1267,9 +1267,9 @@
 
 ; ASM-LABEL: fcmpUnoFloat
 ; ASM-NEXT: .LfcmpUnoFloat$entry:
-; ASM-NEXT: 	c.un.s	$f12, $f14
-; ASM-NEXT: 	addiu	$v0, $zero, 1
-; ASM-NEXT: 	movf	$v0, $zero, $fcc0
+; ASM: 	c.un.s	$f12, $f14
+; ASM: 	addiu	$v0, $zero, 1
+; ASM: 	movf	$v0, $zero, $fcc0
 ; ASM-NEXT: 	andi	$v0, $v0, 1
 ; ASM-NEXT: 	jr	$ra
 
@@ -1312,9 +1312,9 @@
 
 ; ASM-LABEL: fcmpUnoDouble
 ; ASM-NEXT: .LfcmpUnoDouble$entry:
-; ASM-NEXT: 	c.un.d	$f12, $f14
-; ASM-NEXT: 	addiu	$v0, $zero, 1
-; ASM-NEXT: 	movf	$v0, $zero, $fcc0
+; ASM: 	c.un.d	$f12, $f14
+; ASM: 	addiu	$v0, $zero, 1
+; ASM: 	movf	$v0, $zero, $fcc0
 ; ASM-NEXT: 	andi	$v0, $v0, 1
 ; ASM-NEXT: 	jr	$ra
 
@@ -1357,7 +1357,7 @@
 
 ; ASM-LABEL: fcmpTrueFloat
 ; ASM-NEXT: .LfcmpTrueFloat$entry:
-; ASM-NEXT: 	addiu	$v0, $zero, 1
+; ASM: 	addiu	$v0, $zero, 1
 ; ASM-NEXT: 	andi	$v0, $v0, 1
 ; ASM-NEXT: 	jr	$ra
 
@@ -1390,7 +1390,7 @@
 
 ; ASM-LABEL: fcmpTrueDouble
 ; ASM-NEXT: .LfcmpTrueDouble$entry:
-; ASM-NEXT: 	addiu	$v0, $zero, 1
+; ASM: 	addiu	$v0, $zero, 1
 ; ASM-NEXT: 	andi	$v0, $v0, 1
 ; ASM-NEXT: 	jr	$ra
 
diff --git a/tests_lit/assembler/mips32/encoding_trap.ll b/tests_lit/assembler/mips32/encoding_trap.ll
index 83c0ee5..a65e6a3 100644
--- a/tests_lit/assembler/mips32/encoding_trap.ll
+++ b/tests_lit/assembler/mips32/encoding_trap.ll
@@ -28,7 +28,7 @@
 
 ; ASM-LABEL: encTrap
 ; ASM-NEXT: .LencTrap$__0:
-; ASM-NEXT: 	teq	$zero, $zero, 0
+; ASM: 	teq	$zero, $zero, 0
 
 ; DIS-LABEL: 00000000 <encTrap>:
 ; DIS-NEXT:    0:	00000034 	teq	zero,zero
diff --git a/tests_lit/llvm2ice_tests/alloc.ll b/tests_lit/llvm2ice_tests/alloc.ll
index 8600054..04e5b96 100644
--- a/tests_lit/llvm2ice_tests/alloc.ll
+++ b/tests_lit/llvm2ice_tests/alloc.ll
@@ -62,9 +62,9 @@
 ; ARM32:       bl {{.*}} R_{{.*}}    f1
 
 ; MIPS32-LABEL: fixed_416_align_16
-; MIPS32-OPT2: addiu sp,sp,-436
+; MIPS32-OPT2: addiu sp,sp,-448
 ; MIPS32-OPT2: addiu a0,sp,16
-; MIPS32-OPTM1: addiu sp,sp,-456
+; MIPS32-OPTM1: addiu sp,sp,-464
 ; MIPS32-OPTM1: addiu [[REG:.*]],sp,16
 ; MIPS32-OPTM1: sw [[REG]],{{.*}}
 ; MIPS32-OPTM1: lw a0,{{.*}}
@@ -93,9 +93,9 @@
 ; ARM32:       bl {{.*}} R_{{.*}}    f1
 
 ; MIPS32-LABEL: fixed_416_align_32
-; MIPS32-OPT2: addiu sp,sp,-440
-; MIPS32-OPT2: addiu a0,sp,32
-; MIPS32-OPTM1: addiu sp,sp,-456
+; MIPS32-OPT2: addiu sp,sp,-448
+; MIPS32-OPT2: addiu a0,sp,16
+; MIPS32-OPTM1: addiu sp,sp,-464
 ; MIPS32-OPTM1: addiu [[REG:.*]],sp,32
 ; MIPS32-OPTM1: sw [[REG]],{{.*}}
 ; MIPS32-OPTM1: lw a0,{{.*}}
@@ -127,9 +127,9 @@
 ; ARM32:       bl {{.*}} R_{{.*}}    f1
 
 ; MIPS32-LABEL: fixed_351_align_16
-; MIPS32-OPT2: addiu sp,sp,-372
+; MIPS32-OPT2: addiu sp,sp,-384
 ; MIPS32-OPT2: addiu a0,sp,16
-; MIPS32-OPTM1: addiu sp,sp,-392
+; MIPS32-OPTM1: addiu sp,sp,-400
 ; MIPS32-OPTM1: addiu [[REG:.*]],sp,16
 ; MIPS32-OPTM1: sw [[REG]],{{.*}}
 ; MIPS32-OPTM1: lw a0,{{.*}}
@@ -158,9 +158,9 @@
 ; ARM32:       bl {{.*}} R_{{.*}}    f1
 
 ; MIPS32-LABEL: fixed_351_align_32
-; MIPS32-OPT2: addiu sp,sp,-376
-; MIPS32-OPT2: addiu a0,sp,32
-; MIPS32-OPTM1: addiu sp,sp,-392
+; MIPS32-OPT2: addiu sp,sp,-384
+; MIPS32-OPT2: addiu a0,sp,16
+; MIPS32-OPTM1: addiu sp,sp,-400
 ; MIPS32-OPTM1: addiu [[REG:.*]],sp,32
 ; MIPS32-OPTM1: sw [[REG]],{{.*}}
 ; MIPS32-OPTM1: lw a0,{{.*}}
diff --git a/tests_lit/llvm2ice_tests/bitcast.ll b/tests_lit/llvm2ice_tests/bitcast.ll
index 8358a2d..1a38a76 100644
--- a/tests_lit/llvm2ice_tests/bitcast.ll
+++ b/tests_lit/llvm2ice_tests/bitcast.ll
@@ -65,8 +65,10 @@
 ; ARM32-LABEL: cast_d2ll
 ; ARM32: vmov r{{[0-9]+}}, r{{[0-9]+}}, d{{[0-9]+}}
 ; MIPS32-LABEL: cast_d2ll
-; MIPS32-O2: mfc1 $v1, $f{{[0-9]+}}
-; MIPS32-O2: mfc1 $v0, $f{{[0-9]+}}
+; MIPS32-O2: swc1 $f13, {{.*}}
+; MIPS32-O2: swc1 $f12, {{.*}}
+; MIPS32-O2: lw $v0, {{.*}}
+; MIPS32-O2: lw $v1, {{.*}}
 ; MIPS32-OM1: sdc1
 ; MIPS32-OM1: lw
 ; MIPS32-OM1: lw
@@ -87,12 +89,10 @@
 ; MIPS32-LABEL: cast_d2ll_const
 ; MIPS32: lui {{.*}}, %hi(.L$double$0012345678901234)
 ; MIPS32: ldc1 {{.*}}, %lo(.L$double$0012345678901234)({{.*}})
-; MIPS32-O2: mfc1 $v1, $f{{[0-9]+}}
-; MIPS32-O2: mfc1 $v0, $f{{[0-9]+}}
-; MIPS32-OM1: mfc1
-; MIPS32-OM1: mfc1
-; MIPS32-OM1: lw
-; MIPS32-OM1: lw
+; MIPS32: swc1 $f{{[0-9]+}}, {{.*}}
+; MIPS32: swc1 $f{{[0-9]+}}, {{.*}}
+; MIPS32: lw $v0, {{.*}}
+; MIPS32: lw $v1, {{.*}}
 
 define internal double @cast_ll2d(i64 %ll) {
 entry:
diff --git a/tests_lit/llvm2ice_tests/fp.call_ret.ll b/tests_lit/llvm2ice_tests/fp.call_ret.ll
index 833b9a6..460d70c 100644
--- a/tests_lit/llvm2ice_tests/fp.call_ret.ll
+++ b/tests_lit/llvm2ice_tests/fp.call_ret.ll
@@ -59,16 +59,16 @@
 ; CHECK: mov DWORD PTR [esp+0x4],0x7b
 ; CHECK: call {{.*}} R_{{.*}} ignoreFpArgsNoInline
 ; MIPS32-LABEL: passFpArgs
-; MIPS32: 	mfc1	a2,$f15
-; MIPS32: 	mfc1	a3,$f14
+; MIPS32: 	mfc1	a2,$f{{[0-9]+}}
+; MIPS32: 	mfc1	a3,$f{{[0-9]+}}
 ; MIPS32: 	li	a1,123
 ; MIPS32: 	jal	{{.*}}	ignoreFpArgsNoInline
-; MIPS32: 	mfc1	a2,$f23
-; MIPS32: 	mfc1	a3,$f22
+; MIPS32: 	mfc1	a2,$f{{[0-9]+}}
+; MIPS32: 	mfc1	a3,$f{{[0-9]+}}
 ; MIPS32: 	li	a1,123
 ; MIPS32: 	jal	{{.*}}	ignoreFpArgsNoInline
-; MIPS32: 	mfc1	a2,$f25
-; MIPS32: 	mfc1	a3,$f24
+; MIPS32: 	mfc1	a2,$f{{[0-9]+}}
+; MIPS32: 	mfc1	a3,$f{{[0-9]+}}
 ; MIPS32: 	li	a1,123
 ; MIPS32: 	jal	{{.*}}	ignoreFpArgsNoInline
 
@@ -83,8 +83,8 @@
 ; CHECK: mov DWORD PTR [esp+0x4],0x7b
 ; CHECK: call {{.*}} R_{{.*}} ignoreFpArgsNoInline
 ; MIPS32-LABEL: passFpConstArg
-; MIPS32: 	mfc1	a2,$f1
-; MIPS32: 	mfc1	a3,$f0
+; MIPS32: 	mfc1	a2,$f{{[0-9]+}}
+; MIPS32: 	mfc1	a3,$f{{[0-9]+}}
 ; MIPS32: 	li	a1,123
 ; MIPS32: 	jal	{{.*}}	ignoreFpArgsNoInline
 
@@ -131,8 +131,8 @@
 ; CHECK-LABEL: returnFloatConst
 ; CHECK: fld
 ; MIPS32-LABEL: returnFloatConst
-; MIPS32: 	lui	v0,0x0	   160: R_MIPS_HI16	.L$float$3f9d70a4
-; MIPS32: 	lwc1	$f0,0(v0)  164: R_MIPS_LO16	.L$float$3f9d70a4
+; MIPS32: 	lui	v0,0x0    {{.*}} .L$float$3f9d70a4
+; MIPS32: 	lwc1	$f0,0(v0) {{.*}} .L$float$3f9d70a4
 ; MIPS32: 	jr	ra
 
 define internal double @returnDoubleConst() {
@@ -142,6 +142,6 @@
 ; CHECK-LABEL: returnDoubleConst
 ; CHECK: fld
 ; MIPS32-LABEL: returnDoubleConst
-; MIPS32: 	lui	v0,0x0	   170: R_MIPS_HI16  .L$double$3ff3ae147ae147ae
-; MIPS32: 	ldc1	$f0,0(v0)  174: R_MIPS_LO16  .L$double$3ff3ae147ae147ae
+; MIPS32: 	lui	v0,0x0	   {{.*}}  .L$double$3ff3ae147ae147ae
+; MIPS32: 	ldc1	$f0,0(v0)  {{.*}}  .L$double$3ff3ae147ae147ae
 ; MIPS32: 	jr	ra
diff --git a/tests_lit/llvm2ice_tests/vector-cast.ll b/tests_lit/llvm2ice_tests/vector-cast.ll
index a23a1cd..13c2acd 100644
--- a/tests_lit/llvm2ice_tests/vector-cast.ll
+++ b/tests_lit/llvm2ice_tests/vector-cast.ll
@@ -70,7 +70,7 @@
 ; MIPS32: 	andi	a0,a0,0x1
 ; MIPS32: 	sll	a0,a0,0x1f
 ; MIPS32: 	sra	a0,a0,0x1f
-; MIPS32: 	srl	a0,a0,0x18
+; MIPS32: 	sll	a0,a0,0x18
 ; MIPS32: 	sll	t2,t2,0x8
 ; MIPS32: 	srl	t2,t2,0x8
 ; MIPS32: 	or	a0,a0,t2
@@ -111,7 +111,7 @@
 ; MIPS32: 	andi	a1,a1,0x1
 ; MIPS32: 	sll	a1,a1,0x1f
 ; MIPS32: 	sra	a1,a1,0x1f
-; MIPS32: 	srl	a1,a1,0x18
+; MIPS32: 	sll	a1,a1,0x18
 ; MIPS32: 	sll	v0,v0,0x8
 ; MIPS32: 	srl	v0,v0,0x8
 ; MIPS32: 	or	a1,a1,v0
@@ -152,7 +152,7 @@
 ; MIPS32: 	andi	a2,a2,0x1
 ; MIPS32: 	sll	a2,a2,0x1f
 ; MIPS32: 	sra	a2,a2,0x1f
-; MIPS32: 	srl	a2,a2,0x18
+; MIPS32: 	sll	a2,a2,0x18
 ; MIPS32: 	sll	v0,v0,0x8
 ; MIPS32: 	srl	v0,v0,0x8
 ; MIPS32: 	or	a2,a2,v0
@@ -193,7 +193,7 @@
 ; MIPS32: 	andi	a3,a3,0x1
 ; MIPS32: 	sll	a3,a3,0x1f
 ; MIPS32: 	sra	a3,a3,0x1f
-; MIPS32: 	srl	a3,a3,0x18
+; MIPS32: 	sll	a3,a3,0x18
 ; MIPS32: 	sll	v0,v0,0x8
 ; MIPS32: 	srl	v0,v0,0x8
 ; MIPS32: 	or	a3,a3,v0
@@ -354,7 +354,7 @@
 ; MIPS32: 	srl	a0,a0,0x18
 ; MIPS32: 	andi	a0,a0,0x1
 ; MIPS32: 	andi	a0,a0,0x1
-; MIPS32: 	srl	a0,a0,0x18
+; MIPS32: 	sll	a0,a0,0x18
 ; MIPS32: 	sll	t2,t2,0x8
 ; MIPS32: 	srl	t2,t2,0x8
 ; MIPS32: 	or	a0,a0,t2
@@ -391,7 +391,7 @@
 ; MIPS32: 	srl	a1,a1,0x18
 ; MIPS32: 	andi	a1,a1,0x1
 ; MIPS32: 	andi	a1,a1,0x1
-; MIPS32: 	srl	a1,a1,0x18
+; MIPS32: 	sll	a1,a1,0x18
 ; MIPS32: 	sll	v0,v0,0x8
 ; MIPS32: 	srl	v0,v0,0x8
 ; MIPS32: 	or	a1,a1,v0
@@ -428,7 +428,7 @@
 ; MIPS32: 	srl	a2,a2,0x18
 ; MIPS32: 	andi	a2,a2,0x1
 ; MIPS32: 	andi	a2,a2,0x1
-; MIPS32: 	srl	a2,a2,0x18
+; MIPS32: 	sll	a2,a2,0x18
 ; MIPS32: 	sll	v0,v0,0x8
 ; MIPS32: 	srl	v0,v0,0x8
 ; MIPS32: 	or	a2,a2,v0
@@ -465,7 +465,7 @@
 ; MIPS32: 	srl	a3,a3,0x18
 ; MIPS32: 	andi	a3,a3,0x1
 ; MIPS32: 	andi	a3,a3,0x1
-; MIPS32: 	srl	a3,a3,0x18
+; MIPS32: 	sll	a3,a3,0x18
 ; MIPS32: 	sll	v0,v0,0x8
 ; MIPS32: 	srl	v0,v0,0x8
 ; MIPS32: 	or	a3,a3,v0
@@ -581,6 +581,7 @@
 ; X8632: pand
 ; MIPS32: 	move	t2,a0
 ; MIPS32: 	andi	t2,t2,0xff
+; MIPS32: 	andi	t2,t2,0x1
 ; MIPS32: 	andi	t2,t2,0xff
 ; MIPS32: 	srl	v0,v0,0x8
 ; MIPS32: 	sll	v0,v0,0x8
@@ -588,6 +589,7 @@
 ; MIPS32: 	move	v0,a0
 ; MIPS32: 	srl	v0,v0,0x8
 ; MIPS32: 	andi	v0,v0,0xff
+; MIPS32: 	andi	v0,v0,0x1
 ; MIPS32: 	andi	v0,v0,0xff
 ; MIPS32: 	sll	v0,v0,0x8
 ; MIPS32: 	lui	t3,0xffff
@@ -597,6 +599,7 @@
 ; MIPS32: 	move	t2,a0
 ; MIPS32: 	srl	t2,t2,0x10
 ; MIPS32: 	andi	t2,t2,0xff
+; MIPS32: 	andi	t2,t2,0x1
 ; MIPS32: 	andi	t2,t2,0xff
 ; MIPS32: 	sll	t2,t2,0x10
 ; MIPS32: 	lui	t3,0xff00
@@ -604,12 +607,14 @@
 ; MIPS32: 	and	v0,v0,t3
 ; MIPS32: 	or	t2,t2,v0
 ; MIPS32: 	srl	a0,a0,0x18
-; MIPS32: 	srl	a0,a0,0x18
+; MIPS32: 	andi	a0,a0,0x1
+; MIPS32: 	sll	a0,a0,0x18
 ; MIPS32: 	sll	t2,t2,0x8
 ; MIPS32: 	srl	t2,t2,0x8
 ; MIPS32: 	or	a0,a0,t2
 ; MIPS32: 	move	v0,a1
 ; MIPS32: 	andi	v0,v0,0xff
+; MIPS32: 	andi	v0,v0,0x1
 ; MIPS32: 	andi	v0,v0,0xff
 ; MIPS32: 	srl	v1,v1,0x8
 ; MIPS32: 	sll	v1,v1,0x8
@@ -617,6 +622,7 @@
 ; MIPS32: 	move	v1,a1
 ; MIPS32: 	srl	v1,v1,0x8
 ; MIPS32: 	andi	v1,v1,0xff
+; MIPS32: 	andi	v1,v1,0x1
 ; MIPS32: 	andi	v1,v1,0xff
 ; MIPS32: 	sll	v1,v1,0x8
 ; MIPS32: 	lui	t2,0xffff
@@ -626,6 +632,7 @@
 ; MIPS32: 	move	v0,a1
 ; MIPS32: 	srl	v0,v0,0x10
 ; MIPS32: 	andi	v0,v0,0xff
+; MIPS32: 	andi	v0,v0,0x1
 ; MIPS32: 	andi	v0,v0,0xff
 ; MIPS32: 	sll	v0,v0,0x10
 ; MIPS32: 	lui	t2,0xff00
@@ -633,12 +640,14 @@
 ; MIPS32: 	and	v1,v1,t2
 ; MIPS32: 	or	v0,v0,v1
 ; MIPS32: 	srl	a1,a1,0x18
-; MIPS32: 	srl	a1,a1,0x18
+; MIPS32: 	andi	a1,a1,0x1
+; MIPS32: 	sll	a1,a1,0x18
 ; MIPS32: 	sll	v0,v0,0x8
 ; MIPS32: 	srl	v0,v0,0x8
 ; MIPS32: 	or	a1,a1,v0
 ; MIPS32: 	move	v0,a2
 ; MIPS32: 	andi	v0,v0,0xff
+; MIPS32: 	andi	v0,v0,0x1
 ; MIPS32: 	andi	v0,v0,0xff
 ; MIPS32: 	srl	t0,t0,0x8
 ; MIPS32: 	sll	t0,t0,0x8
@@ -646,6 +655,7 @@
 ; MIPS32: 	move	v1,a2
 ; MIPS32: 	srl	v1,v1,0x8
 ; MIPS32: 	andi	v1,v1,0xff
+; MIPS32: 	andi	v1,v1,0x1
 ; MIPS32: 	andi	v1,v1,0xff
 ; MIPS32: 	sll	v1,v1,0x8
 ; MIPS32: 	lui	t0,0xffff
@@ -655,6 +665,7 @@
 ; MIPS32: 	move	v0,a2
 ; MIPS32: 	srl	v0,v0,0x10
 ; MIPS32: 	andi	v0,v0,0xff
+; MIPS32: 	andi	v0,v0,0x1
 ; MIPS32: 	andi	v0,v0,0xff
 ; MIPS32: 	sll	v0,v0,0x10
 ; MIPS32: 	lui	t0,0xff00
@@ -662,12 +673,14 @@
 ; MIPS32: 	and	v1,v1,t0
 ; MIPS32: 	or	v0,v0,v1
 ; MIPS32: 	srl	a2,a2,0x18
-; MIPS32: 	srl	a2,a2,0x18
+; MIPS32: 	andi	a2,a2,0x1
+; MIPS32: 	sll	a2,a2,0x18
 ; MIPS32: 	sll	v0,v0,0x8
 ; MIPS32: 	srl	v0,v0,0x8
 ; MIPS32: 	or	a2,a2,v0
 ; MIPS32: 	move	v0,a3
 ; MIPS32: 	andi	v0,v0,0xff
+; MIPS32: 	andi	v0,v0,0x1
 ; MIPS32: 	andi	v0,v0,0xff
 ; MIPS32: 	srl	t1,t1,0x8
 ; MIPS32: 	sll	t1,t1,0x8
@@ -675,6 +688,7 @@
 ; MIPS32: 	move	v1,a3
 ; MIPS32: 	srl	v1,v1,0x8
 ; MIPS32: 	andi	v1,v1,0xff
+; MIPS32: 	andi	v1,v1,0x1
 ; MIPS32: 	andi	v1,v1,0xff
 ; MIPS32: 	sll	v1,v1,0x8
 ; MIPS32: 	lui	t0,0xffff
@@ -684,6 +698,7 @@
 ; MIPS32: 	move	v0,a3
 ; MIPS32: 	srl	v0,v0,0x10
 ; MIPS32: 	andi	v0,v0,0xff
+; MIPS32: 	andi	v0,v0,0x1
 ; MIPS32: 	andi	v0,v0,0xff
 ; MIPS32: 	sll	v0,v0,0x10
 ; MIPS32: 	lui	t0,0xff00
@@ -691,7 +706,8 @@
 ; MIPS32: 	and	v1,v1,t0
 ; MIPS32: 	or	v0,v0,v1
 ; MIPS32: 	srl	a3,a3,0x18
-; MIPS32: 	srl	a3,a3,0x18
+; MIPS32: 	andi	a3,a3,0x1
+; MIPS32: 	sll	a3,a3,0x18
 ; MIPS32: 	sll	v0,v0,0x8
 ; MIPS32: 	srl	v0,v0,0x8
 ; MIPS32: 	or	a3,a3,v0
diff --git a/tests_lit/llvm2ice_tests/vector-icmp.ll b/tests_lit/llvm2ice_tests/vector-icmp.ll
index 95c0961..54fba56 100644
--- a/tests_lit/llvm2ice_tests/vector-icmp.ll
+++ b/tests_lit/llvm2ice_tests/vector-icmp.ll
@@ -2758,7 +2758,7 @@
 ; MIPS32: sll [[T0]],[[T0]],0x18
 ; MIPS32: xor [[T10]],[[T10]],[[T0]]
 ; MIPS32: sltiu [[T10]],[[T10]],1
-; MIPS32: srl [[T10]],[[T10]],0x18
+; MIPS32: sll [[T10]],[[T10]],0x18
 ; MIPS32: sll [[T8]],[[T8]],0x8
 ; MIPS32: srl [[T8]],[[T8]],0x8
 ; MIPS32: or [[T10]],[[T10]],[[T8]]
@@ -2812,7 +2812,7 @@
 ; MIPS32: sll [[T1]],[[T1]],0x18
 ; MIPS32: xor [[T11]],[[T11]],[[T1]]
 ; MIPS32: sltiu [[T11]],[[T11]],1
-; MIPS32: srl [[T11]],[[T11]],0x18
+; MIPS32: sll [[T11]],[[T11]],0x18
 ; MIPS32: sll [[T0]],[[T0]],0x8
 ; MIPS32: srl [[T0]],[[T0]],0x8
 ; MIPS32: or [[T11]],[[T11]],[[T0]]
@@ -2866,7 +2866,7 @@
 ; MIPS32: sll [[T2]],[[T2]],0x18
 ; MIPS32: xor [[T12]],[[T12]],[[T2]]
 ; MIPS32: sltiu [[T12]],[[T12]],1
-; MIPS32: srl [[T12]],[[T12]],0x18
+; MIPS32: sll [[T12]],[[T12]],0x18
 ; MIPS32: sll [[T0]],[[T0]],0x8
 ; MIPS32: srl [[T0]],[[T0]],0x8
 ; MIPS32: or [[T12]],[[T12]],[[T0]]
@@ -2920,7 +2920,7 @@
 ; MIPS32: sll [[T3]],[[T3]],0x18
 ; MIPS32: xor [[T13]],[[T13]],[[T3]]
 ; MIPS32: sltiu [[T13]],[[T13]],1
-; MIPS32: srl [[T13]],[[T13]],0x18
+; MIPS32: sll [[T13]],[[T13]],0x18
 ; MIPS32: sll [[T0]],[[T0]],0x8
 ; MIPS32: srl [[T0]],[[T0]],0x8
 ; MIPS32: or [[T13]],[[T13]],[[T0]]
@@ -2997,7 +2997,7 @@
 ; MIPS32: sll [[T0]],[[T0]],0x18
 ; MIPS32: xor [[T10]],[[T10]],[[T0]]
 ; MIPS32: sltu [[T10]],zero,[[T10]]
-; MIPS32: srl [[T10]],[[T10]],0x18
+; MIPS32: sll [[T10]],[[T10]],0x18
 ; MIPS32: sll [[T8]],[[T8]],0x8
 ; MIPS32: srl [[T8]],[[T8]],0x8
 ; MIPS32: or [[T10]],[[T10]],[[T8]]
@@ -3051,7 +3051,7 @@
 ; MIPS32: sll [[T1]],[[T1]],0x18
 ; MIPS32: xor [[T11]],[[T11]],[[T1]]
 ; MIPS32: sltu [[T11]],zero,[[T11]]
-; MIPS32: srl [[T11]],[[T11]],0x18
+; MIPS32: sll [[T11]],[[T11]],0x18
 ; MIPS32: sll [[T0]],[[T0]],0x8
 ; MIPS32: srl [[T0]],[[T0]],0x8
 ; MIPS32: or [[T11]],[[T11]],[[T0]]
@@ -3105,7 +3105,7 @@
 ; MIPS32: sll [[T2]],[[T2]],0x18
 ; MIPS32: xor [[T12]],[[T12]],[[T2]]
 ; MIPS32: sltu [[T12]],zero,[[T12]]
-; MIPS32: srl [[T12]],[[T12]],0x18
+; MIPS32: sll [[T12]],[[T12]],0x18
 ; MIPS32: sll [[T0]],[[T0]],0x8
 ; MIPS32: srl [[T0]],[[T0]],0x8
 ; MIPS32: or [[T12]],[[T12]],[[T0]]
@@ -3159,7 +3159,7 @@
 ; MIPS32: sll [[T3]],[[T3]],0x18
 ; MIPS32: xor [[T13]],[[T13]],[[T3]]
 ; MIPS32: sltu [[T13]],zero,[[T13]]
-; MIPS32: srl [[T13]],[[T13]],0x18
+; MIPS32: sll [[T13]],[[T13]],0x18
 ; MIPS32: sll [[T0]],[[T0]],0x8
 ; MIPS32: srl [[T0]],[[T0]],0x8
 ; MIPS32: or [[T13]],[[T13]],[[T0]]
@@ -3231,7 +3231,7 @@
 ; MIPS32: sll [[T10]],[[T10]],0x18
 ; MIPS32: sll [[T0]],[[T0]],0x18
 ; MIPS32: slt [[T0]],[[T0]],[[T10]]
-; MIPS32: srl [[T0]],[[T0]],0x18
+; MIPS32: sll [[T0]],[[T0]],0x18
 ; MIPS32: sll [[T9]],[[T9]],0x8
 ; MIPS32: srl [[T9]],[[T9]],0x8
 ; MIPS32: or v0,[[T0]],[[T9]]
@@ -3281,7 +3281,7 @@
 ; MIPS32: sll [[T11]],[[T11]],0x18
 ; MIPS32: sll [[T1]],[[T1]],0x18
 ; MIPS32: slt [[T1]],[[T1]],[[T11]]
-; MIPS32: srl [[T1]],[[T1]],0x18
+; MIPS32: sll [[T1]],[[T1]],0x18
 ; MIPS32: sll [[T4]],[[T4]],0x8
 ; MIPS32: srl [[T4]],[[T4]],0x8
 ; MIPS32: or v1,[[T1]],[[T4]]
@@ -3331,7 +3331,7 @@
 ; MIPS32: sll [[T12]],[[T12]],0x18
 ; MIPS32: sll [[T2]],[[T2]],0x18
 ; MIPS32: slt [[T2]],[[T2]],[[T12]]
-; MIPS32: srl [[T2]],[[T2]],0x18
+; MIPS32: sll [[T2]],[[T2]],0x18
 ; MIPS32: sll [[T11]],[[T11]],0x8
 ; MIPS32: srl [[T11]],[[T11]],0x8
 ; MIPS32: or [[T2]],[[T2]],[[T11]]
@@ -3381,7 +3381,7 @@
 ; MIPS32: sll [[T13]],[[T13]],0x18
 ; MIPS32: sll [[T3]],[[T3]],0x18
 ; MIPS32: slt [[T3]],[[T3]],[[T13]]
-; MIPS32: srl [[T3]],[[T3]],0x18
+; MIPS32: sll [[T3]],[[T3]],0x18
 ; MIPS32: sll [[T11]],[[T11]],0x8
 ; MIPS32: srl [[T11]],[[T11]],0x8
 ; MIPS32: or [[T3]],[[T3]],[[T11]]
@@ -3456,7 +3456,7 @@
 ; MIPS32: sll [[T0]],[[T0]],0x18
 ; MIPS32: slt [[T0]],[[T0]],[[T10]]
 ; MIPS32: xori [[T0]],[[T0]],0x1
-; MIPS32: srl [[T0]],[[T0]],0x18
+; MIPS32: sll [[T0]],[[T0]],0x18
 ; MIPS32: sll [[T9]],[[T9]],0x8
 ; MIPS32: srl [[T9]],[[T9]],0x8
 ; MIPS32: or v0,[[T0]],[[T9]]
@@ -3510,7 +3510,7 @@
 ; MIPS32: sll [[T1]],[[T1]],0x18
 ; MIPS32: slt [[T1]],[[T1]],[[T11]]
 ; MIPS32: xori [[T1]],[[T1]],0x1
-; MIPS32: srl [[T1]],[[T1]],0x18
+; MIPS32: sll [[T1]],[[T1]],0x18
 ; MIPS32: sll [[T4]],[[T4]],0x8
 ; MIPS32: srl [[T4]],[[T4]],0x8
 ; MIPS32: or v1,[[T1]],[[T4]]
@@ -3564,7 +3564,7 @@
 ; MIPS32: sll [[T2]],[[T2]],0x18
 ; MIPS32: slt [[T2]],[[T2]],[[T12]]
 ; MIPS32: xori [[T2]],[[T2]],0x1
-; MIPS32: srl [[T2]],[[T2]],0x18
+; MIPS32: sll [[T2]],[[T2]],0x18
 ; MIPS32: sll [[T11]],[[T11]],0x8
 ; MIPS32: srl [[T11]],[[T11]],0x8
 ; MIPS32: or [[T2]],[[T2]],[[T11]]
@@ -3618,7 +3618,7 @@
 ; MIPS32: sll [[T3]],[[T3]],0x18
 ; MIPS32: slt [[T3]],[[T3]],[[T13]]
 ; MIPS32: xori [[T3]],[[T3]],0x1
-; MIPS32: srl [[T3]],[[T3]],0x18
+; MIPS32: sll [[T3]],[[T3]],0x18
 ; MIPS32: sll [[T11]],[[T11]],0x8
 ; MIPS32: srl [[T11]],[[T11]],0x8
 ; MIPS32: or [[T3]],[[T3]],[[T11]]
@@ -3688,7 +3688,7 @@
 ; MIPS32: sll [[T10]],[[T10]],0x18
 ; MIPS32: sll [[T0]],[[T0]],0x18
 ; MIPS32: slt [[T10]],[[T10]],[[T0]]
-; MIPS32: srl [[T10]],[[T10]],0x18
+; MIPS32: sll [[T10]],[[T10]],0x18
 ; MIPS32: sll [[T8]],[[T8]],0x8
 ; MIPS32: srl [[T8]],[[T8]],0x8
 ; MIPS32: or [[T10]],[[T10]],[[T8]]
@@ -3738,7 +3738,7 @@
 ; MIPS32: sll [[T11]],[[T11]],0x18
 ; MIPS32: sll [[T1]],[[T1]],0x18
 ; MIPS32: slt [[T11]],[[T11]],[[T1]]
-; MIPS32: srl [[T11]],[[T11]],0x18
+; MIPS32: sll [[T11]],[[T11]],0x18
 ; MIPS32: sll [[T0]],[[T0]],0x8
 ; MIPS32: srl [[T0]],[[T0]],0x8
 ; MIPS32: or [[T11]],[[T11]],[[T0]]
@@ -3788,7 +3788,7 @@
 ; MIPS32: sll [[T12]],[[T12]],0x18
 ; MIPS32: sll [[T2]],[[T2]],0x18
 ; MIPS32: slt [[T12]],[[T12]],[[T2]]
-; MIPS32: srl [[T12]],[[T12]],0x18
+; MIPS32: sll [[T12]],[[T12]],0x18
 ; MIPS32: sll [[T0]],[[T0]],0x8
 ; MIPS32: srl [[T0]],[[T0]],0x8
 ; MIPS32: or [[T12]],[[T12]],[[T0]]
@@ -3838,7 +3838,7 @@
 ; MIPS32: sll [[T13]],[[T13]],0x18
 ; MIPS32: sll [[T3]],[[T3]],0x18
 ; MIPS32: slt [[T13]],[[T13]],[[T3]]
-; MIPS32: srl [[T13]],[[T13]],0x18
+; MIPS32: sll [[T13]],[[T13]],0x18
 ; MIPS32: sll [[T0]],[[T0]],0x8
 ; MIPS32: srl [[T0]],[[T0]],0x8
 ; MIPS32: or [[T13]],[[T13]],[[T0]]
@@ -3916,7 +3916,7 @@
 ; MIPS32: sll [[T0]],[[T0]],0x18
 ; MIPS32: sltu [[T10]],[[T10]],[[T0]]
 ; MIPS32: xori [[T10]],[[T10]],0x1
-; MIPS32: srl [[T10]],[[T10]],0x18
+; MIPS32: sll [[T10]],[[T10]],0x18
 ; MIPS32: sll [[T8]],[[T8]],0x8
 ; MIPS32: srl [[T8]],[[T8]],0x8
 ; MIPS32: or [[T10]],[[T10]],[[T8]]
@@ -3970,7 +3970,7 @@
 ; MIPS32: sll [[T1]],[[T1]],0x18
 ; MIPS32: sltu [[T11]],[[T11]],[[T1]]
 ; MIPS32: xori [[T11]],[[T11]],0x1
-; MIPS32: srl [[T11]],[[T11]],0x18
+; MIPS32: sll [[T11]],[[T11]],0x18
 ; MIPS32: sll [[T0]],[[T0]],0x8
 ; MIPS32: srl [[T0]],[[T0]],0x8
 ; MIPS32: or [[T11]],[[T11]],[[T0]]
@@ -4024,7 +4024,7 @@
 ; MIPS32: sll [[T2]],[[T2]],0x18
 ; MIPS32: sltu [[T12]],[[T12]],[[T2]]
 ; MIPS32: xori [[T12]],[[T12]],0x1
-; MIPS32: srl [[T12]],[[T12]],0x18
+; MIPS32: sll [[T12]],[[T12]],0x18
 ; MIPS32: sll [[T0]],[[T0]],0x8
 ; MIPS32: srl [[T0]],[[T0]],0x8
 ; MIPS32: or [[T12]],[[T12]],[[T0]]
@@ -4078,7 +4078,7 @@
 ; MIPS32: sll [[T3]],[[T3]],0x18
 ; MIPS32: sltu [[T13]],[[T13]],[[T3]]
 ; MIPS32: xori [[T13]],[[T13]],0x1
-; MIPS32: srl [[T13]],[[T13]],0x18
+; MIPS32: sll [[T13]],[[T13]],0x18
 ; MIPS32: sll [[T0]],[[T0]],0x8
 ; MIPS32: srl [[T0]],[[T0]],0x8
 ; MIPS32: or [[T13]],[[T13]],[[T0]]
@@ -4151,7 +4151,7 @@
 ; MIPS32: sll [[T10]],[[T10]],0x18
 ; MIPS32: sll [[T0]],[[T0]],0x18
 ; MIPS32: sltu [[T0]],[[T0]],[[T10]]
-; MIPS32: srl [[T0]],[[T0]],0x18
+; MIPS32: sll [[T0]],[[T0]],0x18
 ; MIPS32: sll [[T9]],[[T9]],0x8
 ; MIPS32: srl [[T9]],[[T9]],0x8
 ; MIPS32: or v0,[[T0]],[[T9]]
@@ -4201,7 +4201,7 @@
 ; MIPS32: sll [[T11]],[[T11]],0x18
 ; MIPS32: sll [[T1]],[[T1]],0x18
 ; MIPS32: sltu [[T1]],[[T1]],[[T11]]
-; MIPS32: srl [[T1]],[[T1]],0x18
+; MIPS32: sll [[T1]],[[T1]],0x18
 ; MIPS32: sll [[T4]],[[T4]],0x8
 ; MIPS32: srl [[T4]],[[T4]],0x8
 ; MIPS32: or v1,[[T1]],[[T4]]
@@ -4251,7 +4251,7 @@
 ; MIPS32: sll [[T12]],[[T12]],0x18
 ; MIPS32: sll [[T2]],[[T2]],0x18
 ; MIPS32: sltu [[T2]],[[T2]],[[T12]]
-; MIPS32: srl [[T2]],[[T2]],0x18
+; MIPS32: sll [[T2]],[[T2]],0x18
 ; MIPS32: sll [[T11]],[[T11]],0x8
 ; MIPS32: srl [[T11]],[[T11]],0x8
 ; MIPS32: or [[T2]],[[T2]],[[T11]]
@@ -4301,7 +4301,7 @@
 ; MIPS32: sll [[T13]],[[T13]],0x18
 ; MIPS32: sll [[T3]],[[T3]],0x18
 ; MIPS32: sltu [[T3]],[[T3]],[[T13]]
-; MIPS32: srl [[T3]],[[T3]],0x18
+; MIPS32: sll [[T3]],[[T3]],0x18
 ; MIPS32: sll [[T11]],[[T11]],0x8
 ; MIPS32: srl [[T11]],[[T11]],0x8
 ; MIPS32: or [[T3]],[[T3]],[[T11]]
@@ -4377,7 +4377,7 @@
 ; MIPS32: sll [[T0]],[[T0]],0x18
 ; MIPS32: sltu [[T0]],[[T0]],[[T10]]
 ; MIPS32: xori [[T0]],[[T0]],0x1
-; MIPS32: srl [[T0]],[[T0]],0x18
+; MIPS32: sll [[T0]],[[T0]],0x18
 ; MIPS32: sll [[T9]],[[T9]],0x8
 ; MIPS32: srl [[T9]],[[T9]],0x8
 ; MIPS32: or v0,[[T0]],[[T9]]
@@ -4431,7 +4431,7 @@
 ; MIPS32: sll [[T1]],[[T1]],0x18
 ; MIPS32: sltu [[T1]],[[T1]],[[T11]]
 ; MIPS32: xori [[T1]],[[T1]],0x1
-; MIPS32: srl [[T1]],[[T1]],0x18
+; MIPS32: sll [[T1]],[[T1]],0x18
 ; MIPS32: sll [[T4]],[[T4]],0x8
 ; MIPS32: srl [[T4]],[[T4]],0x8
 ; MIPS32: or v1,[[T1]],[[T4]]
@@ -4485,7 +4485,7 @@
 ; MIPS32: sll [[T2]],[[T2]],0x18
 ; MIPS32: sltu [[T2]],[[T2]],[[T12]]
 ; MIPS32: xori [[T2]],[[T2]],0x1
-; MIPS32: srl [[T2]],[[T2]],0x18
+; MIPS32: sll [[T2]],[[T2]],0x18
 ; MIPS32: sll [[T11]],[[T11]],0x8
 ; MIPS32: srl [[T11]],[[T11]],0x8
 ; MIPS32: or [[T2]],[[T2]],[[T11]]
@@ -4539,7 +4539,7 @@
 ; MIPS32: sll [[T3]],[[T3]],0x18
 ; MIPS32: sltu [[T3]],[[T3]],[[T13]]
 ; MIPS32: xori [[T3]],[[T3]],0x1
-; MIPS32: srl [[T3]],[[T3]],0x18
+; MIPS32: sll [[T3]],[[T3]],0x18
 ; MIPS32: sll [[T11]],[[T11]],0x8
 ; MIPS32: srl [[T11]],[[T11]],0x8
 ; MIPS32: or [[T3]],[[T3]],[[T11]]
@@ -4610,7 +4610,7 @@
 ; MIPS32: sll [[T10]],[[T10]],0x18
 ; MIPS32: sll [[T0]],[[T0]],0x18
 ; MIPS32: sltu [[T10]],[[T10]],[[T0]]
-; MIPS32: srl [[T10]],[[T10]],0x18
+; MIPS32: sll [[T10]],[[T10]],0x18
 ; MIPS32: sll [[T8]],[[T8]],0x8
 ; MIPS32: srl [[T8]],[[T8]],0x8
 ; MIPS32: or [[T10]],[[T10]],[[T8]]
@@ -4660,7 +4660,7 @@
 ; MIPS32: sll [[T11]],[[T11]],0x18
 ; MIPS32: sll [[T1]],[[T1]],0x18
 ; MIPS32: sltu [[T11]],[[T11]],[[T1]]
-; MIPS32: srl [[T11]],[[T11]],0x18
+; MIPS32: sll [[T11]],[[T11]],0x18
 ; MIPS32: sll [[T0]],[[T0]],0x8
 ; MIPS32: srl [[T0]],[[T0]],0x8
 ; MIPS32: or [[T11]],[[T11]],[[T0]]
@@ -4710,7 +4710,7 @@
 ; MIPS32: sll [[T12]],[[T12]],0x18
 ; MIPS32: sll [[T2]],[[T2]],0x18
 ; MIPS32: sltu [[T12]],[[T12]],[[T2]]
-; MIPS32: srl [[T12]],[[T12]],0x18
+; MIPS32: sll [[T12]],[[T12]],0x18
 ; MIPS32: sll [[T0]],[[T0]],0x8
 ; MIPS32: srl [[T0]],[[T0]],0x8
 ; MIPS32: or [[T12]],[[T12]],[[T0]]
@@ -4760,7 +4760,7 @@
 ; MIPS32: sll [[T13]],[[T13]],0x18
 ; MIPS32: sll [[T3]],[[T3]],0x18
 ; MIPS32: sltu [[T13]],[[T13]],[[T3]]
-; MIPS32: srl [[T13]],[[T13]],0x18
+; MIPS32: sll [[T13]],[[T13]],0x18
 ; MIPS32: sll [[T0]],[[T0]],0x8
 ; MIPS32: srl [[T0]],[[T0]],0x8
 ; MIPS32: or [[T13]],[[T13]],[[T0]]
@@ -4844,7 +4844,7 @@
 ; MIPS32: sll [[T0]],[[T0]],0x1f
 ; MIPS32: xor [[T10]],[[T10]],[[T0]]
 ; MIPS32: sltiu [[T10]],[[T10]],1
-; MIPS32: srl [[T10]],[[T10]],0x18
+; MIPS32: sll [[T10]],[[T10]],0x18
 ; MIPS32: sll [[T8]],[[T8]],0x8
 ; MIPS32: srl [[T8]],[[T8]],0x8
 ; MIPS32: or [[T10]],[[T10]],[[T8]]
@@ -4906,7 +4906,7 @@
 ; MIPS32: sll [[T1]],[[T1]],0x1f
 ; MIPS32: xor [[T11]],[[T11]],[[T1]]
 ; MIPS32: sltiu [[T11]],[[T11]],1
-; MIPS32: srl [[T11]],[[T11]],0x18
+; MIPS32: sll [[T11]],[[T11]],0x18
 ; MIPS32: sll [[T0]],[[T0]],0x8
 ; MIPS32: srl [[T0]],[[T0]],0x8
 ; MIPS32: or [[T11]],[[T11]],[[T0]]
@@ -4968,7 +4968,7 @@
 ; MIPS32: sll [[T2]],[[T2]],0x1f
 ; MIPS32: xor [[T12]],[[T12]],[[T2]]
 ; MIPS32: sltiu [[T12]],[[T12]],1
-; MIPS32: srl [[T12]],[[T12]],0x18
+; MIPS32: sll [[T12]],[[T12]],0x18
 ; MIPS32: sll [[T0]],[[T0]],0x8
 ; MIPS32: srl [[T0]],[[T0]],0x8
 ; MIPS32: or [[T12]],[[T12]],[[T0]]
@@ -5030,7 +5030,7 @@
 ; MIPS32: sll [[T3]],[[T3]],0x1f
 ; MIPS32: xor [[T13]],[[T13]],[[T3]]
 ; MIPS32: sltiu [[T13]],[[T13]],1
-; MIPS32: srl [[T13]],[[T13]],0x18
+; MIPS32: sll [[T13]],[[T13]],0x18
 ; MIPS32: sll [[T0]],[[T0]],0x8
 ; MIPS32: srl [[T0]],[[T0]],0x8
 ; MIPS32: or [[T13]],[[T13]],[[T0]]
@@ -5115,7 +5115,7 @@
 ; MIPS32: sll [[T0]],[[T0]],0x1f
 ; MIPS32: xor [[T10]],[[T10]],[[T0]]
 ; MIPS32: sltu [[T10]],zero,[[T10]]
-; MIPS32: srl [[T10]],[[T10]],0x18
+; MIPS32: sll [[T10]],[[T10]],0x18
 ; MIPS32: sll [[T8]],[[T8]],0x8
 ; MIPS32: srl [[T8]],[[T8]],0x8
 ; MIPS32: or [[T10]],[[T10]],[[T8]]
@@ -5177,7 +5177,7 @@
 ; MIPS32: sll [[T1]],[[T1]],0x1f
 ; MIPS32: xor [[T11]],[[T11]],[[T1]]
 ; MIPS32: sltu [[T11]],zero,[[T11]]
-; MIPS32: srl [[T11]],[[T11]],0x18
+; MIPS32: sll [[T11]],[[T11]],0x18
 ; MIPS32: sll [[T0]],[[T0]],0x8
 ; MIPS32: srl [[T0]],[[T0]],0x8
 ; MIPS32: or [[T11]],[[T11]],[[T0]]
@@ -5239,7 +5239,7 @@
 ; MIPS32: sll [[T2]],[[T2]],0x1f
 ; MIPS32: xor [[T12]],[[T12]],[[T2]]
 ; MIPS32: sltu [[T12]],zero,[[T12]]
-; MIPS32: srl [[T12]],[[T12]],0x18
+; MIPS32: sll [[T12]],[[T12]],0x18
 ; MIPS32: sll [[T0]],[[T0]],0x8
 ; MIPS32: srl [[T0]],[[T0]],0x8
 ; MIPS32: or [[T12]],[[T12]],[[T0]]
@@ -5301,7 +5301,7 @@
 ; MIPS32: sll [[T3]],[[T3]],0x1f
 ; MIPS32: xor [[T13]],[[T13]],[[T3]]
 ; MIPS32: sltu [[T13]],zero,[[T13]]
-; MIPS32: srl [[T13]],[[T13]],0x18
+; MIPS32: sll [[T13]],[[T13]],0x18
 ; MIPS32: sll [[T0]],[[T0]],0x8
 ; MIPS32: srl [[T0]],[[T0]],0x8
 ; MIPS32: or [[T13]],[[T13]],[[T0]]
@@ -5381,7 +5381,7 @@
 ; MIPS32: sll [[T10]],[[T10]],0x1f
 ; MIPS32: sll [[T0]],[[T0]],0x1f
 ; MIPS32: slt [[T0]],[[T0]],[[T10]]
-; MIPS32: srl [[T0]],[[T0]],0x18
+; MIPS32: sll [[T0]],[[T0]],0x18
 ; MIPS32: sll [[T9]],[[T9]],0x8
 ; MIPS32: srl [[T9]],[[T9]],0x8
 ; MIPS32: or v0,[[T0]],[[T9]]
@@ -5439,7 +5439,7 @@
 ; MIPS32: sll [[T11]],[[T11]],0x1f
 ; MIPS32: sll [[T1]],[[T1]],0x1f
 ; MIPS32: slt [[T1]],[[T1]],[[T11]]
-; MIPS32: srl [[T1]],[[T1]],0x18
+; MIPS32: sll [[T1]],[[T1]],0x18
 ; MIPS32: sll [[T4]],[[T4]],0x8
 ; MIPS32: srl [[T4]],[[T4]],0x8
 ; MIPS32: or v1,[[T1]],[[T4]]
@@ -5497,7 +5497,7 @@
 ; MIPS32: sll [[T12]],[[T12]],0x1f
 ; MIPS32: sll [[T2]],[[T2]],0x1f
 ; MIPS32: slt [[T2]],[[T2]],[[T12]]
-; MIPS32: srl [[T2]],[[T2]],0x18
+; MIPS32: sll [[T2]],[[T2]],0x18
 ; MIPS32: sll [[T11]],[[T11]],0x8
 ; MIPS32: srl [[T11]],[[T11]],0x8
 ; MIPS32: or [[T2]],[[T2]],[[T11]]
@@ -5555,7 +5555,7 @@
 ; MIPS32: sll [[T13]],[[T13]],0x1f
 ; MIPS32: sll [[T3]],[[T3]],0x1f
 ; MIPS32: slt [[T3]],[[T3]],[[T13]]
-; MIPS32: srl [[T3]],[[T3]],0x18
+; MIPS32: sll [[T3]],[[T3]],0x18
 ; MIPS32: sll [[T11]],[[T11]],0x8
 ; MIPS32: srl [[T11]],[[T11]],0x8
 ; MIPS32: or [[T3]],[[T3]],[[T11]]
@@ -5638,7 +5638,7 @@
 ; MIPS32: sll [[T0]],[[T0]],0x1f
 ; MIPS32: slt [[T0]],[[T0]],[[T10]]
 ; MIPS32: xori [[T0]],[[T0]],0x1
-; MIPS32: srl [[T0]],[[T0]],0x18
+; MIPS32: sll [[T0]],[[T0]],0x18
 ; MIPS32: sll [[T9]],[[T9]],0x8
 ; MIPS32: srl [[T9]],[[T9]],0x8
 ; MIPS32: or v0,[[T0]],[[T9]]
@@ -5700,7 +5700,7 @@
 ; MIPS32: sll [[T1]],[[T1]],0x1f
 ; MIPS32: slt [[T1]],[[T1]],[[T11]]
 ; MIPS32: xori [[T1]],[[T1]],0x1
-; MIPS32: srl [[T1]],[[T1]],0x18
+; MIPS32: sll [[T1]],[[T1]],0x18
 ; MIPS32: sll [[T4]],[[T4]],0x8
 ; MIPS32: srl [[T4]],[[T4]],0x8
 ; MIPS32: or v1,[[T1]],[[T4]]
@@ -5762,7 +5762,7 @@
 ; MIPS32: sll [[T2]],[[T2]],0x1f
 ; MIPS32: slt [[T2]],[[T2]],[[T12]]
 ; MIPS32: xori [[T2]],[[T2]],0x1
-; MIPS32: srl [[T2]],[[T2]],0x18
+; MIPS32: sll [[T2]],[[T2]],0x18
 ; MIPS32: sll [[T11]],[[T11]],0x8
 ; MIPS32: srl [[T11]],[[T11]],0x8
 ; MIPS32: or [[T2]],[[T2]],[[T11]]
@@ -5824,7 +5824,7 @@
 ; MIPS32: sll [[T3]],[[T3]],0x1f
 ; MIPS32: slt [[T3]],[[T3]],[[T13]]
 ; MIPS32: xori [[T3]],[[T3]],0x1
-; MIPS32: srl [[T3]],[[T3]],0x18
+; MIPS32: sll [[T3]],[[T3]],0x18
 ; MIPS32: sll [[T11]],[[T11]],0x8
 ; MIPS32: srl [[T11]],[[T11]],0x8
 ; MIPS32: or [[T3]],[[T3]],[[T11]]
@@ -5902,7 +5902,7 @@
 ; MIPS32: sll [[T10]],[[T10]],0x1f
 ; MIPS32: sll [[T0]],[[T0]],0x1f
 ; MIPS32: slt [[T10]],[[T10]],[[T0]]
-; MIPS32: srl [[T10]],[[T10]],0x18
+; MIPS32: sll [[T10]],[[T10]],0x18
 ; MIPS32: sll [[T8]],[[T8]],0x8
 ; MIPS32: srl [[T8]],[[T8]],0x8
 ; MIPS32: or [[T10]],[[T10]],[[T8]]
@@ -5960,7 +5960,7 @@
 ; MIPS32: sll [[T11]],[[T11]],0x1f
 ; MIPS32: sll [[T1]],[[T1]],0x1f
 ; MIPS32: slt [[T11]],[[T11]],[[T1]]
-; MIPS32: srl [[T11]],[[T11]],0x18
+; MIPS32: sll [[T11]],[[T11]],0x18
 ; MIPS32: sll [[T0]],[[T0]],0x8
 ; MIPS32: srl [[T0]],[[T0]],0x8
 ; MIPS32: or [[T11]],[[T11]],[[T0]]
@@ -6018,7 +6018,7 @@
 ; MIPS32: sll [[T12]],[[T12]],0x1f
 ; MIPS32: sll [[T2]],[[T2]],0x1f
 ; MIPS32: slt [[T12]],[[T12]],[[T2]]
-; MIPS32: srl [[T12]],[[T12]],0x18
+; MIPS32: sll [[T12]],[[T12]],0x18
 ; MIPS32: sll [[T0]],[[T0]],0x8
 ; MIPS32: srl [[T0]],[[T0]],0x8
 ; MIPS32: or [[T12]],[[T12]],[[T0]]
@@ -6076,7 +6076,7 @@
 ; MIPS32: sll [[T13]],[[T13]],0x1f
 ; MIPS32: sll [[T3]],[[T3]],0x1f
 ; MIPS32: slt [[T13]],[[T13]],[[T3]]
-; MIPS32: srl [[T13]],[[T13]],0x18
+; MIPS32: sll [[T13]],[[T13]],0x18
 ; MIPS32: sll [[T0]],[[T0]],0x8
 ; MIPS32: srl [[T0]],[[T0]],0x8
 ; MIPS32: or [[T13]],[[T13]],[[T0]]
@@ -6162,7 +6162,7 @@
 ; MIPS32: sll [[T0]],[[T0]],0x1f
 ; MIPS32: sltu [[T10]],[[T10]],[[T0]]
 ; MIPS32: xori [[T10]],[[T10]],0x1
-; MIPS32: srl [[T10]],[[T10]],0x18
+; MIPS32: sll [[T10]],[[T10]],0x18
 ; MIPS32: sll [[T8]],[[T8]],0x8
 ; MIPS32: srl [[T8]],[[T8]],0x8
 ; MIPS32: or [[T10]],[[T10]],[[T8]]
@@ -6224,7 +6224,7 @@
 ; MIPS32: sll [[T1]],[[T1]],0x1f
 ; MIPS32: sltu [[T11]],[[T11]],[[T1]]
 ; MIPS32: xori [[T11]],[[T11]],0x1
-; MIPS32: srl [[T11]],[[T11]],0x18
+; MIPS32: sll [[T11]],[[T11]],0x18
 ; MIPS32: sll [[T0]],[[T0]],0x8
 ; MIPS32: srl [[T0]],[[T0]],0x8
 ; MIPS32: or [[T11]],[[T11]],[[T0]]
@@ -6286,7 +6286,7 @@
 ; MIPS32: sll [[T2]],[[T2]],0x1f
 ; MIPS32: sltu [[T12]],[[T12]],[[T2]]
 ; MIPS32: xori [[T12]],[[T12]],0x1
-; MIPS32: srl [[T12]],[[T12]],0x18
+; MIPS32: sll [[T12]],[[T12]],0x18
 ; MIPS32: sll [[T0]],[[T0]],0x8
 ; MIPS32: srl [[T0]],[[T0]],0x8
 ; MIPS32: or [[T12]],[[T12]],[[T0]]
@@ -6348,7 +6348,7 @@
 ; MIPS32: sll [[T3]],[[T3]],0x1f
 ; MIPS32: sltu [[T13]],[[T13]],[[T3]]
 ; MIPS32: xori [[T13]],[[T13]],0x1
-; MIPS32: srl [[T13]],[[T13]],0x18
+; MIPS32: sll [[T13]],[[T13]],0x18
 ; MIPS32: sll [[T0]],[[T0]],0x8
 ; MIPS32: srl [[T0]],[[T0]],0x8
 ; MIPS32: or [[T13]],[[T13]],[[T0]]
@@ -6429,7 +6429,7 @@
 ; MIPS32: sll [[T10]],[[T10]],0x1f
 ; MIPS32: sll [[T0]],[[T0]],0x1f
 ; MIPS32: sltu [[T0]],[[T0]],[[T10]]
-; MIPS32: srl [[T0]],[[T0]],0x18
+; MIPS32: sll [[T0]],[[T0]],0x18
 ; MIPS32: sll [[T9]],[[T9]],0x8
 ; MIPS32: srl [[T9]],[[T9]],0x8
 ; MIPS32: or v0,[[T0]],[[T9]]
@@ -6487,7 +6487,7 @@
 ; MIPS32: sll [[T11]],[[T11]],0x1f
 ; MIPS32: sll [[T1]],[[T1]],0x1f
 ; MIPS32: sltu [[T1]],[[T1]],[[T11]]
-; MIPS32: srl [[T1]],[[T1]],0x18
+; MIPS32: sll [[T1]],[[T1]],0x18
 ; MIPS32: sll [[T4]],[[T4]],0x8
 ; MIPS32: srl [[T4]],[[T4]],0x8
 ; MIPS32: or v1,[[T1]],[[T4]]
@@ -6545,7 +6545,7 @@
 ; MIPS32: sll [[T12]],[[T12]],0x1f
 ; MIPS32: sll [[T2]],[[T2]],0x1f
 ; MIPS32: sltu [[T2]],[[T2]],[[T12]]
-; MIPS32: srl [[T2]],[[T2]],0x18
+; MIPS32: sll [[T2]],[[T2]],0x18
 ; MIPS32: sll [[T11]],[[T11]],0x8
 ; MIPS32: srl [[T11]],[[T11]],0x8
 ; MIPS32: or [[T2]],[[T2]],[[T11]]
@@ -6603,7 +6603,7 @@
 ; MIPS32: sll [[T13]],[[T13]],0x1f
 ; MIPS32: sll [[T3]],[[T3]],0x1f
 ; MIPS32: sltu [[T3]],[[T3]],[[T13]]
-; MIPS32: srl [[T3]],[[T3]],0x18
+; MIPS32: sll [[T3]],[[T3]],0x18
 ; MIPS32: sll [[T11]],[[T11]],0x8
 ; MIPS32: srl [[T11]],[[T11]],0x8
 ; MIPS32: or [[T3]],[[T3]],[[T11]]
@@ -6687,7 +6687,7 @@
 ; MIPS32: sll [[T0]],[[T0]],0x1f
 ; MIPS32: sltu [[T0]],[[T0]],[[T10]]
 ; MIPS32: xori [[T0]],[[T0]],0x1
-; MIPS32: srl [[T0]],[[T0]],0x18
+; MIPS32: sll [[T0]],[[T0]],0x18
 ; MIPS32: sll [[T9]],[[T9]],0x8
 ; MIPS32: srl [[T9]],[[T9]],0x8
 ; MIPS32: or v0,[[T0]],[[T9]]
@@ -6749,7 +6749,7 @@
 ; MIPS32: sll [[T1]],[[T1]],0x1f
 ; MIPS32: sltu [[T1]],[[T1]],[[T11]]
 ; MIPS32: xori [[T1]],[[T1]],0x1
-; MIPS32: srl [[T1]],[[T1]],0x18
+; MIPS32: sll [[T1]],[[T1]],0x18
 ; MIPS32: sll [[T4]],[[T4]],0x8
 ; MIPS32: srl [[T4]],[[T4]],0x8
 ; MIPS32: or v1,[[T1]],[[T4]]
@@ -6811,7 +6811,7 @@
 ; MIPS32: sll [[T2]],[[T2]],0x1f
 ; MIPS32: sltu [[T2]],[[T2]],[[T12]]
 ; MIPS32: xori [[T2]],[[T2]],0x1
-; MIPS32: srl [[T2]],[[T2]],0x18
+; MIPS32: sll [[T2]],[[T2]],0x18
 ; MIPS32: sll [[T11]],[[T11]],0x8
 ; MIPS32: srl [[T11]],[[T11]],0x8
 ; MIPS32: or [[T2]],[[T2]],[[T11]]
@@ -6873,7 +6873,7 @@
 ; MIPS32: sll [[T3]],[[T3]],0x1f
 ; MIPS32: sltu [[T3]],[[T3]],[[T13]]
 ; MIPS32: xori [[T3]],[[T3]],0x1
-; MIPS32: srl [[T3]],[[T3]],0x18
+; MIPS32: sll [[T3]],[[T3]],0x18
 ; MIPS32: sll [[T11]],[[T11]],0x8
 ; MIPS32: srl [[T11]],[[T11]],0x8
 ; MIPS32: or [[T3]],[[T3]],[[T11]]
@@ -6952,7 +6952,7 @@
 ; MIPS32: sll [[T10]],[[T10]],0x1f
 ; MIPS32: sll [[T0]],[[T0]],0x1f
 ; MIPS32: sltu [[T10]],[[T10]],[[T0]]
-; MIPS32: srl [[T10]],[[T10]],0x18
+; MIPS32: sll [[T10]],[[T10]],0x18
 ; MIPS32: sll [[T8]],[[T8]],0x8
 ; MIPS32: srl [[T8]],[[T8]],0x8
 ; MIPS32: or [[T10]],[[T10]],[[T8]]
@@ -7010,7 +7010,7 @@
 ; MIPS32: sll [[T11]],[[T11]],0x1f
 ; MIPS32: sll [[T1]],[[T1]],0x1f
 ; MIPS32: sltu [[T11]],[[T11]],[[T1]]
-; MIPS32: srl [[T11]],[[T11]],0x18
+; MIPS32: sll [[T11]],[[T11]],0x18
 ; MIPS32: sll [[T0]],[[T0]],0x8
 ; MIPS32: srl [[T0]],[[T0]],0x8
 ; MIPS32: or [[T11]],[[T11]],[[T0]]
@@ -7068,7 +7068,7 @@
 ; MIPS32: sll [[T12]],[[T12]],0x1f
 ; MIPS32: sll [[T2]],[[T2]],0x1f
 ; MIPS32: sltu [[T12]],[[T12]],[[T2]]
-; MIPS32: srl [[T12]],[[T12]],0x18
+; MIPS32: sll [[T12]],[[T12]],0x18
 ; MIPS32: sll [[T0]],[[T0]],0x8
 ; MIPS32: srl [[T0]],[[T0]],0x8
 ; MIPS32: or [[T12]],[[T12]],[[T0]]
@@ -7126,7 +7126,7 @@
 ; MIPS32: sll [[T13]],[[T13]],0x1f
 ; MIPS32: sll [[T3]],[[T3]],0x1f
 ; MIPS32: sltu [[T13]],[[T13]],[[T3]]
-; MIPS32: srl [[T13]],[[T13]],0x18
+; MIPS32: sll [[T13]],[[T13]],0x18
 ; MIPS32: sll [[T0]],[[T0]],0x8
 ; MIPS32: srl [[T0]],[[T0]],0x8
 ; MIPS32: or [[T13]],[[T13]],[[T0]]
diff --git a/tests_lit/llvm2ice_tests/vector-select.ll b/tests_lit/llvm2ice_tests/vector-select.ll
index 0555ddf..0d1fa92 100644
--- a/tests_lit/llvm2ice_tests/vector-select.ll
+++ b/tests_lit/llvm2ice_tests/vector-select.ll
@@ -29,7 +29,7 @@
 ; SSE41: pblendvb xmm{{[0-7]}},{{xmm[0-7]|XMMWORD}}
 
 ; MIPS32-LABEL: test_select_v16i8
-; MIPS32: addiu [[T0:.*]],sp,-20
+; MIPS32: addiu [[T0:.*]],sp,-32
 ; MIPS32: sw [[T1:.*]],
 ; MIPS32: sw [[T2:.*]],
 ; MIPS32: sw [[T3:.*]],
@@ -98,7 +98,7 @@
 ; MIPS32: srl [[T6]],[[T6]],0x18
 ; MIPS32: srl [[T10]],[[T10]],0x18
 ; MIPS32: movn [[T10]],[[T6]],[[T16]]
-; MIPS32: srl [[T10]],[[T10]],0x18
+; MIPS32: sll [[T10]],[[T10]],0x18
 ; MIPS32: sll [[T1]],[[T1]],0x8
 ; MIPS32: srl [[T1]],[[T1]],0x8
 ; MIPS32: or [[T10]],[[T10]],[[T1]]
@@ -153,7 +153,7 @@
 ; MIPS32: srl [[T7]],[[T7]],0x18
 ; MIPS32: srl [[T11]],[[T11]],0x18
 ; MIPS32: movn [[T11]],[[T7]],[[T17]]
-; MIPS32: srl [[T11]],[[T11]],0x18
+; MIPS32: sll [[T11]],[[T11]],0x18
 ; MIPS32: sll [[T14]],[[T14]],0x8
 ; MIPS32: srl [[T14]],[[T14]],0x8
 ; MIPS32: or [[T11]],[[T11]],[[T14]]
@@ -208,7 +208,7 @@
 ; MIPS32: srl [[T8]],[[T8]],0x18
 ; MIPS32: srl [[T12]],[[T12]],0x18
 ; MIPS32: movn [[T12]],[[T8]],[[T18]]
-; MIPS32: srl [[T12]],[[T12]],0x18
+; MIPS32: sll [[T12]],[[T12]],0x18
 ; MIPS32: sll [[T16]],[[T16]],0x8
 ; MIPS32: srl [[T16]],[[T16]],0x8
 ; MIPS32: or [[T12]],[[T12]],[[T16]]
@@ -263,7 +263,7 @@
 ; MIPS32: srl [[T9]],[[T9]],0x18
 ; MIPS32: srl [[T13]],[[T13]],0x18
 ; MIPS32: movn [[T13]],[[T9]],[[T19]]
-; MIPS32: srl [[T13]],[[T13]],0x18
+; MIPS32: sll [[T13]],[[T13]],0x18
 ; MIPS32: sll [[T16]],[[T16]],0x8
 ; MIPS32: srl [[T16]],[[T16]],0x8
 ; MIPS32: or [[T13]],[[T13]],[[T16]]
@@ -276,7 +276,7 @@
 ; MIPS32: lw [[T3]],
 ; MIPS32: lw [[T2]],
 ; MIPS32: lw [[T1]],
-; MIPS32: addiu [[T0]],sp,20
+; MIPS32: addiu [[T0]],sp,32
 }
 
 define internal <16 x i1> @test_select_v16i1(<16 x i1> %cond, <16 x i1> %arg1,
@@ -293,7 +293,7 @@
 ; SSE41: pblendvb xmm{{[0-7]}},{{xmm[0-7]|XMMWORD}}
 
 ; MIPS32-LABEL: test_select_v16i1
-; MIPS32: addiu [[T0:.*]],sp,-20
+; MIPS32: addiu [[T0:.*]],sp,-32
 ; MIPS32: sw [[T1:.*]],
 ; MIPS32: sw [[T2:.*]],
 ; MIPS32: sw [[T3:.*]],
@@ -370,7 +370,7 @@
 ; MIPS32: srl [[T10]],[[T10]],0x18
 ; MIPS32: andi [[T10]],[[T10]],0x1
 ; MIPS32: movn [[T10]],[[T6]],[[T16]]
-; MIPS32: srl [[T10]],[[T10]],0x18
+; MIPS32: sll [[T10]],[[T10]],0x18
 ; MIPS32: sll [[T1]],[[T1]],0x8
 ; MIPS32: srl [[T1]],[[T1]],0x8
 ; MIPS32: or [[T10]],[[T10]],[[T1]]
@@ -433,7 +433,7 @@
 ; MIPS32: srl [[T11]],[[T11]],0x18
 ; MIPS32: andi [[T11]],[[T11]],0x1
 ; MIPS32: movn [[T11]],[[T7]],[[T17]]
-; MIPS32: srl [[T11]],[[T11]],0x18
+; MIPS32: sll [[T11]],[[T11]],0x18
 ; MIPS32: sll [[T14]],[[T14]],0x8
 ; MIPS32: srl [[T14]],[[T14]],0x8
 ; MIPS32: or [[T11]],[[T11]],[[T14]]
@@ -496,7 +496,7 @@
 ; MIPS32: srl [[T12]],[[T12]],0x18
 ; MIPS32: andi [[T12]],[[T12]],0x1
 ; MIPS32: movn [[T12]],[[T8]],[[T18]]
-; MIPS32: srl [[T12]],[[T12]],0x18
+; MIPS32: sll [[T12]],[[T12]],0x18
 ; MIPS32: sll [[T16]],[[T16]],0x8
 ; MIPS32: srl [[T16]],[[T16]],0x8
 ; MIPS32: or [[T12]],[[T12]],[[T16]]
@@ -559,7 +559,7 @@
 ; MIPS32: srl [[T13]],[[T13]],0x18
 ; MIPS32: andi [[T13]],[[T13]],0x1
 ; MIPS32: movn [[T13]],[[T9]],[[T19]]
-; MIPS32: srl [[T13]],[[T13]],0x18
+; MIPS32: sll [[T13]],[[T13]],0x18
 ; MIPS32: sll [[T16]],[[T16]],0x8
 ; MIPS32: srl [[T16]],[[T16]],0x8
 ; MIPS32: or [[T13]],[[T13]],[[T16]]
@@ -572,7 +572,7 @@
 ; MIPS32: lw [[T3]],
 ; MIPS32: lw [[T2]],
 ; MIPS32: lw [[T1]],
-; MIPS32: addiu [[T0]],sp,20
+; MIPS32: addiu [[T0]],sp,32
 }
 
 define internal <8 x i16> @test_select_v8i16(<8 x i1> %cond, <8 x i16> %arg1,
@@ -589,7 +589,7 @@
 ; SSE41: pblendvb xmm{{[0-7]}},{{xmm[0-7]|XMMWORD}}
 
 ; MIPS32-LABEL: test_select_v8i16
-; MIPS32: addiu [[T0:.*]],sp,-20
+; MIPS32: addiu [[T0:.*]],sp,-32
 ; MIPS32: sw [[T1:.*]],
 ; MIPS32: sw [[T2:.*]],
 ; MIPS32: sw [[T3:.*]],
@@ -700,7 +700,7 @@
 ; MIPS32: lw [[T3]],
 ; MIPS32: lw [[T2]],
 ; MIPS32: lw [[T1]],
-; MIPS32: addiu [[T0]],sp,20
+; MIPS32: addiu [[T0]],sp,32
 }
 
 define internal <8 x i1> @test_select_v8i1(<8 x i1> %cond, <8 x i1> %arg1,
@@ -717,7 +717,7 @@
 ; SSE41: pblendvb xmm{{[0-7]}},{{xmm[0-7]|XMMWORD}}
 
 ; MIPS32-LABEL: test_select_v8i1
-; MIPS32: addiu [[T0:.*]],sp,-20
+; MIPS32: addiu [[T0:.*]],sp,-32
 ; MIPS32: sw [[T1:.*]],
 ; MIPS32: sw [[T2:.*]],
 ; MIPS32: sw [[T3:.*]],
@@ -844,7 +844,7 @@
 ; MIPS32: lw [[T3]],
 ; MIPS32: lw [[T2]],
 ; MIPS32: lw [[T1]],
-; MIPS32: addiu [[T0]],sp,20
+; MIPS32: addiu [[T0]],sp,32
 }
 
 define internal <4 x i32> @test_select_v4i32(<4 x i1> %cond, <4 x i32> %arg1,