diff --git a/Makefile.standalone b/Makefile.standalone
index a1584f5..beaf9b6 100644
--- a/Makefile.standalone
+++ b/Makefile.standalone
@@ -39,7 +39,9 @@
 	IceGlobalContext.cpp \
 	IceInst.cpp \
 	IceInstX8632.cpp \
+	IceLiveness.cpp \
 	IceOperand.cpp \
+	IceRegAlloc.cpp \
 	IceTargetLowering.cpp \
 	IceTargetLoweringX8632.cpp \
 	IceTypes.cpp \
diff --git a/crosstest/runtests.sh b/crosstest/runtests.sh
index 400e7a4..4ba208f 100755
--- a/crosstest/runtests.sh
+++ b/crosstest/runtests.sh
@@ -5,7 +5,7 @@
 
 set -eux
 
-OPTLEVELS="m1"
+OPTLEVELS="m1 2"
 OUTDIR=Output
 # Clean the output directory to avoid reusing stale results.
 rm -rf "${OUTDIR}"
diff --git a/src/IceCfg.cpp b/src/IceCfg.cpp
index 3d720fa..6bd9bf7 100644
--- a/src/IceCfg.cpp
+++ b/src/IceCfg.cpp
@@ -16,6 +16,7 @@
 #include "IceCfgNode.h"
 #include "IceDefs.h"
 #include "IceInst.h"
+#include "IceLiveness.h"
 #include "IceOperand.h"
 #include "IceTargetLowering.h"
 
@@ -24,7 +25,7 @@
 Cfg::Cfg(GlobalContext *Ctx)
     : Ctx(Ctx), FunctionName(""), ReturnType(IceType_void),
       IsInternalLinkage(false), HasError(false), ErrorMessage(""), Entry(NULL),
-      NextInstNumber(1),
+      NextInstNumber(1), Live(NULL),
       Target(TargetLowering::createLowering(Ctx->getTargetArch(), this)),
       CurrentNode(NULL) {}
 
@@ -62,14 +63,10 @@
 bool Cfg::hasComputedFrame() const { return getTarget()->hasComputedFrame(); }
 
 void Cfg::translate() {
-  Ostream &Str = Ctx->getStrDump();
   if (hasError())
     return;
 
-  if (Ctx->isVerbose()) {
-    Str << "================ Initial CFG ================\n";
-    dump();
-  }
+  dump("Initial CFG");
 
   Timer T_translate;
   // The set of translation passes and their order are determined by
@@ -77,10 +74,7 @@
   getTarget()->translate();
   T_translate.printElapsedUs(getContext(), "translate()");
 
-  if (Ctx->isVerbose()) {
-    Str << "================ Final output ================\n";
-    dump();
-  }
+  dump("Final output");
 }
 
 void Cfg::computePredecessors() {
@@ -89,6 +83,13 @@
   }
 }
 
+void Cfg::renumberInstructions() {
+  NextInstNumber = 1;
+  for (NodeList::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
+    (*I)->renumberInstructions();
+  }
+}
+
 // placePhiLoads() must be called before placePhiStores().
 void Cfg::placePhiLoads() {
   for (NodeList::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
@@ -109,6 +110,12 @@
   }
 }
 
+void Cfg::doAddressOpt() {
+  for (NodeList::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
+    (*I)->doAddressOpt();
+  }
+}
+
 void Cfg::genCode() {
   for (NodeList::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
     (*I)->genCode();
@@ -128,6 +135,150 @@
   }
 }
 
+// This is a lightweight version of live-range-end calculation.  Marks
+// the last use of only those variables whose definition and uses are
+// completely with a single block.  It is a quick single pass and
+// doesn't need to iterate until convergence.
+void Cfg::livenessLightweight() {
+  for (NodeList::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
+    (*I)->livenessLightweight();
+  }
+}
+
+void Cfg::liveness(LivenessMode Mode) {
+  Live.reset(new Liveness(this, Mode));
+  Live->init();
+  // Initialize with all nodes needing to be processed.
+  llvm::BitVector NeedToProcess(Nodes.size(), true);
+  while (NeedToProcess.any()) {
+    // Iterate in reverse topological order to speed up convergence.
+    for (NodeList::reverse_iterator I = Nodes.rbegin(), E = Nodes.rend();
+         I != E; ++I) {
+      CfgNode *Node = *I;
+      if (NeedToProcess[Node->getIndex()]) {
+        NeedToProcess[Node->getIndex()] = false;
+        bool Changed = Node->liveness(getLiveness());
+        if (Changed) {
+          // If the beginning-of-block liveness changed since the last
+          // iteration, mark all in-edges as needing to be processed.
+          const NodeList &InEdges = Node->getInEdges();
+          for (NodeList::const_iterator I1 = InEdges.begin(),
+                                        E1 = InEdges.end();
+               I1 != E1; ++I1) {
+            CfgNode *Pred = *I1;
+            NeedToProcess[Pred->getIndex()] = true;
+          }
+        }
+      }
+    }
+  }
+  if (Mode == Liveness_Intervals) {
+    // Reset each variable's live range.
+    for (VarList::const_iterator I = Variables.begin(), E = Variables.end();
+         I != E; ++I) {
+      if (Variable *Var = *I)
+        Var->resetLiveRange();
+    }
+  }
+  // Collect timing for just the portion that constructs the live
+  // range intervals based on the end-of-live-range computation, for a
+  // finer breakdown of the cost.
+  Timer T_liveRange;
+  // Make a final pass over instructions to delete dead instructions
+  // and build each Variable's live range.
+  for (NodeList::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
+    (*I)->livenessPostprocess(Mode, getLiveness());
+  }
+  if (Mode == Liveness_Intervals) {
+    // Special treatment for live in-args.  Their liveness needs to
+    // extend beyond the beginning of the function, otherwise an arg
+    // whose only use is in the first instruction will end up having
+    // the trivial live range [1,1) and will *not* interfere with
+    // other arguments.  So if the first instruction of the method is
+    // "r=arg1+arg2", both args may be assigned the same register.
+    for (SizeT I = 0; I < Args.size(); ++I) {
+      Variable *Arg = Args[I];
+      if (!Live->getLiveRange(Arg).isEmpty()) {
+        // Add live range [-1,0) with weight 0.  TODO: Here and below,
+        // use better symbolic constants along the lines of
+        // Inst::NumberDeleted and Inst::NumberSentinel instead of -1
+        // and 0.
+        Live->addLiveRange(Arg, -1, 0, 0);
+      }
+      // Do the same for i64 args that may have been lowered into i32
+      // Lo and Hi components.
+      Variable *Lo = Arg->getLo();
+      if (Lo && !Live->getLiveRange(Lo).isEmpty())
+        Live->addLiveRange(Lo, -1, 0, 0);
+      Variable *Hi = Arg->getHi();
+      if (Hi && !Live->getLiveRange(Hi).isEmpty())
+        Live->addLiveRange(Hi, -1, 0, 0);
+    }
+    // Copy Liveness::LiveRanges into individual variables.  TODO:
+    // Remove Variable::LiveRange and redirect to
+    // Liveness::LiveRanges.  TODO: make sure Variable weights
+    // are applied properly.
+    SizeT NumVars = Variables.size();
+    for (SizeT i = 0; i < NumVars; ++i) {
+      Variable *Var = Variables[i];
+      Var->setLiveRange(Live->getLiveRange(Var));
+      if (Var->getWeight().isInf())
+        Var->setLiveRangeInfiniteWeight();
+    }
+    T_liveRange.printElapsedUs(getContext(), "live range construction");
+    dump();
+  }
+}
+
+// Traverse every Variable of every Inst and verify that it
+// appears within the Variable's computed live range.
+bool Cfg::validateLiveness() const {
+  bool Valid = true;
+  Ostream &Str = Ctx->getStrDump();
+  for (NodeList::const_iterator I1 = Nodes.begin(), E1 = Nodes.end(); I1 != E1;
+       ++I1) {
+    CfgNode *Node = *I1;
+    InstList &Insts = Node->getInsts();
+    for (InstList::const_iterator I2 = Insts.begin(), E2 = Insts.end();
+         I2 != E2; ++I2) {
+      Inst *Inst = *I2;
+      if (Inst->isDeleted())
+        continue;
+      if (llvm::isa<InstFakeKill>(Inst))
+        continue;
+      InstNumberT InstNumber = Inst->getNumber();
+      Variable *Dest = Inst->getDest();
+      if (Dest) {
+        // TODO: This instruction should actually begin Dest's live
+        // range, so we could probably test that this instruction is
+        // the beginning of some segment of Dest's live range.  But
+        // this wouldn't work with non-SSA temporaries during
+        // lowering.
+        if (!Dest->getLiveRange().containsValue(InstNumber)) {
+          Valid = false;
+          Str << "Liveness error: inst " << Inst->getNumber() << " dest ";
+          Dest->dump(this);
+          Str << " live range " << Dest->getLiveRange() << "\n";
+        }
+      }
+      for (SizeT I = 0; I < Inst->getSrcSize(); ++I) {
+        Operand *Src = Inst->getSrc(I);
+        SizeT NumVars = Src->getNumVars();
+        for (SizeT J = 0; J < NumVars; ++J) {
+          const Variable *Var = Src->getVar(J);
+          if (!Var->getLiveRange().containsValue(InstNumber)) {
+            Valid = false;
+            Str << "Liveness error: inst " << Inst->getNumber() << " var ";
+            Var->dump(this);
+            Str << " live range " << Var->getLiveRange() << "\n";
+          }
+        }
+      }
+    }
+  }
+  return Valid;
+}
+
 // ======================== Dump routines ======================== //
 
 void Cfg::emit() {
@@ -158,8 +309,13 @@
   T_emit.printElapsedUs(Ctx, "emit()");
 }
 
-void Cfg::dump() {
+// Dumps the IR with an optional introductory message.
+void Cfg::dump(const IceString &Message) {
+  if (!Ctx->isVerbose())
+    return;
   Ostream &Str = Ctx->getStrDump();
+  if (!Message.empty())
+    Str << "================ " << Message << " ================\n";
   setCurrentNode(getEntryNode());
   // Print function name+args
   if (getContext()->isVerbose(IceV_Instructions)) {
@@ -176,6 +332,18 @@
     Str << ") {\n";
   }
   setCurrentNode(NULL);
+  if (getContext()->isVerbose(IceV_Liveness)) {
+    // Print summary info about variables
+    for (VarList::const_iterator I = Variables.begin(), E = Variables.end();
+         I != E; ++I) {
+      Variable *Var = *I;
+      Str << "//"
+          << " multiblock=" << Var->isMultiblockLife() << " "
+          << " weight=" << Var->getWeight() << " ";
+      Var->dump(this);
+      Str << " LIVE=" << Var->getLiveRange() << "\n";
+    }
+  }
   // Print each basic block
   for (NodeList::const_iterator I = Nodes.begin(), E = Nodes.end(); I != E;
        ++I) {
diff --git a/src/IceCfg.h b/src/IceCfg.h
index 5f3bfd3..dbf08c6 100644
--- a/src/IceCfg.h
+++ b/src/IceCfg.h
@@ -58,7 +58,7 @@
   const NodeList &getNodes() const { return Nodes; }
 
   // Manage instruction numbering.
-  int32_t newInstNumber() { return NextInstNumber++; }
+  InstNumberT newInstNumber() { return NextInstNumber++; }
 
   // Manage Variables.
   Variable *makeVariable(Type Ty, const CfgNode *Node,
@@ -72,6 +72,7 @@
 
   // Miscellaneous accessors.
   TargetLowering *getTarget() const { return Target.get(); }
+  Liveness *getLiveness() const { return Live.get(); }
   bool hasComputedFrame() const;
 
   // Passes over the CFG.
@@ -80,11 +81,16 @@
   // compute the predecessor edges, in the form of
   // CfgNode::InEdges[].
   void computePredecessors();
+  void renumberInstructions();
   void placePhiLoads();
   void placePhiStores();
   void deletePhis();
+  void doAddressOpt();
   void genCode();
   void genFrame();
+  void livenessLightweight();
+  void liveness(LivenessMode Mode);
+  bool validateLiveness() const;
 
   // Manage the CurrentNode field, which is used for validating the
   // Variable::DefNode field during dumping/emitting.
@@ -92,7 +98,7 @@
   const CfgNode *getCurrentNode() const { return CurrentNode; }
 
   void emit();
-  void dump();
+  void dump(const IceString &Message = "");
 
   // Allocate data of type T using the per-Cfg allocator.
   template <typename T> T *allocate() { return Allocator.Allocate<T>(); }
@@ -136,9 +142,10 @@
   IceString ErrorMessage;
   CfgNode *Entry; // entry basic block
   NodeList Nodes; // linearized node list; Entry should be first
-  int32_t NextInstNumber;
+  InstNumberT NextInstNumber;
   VarList Variables;
   VarList Args; // subset of Variables, in argument order
+  llvm::OwningPtr<Liveness> Live;
   llvm::OwningPtr<TargetLowering> Target;
 
   // CurrentNode is maintained during dumping/emitting just for
diff --git a/src/IceCfgNode.cpp b/src/IceCfgNode.cpp
index c00b2c7..3fd63e2 100644
--- a/src/IceCfgNode.cpp
+++ b/src/IceCfgNode.cpp
@@ -15,6 +15,7 @@
 #include "IceCfg.h"
 #include "IceCfgNode.h"
 #include "IceInst.h"
+#include "IceLiveness.h"
 #include "IceOperand.h"
 #include "IceTargetLowering.h"
 
@@ -49,6 +50,22 @@
   Inst->updateVars(this);
 }
 
+// Renumbers the non-deleted instructions in the node.  This needs to
+// be done in preparation for live range analysis.  The instruction
+// numbers in a block must be monotonically increasing.  The range of
+// instruction numbers in a block, from lowest to highest, must not
+// overlap with the range of any other block.
+void CfgNode::renumberInstructions() {
+  for (PhiList::const_iterator I = Phis.begin(), E = Phis.end(); I != E; ++I) {
+    (*I)->renumber(Func);
+  }
+  InstList::const_iterator I = Insts.begin(), E = Insts.end();
+  while (I != E) {
+    Inst *Inst = *I++;
+    Inst->renumber(Func);
+  }
+}
+
 // When a node is created, the OutEdges are immediately knows, but the
 // InEdges have to be built up incrementally.  After the CFG has been
 // constructed, the computePredecessors() pass finalizes it by
@@ -107,10 +124,25 @@
   // Every block must end in a terminator instruction.
   assert(InsertionPoint != Insts.begin());
   --InsertionPoint;
-  // Confirm via assert() that InsertionPoint is a terminator
-  // instruction.  Calling getTerminatorEdges() on a non-terminator
-  // instruction will cause an llvm_unreachable().
-  assert(((*InsertionPoint)->getTerminatorEdges(), true));
+  // Confirm that InsertionPoint is a terminator instruction.  Calling
+  // getTerminatorEdges() on a non-terminator instruction will cause
+  // an llvm_unreachable().
+  (void)(*InsertionPoint)->getTerminatorEdges();
+  // If the current insertion point is at a conditional branch
+  // instruction, and the previous instruction is a compare
+  // instruction, then we move the insertion point before the compare
+  // instruction so as not to interfere with compare/branch fusing.
+  if (InstBr *Branch = llvm::dyn_cast<InstBr>(*InsertionPoint)) {
+    if (!Branch->isUnconditional()) {
+      if (InsertionPoint != Insts.begin()) {
+        --InsertionPoint;
+        if (!llvm::isa<InstIcmp>(*InsertionPoint) &&
+            !llvm::isa<InstFcmp>(*InsertionPoint)) {
+          ++InsertionPoint;
+        }
+      }
+    }
+  }
 
   // Consider every out-edge.
   for (NodeList::const_iterator I1 = OutEdges.begin(), E1 = OutEdges.end();
@@ -145,6 +177,19 @@
   }
 }
 
+// Does address mode optimization.  Pass each instruction to the
+// TargetLowering object.  If it returns a new instruction
+// (representing the optimized address mode), then insert the new
+// instruction and delete the old.
+void CfgNode::doAddressOpt() {
+  TargetLowering *Target = Func->getTarget();
+  LoweringContext &Context = Target->getContext();
+  Context.init(this);
+  while (!Context.atEnd()) {
+    Target->doAddressOpt();
+  }
+}
+
 // Drives the target lowering.  Passes the current instruction and the
 // next non-deleted instruction for target lowering.
 void CfgNode::genCode() {
@@ -162,6 +207,189 @@
   }
 }
 
+void CfgNode::livenessLightweight() {
+  SizeT NumVars = Func->getNumVariables();
+  llvm::BitVector Live(NumVars);
+  // Process regular instructions in reverse order.
+  for (InstList::const_reverse_iterator I = Insts.rbegin(), E = Insts.rend();
+       I != E; ++I) {
+    if ((*I)->isDeleted())
+      continue;
+    (*I)->livenessLightweight(Live);
+  }
+  for (PhiList::const_iterator I = Phis.begin(), E = Phis.end(); I != E; ++I) {
+    if ((*I)->isDeleted())
+      continue;
+    (*I)->livenessLightweight(Live);
+  }
+}
+
+// Performs liveness analysis on the block.  Returns true if the
+// incoming liveness changed from before, false if it stayed the same.
+// (If it changes, the node's predecessors need to be processed
+// again.)
+bool CfgNode::liveness(Liveness *Liveness) {
+  SizeT NumVars = Liveness->getNumVarsInNode(this);
+  llvm::BitVector Live(NumVars);
+  // Mark the beginning and ending of each variable's live range
+  // with the sentinel instruction number 0.
+  std::vector<InstNumberT> &LiveBegin = Liveness->getLiveBegin(this);
+  std::vector<InstNumberT> &LiveEnd = Liveness->getLiveEnd(this);
+  LiveBegin.assign(NumVars, Inst::NumberSentinel);
+  LiveEnd.assign(NumVars, Inst::NumberSentinel);
+  // Initialize Live to be the union of all successors' LiveIn.
+  for (NodeList::const_iterator I = OutEdges.begin(), E = OutEdges.end();
+       I != E; ++I) {
+    CfgNode *Succ = *I;
+    Live |= Liveness->getLiveIn(Succ);
+    // Mark corresponding argument of phis in successor as live.
+    for (PhiList::const_iterator I1 = Succ->Phis.begin(), E1 = Succ->Phis.end();
+         I1 != E1; ++I1) {
+      (*I1)->livenessPhiOperand(Live, this, Liveness);
+    }
+  }
+  Liveness->getLiveOut(this) = Live;
+
+  // Process regular instructions in reverse order.
+  for (InstList::const_reverse_iterator I = Insts.rbegin(), E = Insts.rend();
+       I != E; ++I) {
+    if ((*I)->isDeleted())
+      continue;
+    (*I)->liveness((*I)->getNumber(), Live, Liveness, this);
+  }
+  // Process phis in forward order so that we can override the
+  // instruction number to be that of the earliest phi instruction in
+  // the block.
+  InstNumberT FirstPhiNumber = Inst::NumberSentinel;
+  for (PhiList::const_iterator I = Phis.begin(), E = Phis.end(); I != E; ++I) {
+    if ((*I)->isDeleted())
+      continue;
+    if (FirstPhiNumber == Inst::NumberSentinel)
+      FirstPhiNumber = (*I)->getNumber();
+    (*I)->liveness(FirstPhiNumber, Live, Liveness, this);
+  }
+
+  // When using the sparse representation, after traversing the
+  // instructions in the block, the Live bitvector should only contain
+  // set bits for global variables upon block entry.  We validate this
+  // by shrinking the Live vector and then testing it against the
+  // pre-shrunk version.  (The shrinking is required, but the
+  // validation is not.)
+  llvm::BitVector LiveOrig = Live;
+  Live.resize(Liveness->getNumGlobalVars());
+  // Non-global arguments in the entry node are allowed to be live on
+  // entry.
+  bool IsEntry = (Func->getEntryNode() == this);
+  if (!(IsEntry || Live == LiveOrig)) {
+    // This is a fatal liveness consistency error.  Print some
+    // diagnostics and abort.
+    Ostream &Str = Func->getContext()->getStrDump();
+    Func->setCurrentNode(NULL);
+    Str << "LiveOrig-Live =";
+    for (SizeT i = Live.size(); i < LiveOrig.size(); ++i) {
+      if (LiveOrig.test(i)) {
+        Str << " ";
+        Liveness->getVariable(i, this)->dump(Func);
+      }
+    }
+    Str << "\n";
+    llvm_unreachable("Fatal inconsistency in liveness analysis");
+  }
+
+  bool Changed = false;
+  llvm::BitVector &LiveIn = Liveness->getLiveIn(this);
+  // Add in current LiveIn
+  Live |= LiveIn;
+  // Check result, set LiveIn=Live
+  Changed = (Live != LiveIn);
+  if (Changed)
+    LiveIn = Live;
+  return Changed;
+}
+
+// Now that basic liveness is complete, remove dead instructions that
+// were tentatively marked as dead, and compute actual live ranges.
+// It is assumed that within a single basic block, a live range begins
+// at most once and ends at most once.  This is certainly true for
+// pure SSA form.  It is also true once phis are lowered, since each
+// assignment to the phi-based temporary is in a different basic
+// block, and there is a single read that ends the live in the basic
+// block that contained the actual phi instruction.
+void CfgNode::livenessPostprocess(LivenessMode Mode, Liveness *Liveness) {
+  InstNumberT FirstInstNum = Inst::NumberSentinel;
+  InstNumberT LastInstNum = Inst::NumberSentinel;
+  // Process phis in any order.  Process only Dest operands.
+  for (PhiList::const_iterator I = Phis.begin(), E = Phis.end(); I != E; ++I) {
+    InstPhi *Inst = *I;
+    Inst->deleteIfDead();
+    if (Inst->isDeleted())
+      continue;
+    if (FirstInstNum == Inst::NumberSentinel)
+      FirstInstNum = Inst->getNumber();
+    assert(Inst->getNumber() > LastInstNum);
+    LastInstNum = Inst->getNumber();
+  }
+  // Process instructions
+  for (InstList::const_iterator I = Insts.begin(), E = Insts.end(); I != E;
+       ++I) {
+    Inst *Inst = *I;
+    Inst->deleteIfDead();
+    if (Inst->isDeleted())
+      continue;
+    if (FirstInstNum == Inst::NumberSentinel)
+      FirstInstNum = Inst->getNumber();
+    assert(Inst->getNumber() > LastInstNum);
+    LastInstNum = Inst->getNumber();
+    // Create fake live ranges for a Kill instruction, but only if the
+    // linked instruction is still alive.
+    if (Mode == Liveness_Intervals) {
+      if (InstFakeKill *Kill = llvm::dyn_cast<InstFakeKill>(Inst)) {
+        if (!Kill->getLinked()->isDeleted()) {
+          SizeT NumSrcs = Inst->getSrcSize();
+          for (SizeT i = 0; i < NumSrcs; ++i) {
+            Variable *Var = llvm::cast<Variable>(Inst->getSrc(i));
+            InstNumberT InstNumber = Inst->getNumber();
+            Liveness->addLiveRange(Var, InstNumber, InstNumber, 1);
+          }
+        }
+      }
+    }
+  }
+  if (Mode != Liveness_Intervals)
+    return;
+
+  SizeT NumVars = Liveness->getNumVarsInNode(this);
+  SizeT NumGlobals = Liveness->getNumGlobalVars();
+  llvm::BitVector &LiveIn = Liveness->getLiveIn(this);
+  llvm::BitVector &LiveOut = Liveness->getLiveOut(this);
+  std::vector<InstNumberT> &LiveBegin = Liveness->getLiveBegin(this);
+  std::vector<InstNumberT> &LiveEnd = Liveness->getLiveEnd(this);
+  for (SizeT i = 0; i < NumVars; ++i) {
+    // Deal with the case where the variable is both live-in and
+    // live-out, but LiveEnd comes before LiveBegin.  In this case, we
+    // need to add two segments to the live range because there is a
+    // hole in the middle.  This would typically happen as a result of
+    // phi lowering in the presence of loopback edges.
+    bool IsGlobal = (i < NumGlobals);
+    if (IsGlobal && LiveIn[i] && LiveOut[i] && LiveBegin[i] > LiveEnd[i]) {
+      Variable *Var = Liveness->getVariable(i, this);
+      Liveness->addLiveRange(Var, FirstInstNum, LiveEnd[i], 1);
+      Liveness->addLiveRange(Var, LiveBegin[i], LastInstNum + 1, 1);
+      continue;
+    }
+    InstNumberT Begin = (IsGlobal && LiveIn[i]) ? FirstInstNum : LiveBegin[i];
+    InstNumberT End = (IsGlobal && LiveOut[i]) ? LastInstNum + 1 : LiveEnd[i];
+    if (Begin == Inst::NumberSentinel && End == Inst::NumberSentinel)
+      continue;
+    if (Begin <= FirstInstNum)
+      Begin = FirstInstNum;
+    if (End == Inst::NumberSentinel)
+      End = LastInstNum + 1;
+    Variable *Var = Liveness->getVariable(i, this);
+    Liveness->addLiveRange(Var, Begin, End, 1);
+  }
+}
+
 // ======================== Dump routines ======================== //
 
 void CfgNode::emit(Cfg *Func) const {
@@ -194,6 +422,7 @@
 void CfgNode::dump(Cfg *Func) const {
   Func->setCurrentNode(this);
   Ostream &Str = Func->getContext()->getStrDump();
+  Liveness *Liveness = Func->getLiveness();
   if (Func->getContext()->isVerbose(IceV_Instructions)) {
     Str << getName() << ":\n";
   }
@@ -208,6 +437,19 @@
     }
     Str << "\n";
   }
+  // Dump the live-in variables.
+  llvm::BitVector LiveIn;
+  if (Liveness)
+    LiveIn = Liveness->getLiveIn(this);
+  if (Func->getContext()->isVerbose(IceV_Liveness) && !LiveIn.empty()) {
+    Str << "    // LiveIn:";
+    for (SizeT i = 0; i < LiveIn.size(); ++i) {
+      if (LiveIn[i]) {
+        Str << " %" << Liveness->getVariable(i, this)->getName();
+      }
+    }
+    Str << "\n";
+  }
   // Dump each instruction.
   if (Func->getContext()->isVerbose(IceV_Instructions)) {
     for (PhiList::const_iterator I = Phis.begin(), E = Phis.end(); I != E;
@@ -221,6 +463,19 @@
       Inst->dumpDecorated(Func);
     }
   }
+  // Dump the live-out variables.
+  llvm::BitVector LiveOut;
+  if (Liveness)
+    LiveOut = Liveness->getLiveOut(this);
+  if (Func->getContext()->isVerbose(IceV_Liveness) && !LiveOut.empty()) {
+    Str << "    // LiveOut:";
+    for (SizeT i = 0; i < LiveOut.size(); ++i) {
+      if (LiveOut[i]) {
+        Str << " %" << Liveness->getVariable(i, this)->getName();
+      }
+    }
+    Str << "\n";
+  }
   // Dump list of successor nodes.
   if (Func->getContext()->isVerbose(IceV_Succs)) {
     Str << "    // succs = ";
diff --git a/src/IceCfgNode.h b/src/IceCfgNode.h
index 645dc57..ea50dca 100644
--- a/src/IceCfgNode.h
+++ b/src/IceCfgNode.h
@@ -45,6 +45,7 @@
   // Manage the instruction list.
   InstList &getInsts() { return Insts; }
   void appendInst(Inst *Inst);
+  void renumberInstructions();
 
   // Add a predecessor edge to the InEdges list for each of this
   // node's successors.
@@ -53,7 +54,11 @@
   void placePhiLoads();
   void placePhiStores();
   void deletePhis();
+  void doAddressOpt();
   void genCode();
+  void livenessLightweight();
+  bool liveness(Liveness *Liveness);
+  void livenessPostprocess(LivenessMode Mode, Liveness *Liveness);
   void emit(Cfg *Func) const;
   void dump(Cfg *Func) const;
 
diff --git a/src/IceDefs.h b/src/IceDefs.h
index 9870716..4c616f8 100644
--- a/src/IceDefs.h
+++ b/src/IceDefs.h
@@ -50,6 +50,8 @@
 class Inst;
 class InstPhi;
 class InstTarget;
+class LiveRange;
+class Liveness;
 class Operand;
 class TargetLowering;
 class Variable;
@@ -68,6 +70,23 @@
 // may be 64-bits wide) when we want to save space.
 typedef uint32_t SizeT;
 
+// InstNumberT is for holding an instruction number.  Instruction
+// numbers are used for representing Variable live ranges.
+typedef int32_t InstNumberT;
+
+enum LivenessMode {
+  // Basic version of live-range-end calculation.  Marks the last uses
+  // of variables based on dataflow analysis.  Records the set of
+  // live-in and live-out variables for each block.  Identifies and
+  // deletes dead instructions (primarily stores).
+  Liveness_Basic,
+
+  // In addition to Liveness_Basic, also calculate the complete
+  // live range for each variable in a form suitable for interference
+  // calculation and register allocation.
+  Liveness_Intervals
+};
+
 enum VerboseItem {
   IceV_None = 0,
   IceV_Instructions = 1 << 0,
diff --git a/src/IceGlobalContext.cpp b/src/IceGlobalContext.cpp
index 7a21b40..d77a5dd 100644
--- a/src/IceGlobalContext.cpp
+++ b/src/IceGlobalContext.cpp
@@ -12,6 +12,8 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include <ctype.h> // isdigit()
+
 #include "IceDefs.h"
 #include "IceTypes.h"
 #include "IceCfg.h"
@@ -129,8 +131,14 @@
     return NewName;
   }
 
-  ItemsParsed = sscanf(Name.c_str(), "_Z%u%s", &BaseLength, NameBase);
-  if (ItemsParsed == 2 && BaseLength <= strlen(NameBase)) {
+  // Artificially limit BaseLength to 9 digits (less than 1 billion)
+  // because sscanf behavior is undefined on integer overflow.  If
+  // there are more than 9 digits (which we test by looking at the
+  // beginning of NameBase), then we consider this a failure to parse
+  // a namespace mangling, and fall back to the simple prefixing.
+  ItemsParsed = sscanf(Name.c_str(), "_Z%9u%s", &BaseLength, NameBase);
+  if (ItemsParsed == 2 && BaseLength <= strlen(NameBase) &&
+      !isdigit(NameBase[0])) {
     // Transform _Z3barxyz ==> _ZN6Prefix3barExyz
     //                           ^^^^^^^^    ^
     // (splice in "N6Prefix", and insert "E" after "3bar")
diff --git a/src/IceInst.cpp b/src/IceInst.cpp
index 3f0c97d..12ca16c 100644
--- a/src/IceInst.cpp
+++ b/src/IceInst.cpp
@@ -15,6 +15,7 @@
 #include "IceCfg.h"
 #include "IceCfgNode.h"
 #include "IceInst.h"
+#include "IceLiveness.h"
 #include "IceOperand.h"
 
 namespace Ice {
@@ -74,21 +75,140 @@
 } // end of anonymous namespace
 
 Inst::Inst(Cfg *Func, InstKind Kind, SizeT MaxSrcs, Variable *Dest)
-    : Kind(Kind), Number(Func->newInstNumber()), Deleted(false),
+    : Kind(Kind), Number(Func->newInstNumber()), Deleted(false), Dead(false),
       HasSideEffects(false), Dest(Dest), MaxSrcs(MaxSrcs), NumSrcs(0),
-      Srcs(Func->allocateArrayOf<Operand *>(MaxSrcs)) {}
+      Srcs(Func->allocateArrayOf<Operand *>(MaxSrcs)), LiveRangesEnded(0) {}
+
+// Assign the instruction a new number.
+void Inst::renumber(Cfg *Func) {
+  Number = isDeleted() ? NumberDeleted : Func->newInstNumber();
+}
+
+// Delete the instruction if its tentative Dead flag is still set
+// after liveness analysis.
+void Inst::deleteIfDead() {
+  if (Dead)
+    setDeleted();
+}
+
+// If Src is a Variable, it returns true if this instruction ends
+// Src's live range.  Otherwise, returns false.
+bool Inst::isLastUse(const Operand *TestSrc) const {
+  if (LiveRangesEnded == 0)
+    return false; // early-exit optimization
+  if (const Variable *TestVar = llvm::dyn_cast<const Variable>(TestSrc)) {
+    LREndedBits Mask = LiveRangesEnded;
+    for (SizeT I = 0; I < getSrcSize(); ++I) {
+      Operand *Src = getSrc(I);
+      SizeT NumVars = Src->getNumVars();
+      for (SizeT J = 0; J < NumVars; ++J) {
+        const Variable *Var = Src->getVar(J);
+        if (Var == TestVar) {
+          // We've found where the variable is used in the instruction.
+          return Mask & 1;
+        }
+        Mask >>= 1;
+        if (Mask == 0)
+          return false; // another early-exit optimization
+      }
+    }
+  }
+  return false;
+}
 
 void Inst::updateVars(CfgNode *Node) {
   if (Dest)
     Dest->setDefinition(this, Node);
 
+  for (SizeT I = 0; I < getSrcSize(); ++I) {
+    Operand *Src = getSrc(I);
+    SizeT NumVars = Src->getNumVars();
+    for (SizeT J = 0; J < NumVars; ++J) {
+      Variable *Var = Src->getVar(J);
+      Var->setUse(this, Node);
+    }
+  }
+}
+
+void Inst::livenessLightweight(llvm::BitVector &Live) {
+  assert(!isDeleted());
+  if (llvm::isa<InstFakeKill>(this))
+    return;
+  resetLastUses();
   SizeT VarIndex = 0;
   for (SizeT I = 0; I < getSrcSize(); ++I) {
     Operand *Src = getSrc(I);
     SizeT NumVars = Src->getNumVars();
     for (SizeT J = 0; J < NumVars; ++J, ++VarIndex) {
-      Variable *Var = Src->getVar(J);
-      Var->setUse(this, Node);
+      const Variable *Var = Src->getVar(J);
+      if (Var->isMultiblockLife())
+        continue;
+      SizeT Index = Var->getIndex();
+      if (Live[Index])
+        continue;
+      Live[Index] = true;
+      setLastUse(VarIndex);
+    }
+  }
+}
+
+void Inst::liveness(InstNumberT InstNumber, llvm::BitVector &Live,
+                    Liveness *Liveness, const CfgNode *Node) {
+  assert(!isDeleted());
+  if (llvm::isa<InstFakeKill>(this))
+    return;
+
+  std::vector<InstNumberT> &LiveBegin = Liveness->getLiveBegin(Node);
+  std::vector<InstNumberT> &LiveEnd = Liveness->getLiveEnd(Node);
+  Dead = false;
+  if (Dest) {
+    SizeT VarNum = Liveness->getLiveIndex(Dest);
+    if (Live[VarNum]) {
+      Live[VarNum] = false;
+      LiveBegin[VarNum] = InstNumber;
+    } else {
+      if (!hasSideEffects())
+        Dead = true;
+    }
+  }
+  if (Dead)
+    return;
+  // Phi arguments only get added to Live in the predecessor node, but
+  // we still need to update LiveRangesEnded.
+  bool IsPhi = llvm::isa<InstPhi>(this);
+  resetLastUses();
+  SizeT VarIndex = 0;
+  for (SizeT I = 0; I < getSrcSize(); ++I) {
+    Operand *Src = getSrc(I);
+    SizeT NumVars = Src->getNumVars();
+    for (SizeT J = 0; J < NumVars; ++J, ++VarIndex) {
+      const Variable *Var = Src->getVar(J);
+      SizeT VarNum = Liveness->getLiveIndex(Var);
+      if (!Live[VarNum]) {
+        setLastUse(VarIndex);
+        if (!IsPhi) {
+          Live[VarNum] = true;
+          // For a variable in SSA form, its live range can end at
+          // most once in a basic block.  However, after lowering to
+          // two-address instructions, we end up with sequences like
+          // "t=b;t+=c;a=t" where t's live range begins and ends
+          // twice.  ICE only allows a variable to have a single
+          // liveness interval in a basic block (except for blocks
+          // where a variable is live-in and live-out but there is a
+          // gap in the middle, and except for the special
+          // InstFakeKill instruction that can appear multiple
+          // times in the same block).  Therefore, this lowered
+          // sequence needs to represent a single conservative live
+          // range for t.  Since the instructions are being traversed
+          // backwards, we make sure LiveEnd is only set once by
+          // setting it only when LiveEnd[VarNum]==0 (sentinel value).
+          // Note that it's OK to set LiveBegin multiple times because
+          // of the backwards traversal.
+          if (LiveEnd[VarNum] == 0) {
+            LiveEnd[VarNum] = InstNumber;
+          }
+        }
+      }
     }
   }
 }
@@ -192,6 +312,28 @@
   return NULL;
 }
 
+// Updates liveness for a particular operand based on the given
+// predecessor edge.  Doesn't mark the operand as live if the Phi
+// instruction is dead or deleted.
+void InstPhi::livenessPhiOperand(llvm::BitVector &Live, CfgNode *Target,
+                                 Liveness *Liveness) {
+  if (isDeleted() || Dead)
+    return;
+  for (SizeT I = 0; I < getSrcSize(); ++I) {
+    if (Labels[I] == Target) {
+      if (Variable *Var = llvm::dyn_cast<Variable>(getSrc(I))) {
+        SizeT SrcIndex = Liveness->getLiveIndex(Var);
+        if (!Live[SrcIndex]) {
+          setLastUse(I);
+          Live[SrcIndex] = true;
+        }
+      }
+      return;
+    }
+  }
+  llvm_unreachable("Phi operand not found for specified target node");
+}
+
 // Change "a=phi(...)" to "a_phi=phi(...)" and return a new
 // instruction "a=a_phi".
 Inst *InstPhi::lower(Cfg *Func, CfgNode *Node) {
@@ -294,8 +436,8 @@
     return;
   if (Func->getContext()->isVerbose(IceV_InstNumbers)) {
     char buf[30];
-    int32_t Number = getNumber();
-    if (Number < 0)
+    InstNumberT Number = getNumber();
+    if (Number == NumberDeleted)
       snprintf(buf, llvm::array_lengthof(buf), "[XXX]");
     else
       snprintf(buf, llvm::array_lengthof(buf), "[%3d]", Number);
@@ -305,6 +447,7 @@
   if (isDeleted())
     Str << "  //";
   dump(Func);
+  dumpExtras(Func);
   Str << "\n";
 }
 
@@ -319,6 +462,32 @@
   dumpSources(Func);
 }
 
+void Inst::dumpExtras(const Cfg *Func) const {
+  Ostream &Str = Func->getContext()->getStrDump();
+  bool First = true;
+  // Print "LIVEEND={a,b,c}" for all source operands whose live ranges
+  // are known to end at this instruction.
+  if (Func->getContext()->isVerbose(IceV_Liveness)) {
+    for (SizeT I = 0; I < getSrcSize(); ++I) {
+      Operand *Src = getSrc(I);
+      SizeT NumVars = Src->getNumVars();
+      for (SizeT J = 0; J < NumVars; ++J) {
+        const Variable *Var = Src->getVar(J);
+        if (isLastUse(Var)) {
+          if (First)
+            Str << " // LIVEEND={";
+          else
+            Str << ",";
+          Var->dump(Func);
+          First = false;
+        }
+      }
+    }
+    if (!First)
+      Str << "}";
+  }
+}
+
 void Inst::dumpSources(const Cfg *Func) const {
   Ostream &Str = Func->getContext()->getStrDump();
   for (SizeT I = 0; I < getSrcSize(); ++I) {
@@ -553,4 +722,6 @@
   Inst::dump(Func);
 }
 
+void InstTarget::dumpExtras(const Cfg *Func) const { Inst::dumpExtras(Func); }
+
 } // end of namespace Ice
diff --git a/src/IceInst.h b/src/IceInst.h
index fd0eeea..a465eda 100644
--- a/src/IceInst.h
+++ b/src/IceInst.h
@@ -56,10 +56,14 @@
   };
   InstKind getKind() const { return Kind; }
 
-  int32_t getNumber() const { return Number; }
+  InstNumberT getNumber() const { return Number; }
+  void renumber(Cfg *Func);
+  static const InstNumberT NumberDeleted = -1;
+  static const InstNumberT NumberSentinel = 0;
 
   bool isDeleted() const { return Deleted; }
   void setDeleted() { Deleted = true; }
+  void deleteIfDead();
 
   bool hasSideEffects() const { return HasSideEffects; }
 
@@ -71,6 +75,8 @@
     return Srcs[I];
   }
 
+  bool isLastUse(const Operand *Src) const;
+
   // Returns a list of out-edges corresponding to a terminator
   // instruction, which is the last instruction of the block.
   virtual NodeList getTerminatorEdges() const {
@@ -88,8 +94,12 @@
   // basic blocks, i.e. used in a different block from their definition.
   void updateVars(CfgNode *Node);
 
+  void livenessLightweight(llvm::BitVector &Live);
+  void liveness(InstNumberT InstNumber, llvm::BitVector &Live,
+                Liveness *Liveness, const CfgNode *Node);
   virtual void emit(const Cfg *Func) const;
   virtual void dump(const Cfg *Func) const;
+  virtual void dumpExtras(const Cfg *Func) const;
   void dumpDecorated(const Cfg *Func) const;
   void emitSources(const Cfg *Func) const;
   void dumpSources(const Cfg *Func) const;
@@ -105,15 +115,22 @@
     assert(NumSrcs < MaxSrcs);
     Srcs[NumSrcs++] = Src;
   }
+  void setLastUse(SizeT VarIndex) {
+    if (VarIndex < CHAR_BIT * sizeof(LiveRangesEnded))
+      LiveRangesEnded |= (((LREndedBits)1u) << VarIndex);
+  }
+  void resetLastUses() { LiveRangesEnded = 0; }
   // The destroy() method lets the instruction cleanly release any
   // memory that was allocated via the Cfg's allocator.
   virtual void destroy(Cfg *Func) { Func->deallocateArrayOf<Operand *>(Srcs); }
 
   const InstKind Kind;
   // Number is the instruction number for describing live ranges.
-  int32_t Number;
+  InstNumberT Number;
   // Deleted means irrevocably deleted.
   bool Deleted;
+  // Dead means pending deletion after liveness analysis converges.
+  bool Dead;
   // HasSideEffects means the instruction is something like a function
   // call or a volatile load that can't be removed even if its Dest
   // variable is not live.
@@ -124,6 +141,18 @@
   SizeT NumSrcs;
   Operand **Srcs;
 
+  // LiveRangesEnded marks which Variables' live ranges end in this
+  // instruction.  An instruction can have an arbitrary number of
+  // source operands (e.g. a call instruction), and each source
+  // operand can contain 0 or 1 Variable (and target-specific operands
+  // could contain more than 1 Variable).  All the variables in an
+  // instruction are conceptually flattened and each variable is
+  // mapped to one bit position of the LiveRangesEnded bit vector.
+  // Only the first CHAR_BIT * sizeof(LREndedBits) variables are
+  // tracked this way.
+  typedef uint32_t LREndedBits; // only first 32 src operands tracked, sorry
+  LREndedBits LiveRangesEnded;
+
 private:
   Inst(const Inst &) LLVM_DELETED_FUNCTION;
   Inst &operator=(const Inst &) LLVM_DELETED_FUNCTION;
@@ -393,6 +422,8 @@
   }
   void addArgument(Operand *Source, CfgNode *Label);
   Operand *getOperandForTarget(CfgNode *Target) const;
+  void livenessPhiOperand(llvm::BitVector &Live, CfgNode *Target,
+                          Liveness *Liveness);
   Inst *lower(Cfg *Func, CfgNode *Node);
   virtual void dump(const Cfg *Func) const;
   static bool classof(const Inst *Inst) { return Inst->getKind() == Phi; }
@@ -626,6 +657,7 @@
 public:
   virtual void emit(const Cfg *Func) const = 0;
   virtual void dump(const Cfg *Func) const;
+  virtual void dumpExtras(const Cfg *Func) const;
   static bool classof(const Inst *Inst) { return Inst->getKind() >= Target; }
 
 protected:
diff --git a/src/IceLiveness.cpp b/src/IceLiveness.cpp
new file mode 100644
index 0000000..47ef358
--- /dev/null
+++ b/src/IceLiveness.cpp
@@ -0,0 +1,116 @@
+//===- subzero/src/IceLiveness.cpp - Liveness analysis implementation -----===//
+//
+//                        The Subzero Code Generator
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides some of the support for the Liveness class.  In
+// particular, it handles the sparsity representation of the mapping
+// between Variables and CfgNodes.  The idea is that since most
+// variables are used only within a single basic block, we can
+// partition the variables into "local" and "global" sets.  Instead of
+// sizing and indexing vectors according to Variable::Number, we
+// create a mapping such that global variables are mapped to low
+// indexes that are common across nodes, and local variables are
+// mapped to a higher index space that is shared across nodes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "IceDefs.h"
+#include "IceCfg.h"
+#include "IceCfgNode.h"
+#include "IceInst.h"
+#include "IceLiveness.h"
+#include "IceOperand.h"
+
+namespace Ice {
+
+void Liveness::init() {
+  // Initialize most of the container sizes.
+  SizeT NumVars = Func->getVariables().size();
+  SizeT NumNodes = Func->getNumNodes();
+  Nodes.resize(NumNodes);
+  VarToLiveMap.resize(NumVars);
+  if (Mode == Liveness_Intervals)
+    LiveRanges.resize(NumVars);
+
+  // Count the number of globals, and the number of locals for each
+  // block.
+  for (SizeT i = 0; i < NumVars; ++i) {
+    Variable *Var = Func->getVariables()[i];
+    if (Var->isMultiblockLife()) {
+      ++NumGlobals;
+    } else {
+      SizeT Index = Var->getLocalUseNode()->getIndex();
+      ++Nodes[Index].NumLocals;
+    }
+  }
+
+  // Resize each LivenessNode::LiveToVarMap, and the global
+  // LiveToVarMap.  Reset the counts to 0.
+  for (SizeT i = 0; i < NumNodes; ++i) {
+    Nodes[i].LiveToVarMap.assign(Nodes[i].NumLocals, NULL);
+    Nodes[i].NumLocals = 0;
+  }
+  LiveToVarMap.assign(NumGlobals, NULL);
+
+  // Sort each variable into the appropriate LiveToVarMap.  Also set
+  // VarToLiveMap.
+  SizeT TmpNumGlobals = 0;
+  for (SizeT i = 0; i < NumVars; ++i) {
+    Variable *Var = Func->getVariables()[i];
+    SizeT VarIndex = Var->getIndex();
+    SizeT LiveIndex;
+    if (Var->isMultiblockLife()) {
+      LiveIndex = TmpNumGlobals++;
+      LiveToVarMap[LiveIndex] = Var;
+    } else {
+      SizeT NodeIndex = Var->getLocalUseNode()->getIndex();
+      LiveIndex = Nodes[NodeIndex].NumLocals++;
+      Nodes[NodeIndex].LiveToVarMap[LiveIndex] = Var;
+      LiveIndex += NumGlobals;
+    }
+    VarToLiveMap[VarIndex] = LiveIndex;
+  }
+  assert(NumGlobals == TmpNumGlobals);
+
+  // Process each node.
+  const NodeList &LNodes = Func->getNodes();
+  SizeT NumLNodes = LNodes.size();
+  for (SizeT i = 0; i < NumLNodes; ++i) {
+    LivenessNode &Node = Nodes[LNodes[i]->getIndex()];
+    // NumLocals, LiveToVarMap already initialized
+    Node.LiveIn.resize(NumGlobals);
+    Node.LiveOut.resize(NumGlobals);
+    // LiveBegin and LiveEnd are reinitialized before each pass over
+    // the block.
+  }
+}
+
+Variable *Liveness::getVariable(SizeT LiveIndex, const CfgNode *Node) const {
+  if (LiveIndex < NumGlobals)
+    return LiveToVarMap[LiveIndex];
+  SizeT NodeIndex = Node->getIndex();
+  return Nodes[NodeIndex].LiveToVarMap[LiveIndex - NumGlobals];
+}
+
+SizeT Liveness::getLiveIndex(const Variable *Var) const {
+  return VarToLiveMap[Var->getIndex()];
+}
+
+void Liveness::addLiveRange(Variable *Var, InstNumberT Start, InstNumberT End,
+                            uint32_t WeightDelta) {
+  LiveRange &LiveRange = LiveRanges[Var->getIndex()];
+  assert(WeightDelta != RegWeight::Inf);
+  LiveRange.addSegment(Start, End);
+  LiveRange.addWeight(WeightDelta);
+}
+
+LiveRange &Liveness::getLiveRange(Variable *Var) {
+  return LiveRanges[Var->getIndex()];
+}
+
+} // end of namespace Ice
diff --git a/src/IceLiveness.h b/src/IceLiveness.h
new file mode 100644
index 0000000..e58f426
--- /dev/null
+++ b/src/IceLiveness.h
@@ -0,0 +1,100 @@
+//===- subzero/src/IceLiveness.h - Liveness analysis ------------*- C++ -*-===//
+//
+//                        The Subzero Code Generator
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Liveness and LivenessNode classes,
+// which are used for liveness analysis.  The node-specific
+// information tracked for each Variable includes whether it is
+// live on entry, whether it is live on exit, the instruction number
+// that starts its live range, and the instruction number that ends
+// its live range.  At the Cfg level, the actual live intervals are
+// recorded.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SUBZERO_SRC_ICELIVENESS_H
+#define SUBZERO_SRC_ICELIVENESS_H
+
+#include "IceDefs.h"
+#include "IceTypes.h"
+
+namespace Ice {
+
+class LivenessNode {
+public:
+  LivenessNode() : NumLocals(0) {}
+  // NumLocals is the number of Variables local to this block.
+  SizeT NumLocals;
+  // LiveToVarMap maps a liveness bitvector index to a Variable.  This
+  // is generally just for printing/dumping.  The index should be less
+  // than NumLocals + Liveness::NumGlobals.
+  std::vector<Variable *> LiveToVarMap;
+  // LiveIn and LiveOut track the in- and out-liveness of the global
+  // variables.  The size of each vector is
+  // LivenessNode::NumGlobals.
+  llvm::BitVector LiveIn, LiveOut;
+  // LiveBegin and LiveEnd track the instruction numbers of the start
+  // and end of each variable's live range within this block.  The
+  // size of each vector is NumLocals + Liveness::NumGlobals.
+  std::vector<InstNumberT> LiveBegin, LiveEnd;
+
+private:
+  // TODO: Disable these constructors when Liveness::Nodes is no
+  // longer an STL container.
+  // LivenessNode(const LivenessNode &) LLVM_DELETED_FUNCTION;
+  // LivenessNode &operator=(const LivenessNode &) LLVM_DELETED_FUNCTION;
+};
+
+class Liveness {
+public:
+  Liveness(Cfg *Func, LivenessMode Mode)
+      : Func(Func), Mode(Mode), NumGlobals(0) {}
+  void init();
+  Variable *getVariable(SizeT LiveIndex, const CfgNode *Node) const;
+  SizeT getLiveIndex(const Variable *Var) const;
+  SizeT getNumGlobalVars() const { return NumGlobals; }
+  SizeT getNumVarsInNode(const CfgNode *Node) const {
+    return NumGlobals + Nodes[Node->getIndex()].NumLocals;
+  }
+  llvm::BitVector &getLiveIn(const CfgNode *Node) {
+    return Nodes[Node->getIndex()].LiveIn;
+  }
+  llvm::BitVector &getLiveOut(const CfgNode *Node) {
+    return Nodes[Node->getIndex()].LiveOut;
+  }
+  std::vector<InstNumberT> &getLiveBegin(const CfgNode *Node) {
+    return Nodes[Node->getIndex()].LiveBegin;
+  }
+  std::vector<InstNumberT> &getLiveEnd(const CfgNode *Node) {
+    return Nodes[Node->getIndex()].LiveEnd;
+  }
+  LiveRange &getLiveRange(Variable *Var);
+  void addLiveRange(Variable *Var, InstNumberT Start, InstNumberT End,
+                    uint32_t WeightDelta);
+
+private:
+  Cfg *Func;
+  LivenessMode Mode;
+  SizeT NumGlobals;
+  // Size of Nodes is Cfg::Nodes.size().
+  std::vector<LivenessNode> Nodes;
+  // VarToLiveMap maps a Variable's Variable::Number to its live index
+  // within its basic block.
+  std::vector<SizeT> VarToLiveMap;
+  // LiveToVarMap is analogous to LivenessNode::LiveToVarMap, but for
+  // non-local variables.
+  std::vector<Variable *> LiveToVarMap;
+  // LiveRanges maps a Variable::Number to its live range.
+  std::vector<LiveRange> LiveRanges;
+  Liveness(const Liveness &) LLVM_DELETED_FUNCTION;
+  Liveness &operator=(const Liveness &) LLVM_DELETED_FUNCTION;
+};
+
+} // end of namespace Ice
+
+#endif // SUBZERO_SRC_ICELIVENESS_H
diff --git a/src/IceOperand.cpp b/src/IceOperand.cpp
index 56520d3..02e85c7 100644
--- a/src/IceOperand.cpp
+++ b/src/IceOperand.cpp
@@ -7,9 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file implements the Operand class and its
-// target-independent subclasses, primarily for the methods of the
-// Variable class.
+// This file implements the Operand class and its target-independent
+// subclasses, primarily for the methods of the Variable class.
 //
 //===----------------------------------------------------------------------===//
 
@@ -36,6 +35,109 @@
   return !(B < A) && !(A < B);
 }
 
+void LiveRange::addSegment(InstNumberT Start, InstNumberT End) {
+#ifdef USE_SET
+  RangeElementType Element(Start, End);
+  RangeType::iterator Next = Range.lower_bound(Element);
+  assert(Next == Range.upper_bound(Element)); // Element not already present
+
+  // Beginning of code that merges contiguous segments.  TODO: change
+  // "if(true)" to "if(false)" to see if this extra optimization code
+  // gives any performance gain, or is just destabilizing.
+  if (true) {
+    RangeType::iterator FirstDelete = Next;
+    RangeType::iterator Prev = Next;
+    bool hasPrev = (Next != Range.begin());
+    bool hasNext = (Next != Range.end());
+    if (hasPrev)
+      --Prev;
+    // See if Element and Next should be joined.
+    if (hasNext && End == Next->first) {
+      Element.second = Next->second;
+      ++Next;
+    }
+    // See if Prev and Element should be joined.
+    if (hasPrev && Prev->second == Start) {
+      Element.first = Prev->first;
+      FirstDelete = Prev;
+    }
+    Range.erase(FirstDelete, Next);
+  }
+  // End of code that merges contiguous segments.
+
+  Range.insert(Next, Element);
+#else
+  if (Range.empty()) {
+    Range.push_back(RangeElementType(Start, End));
+    return;
+  }
+  // Special case for faking in-arg liveness.
+  if (End < Range.front().first) {
+    assert(Start < 0);
+    Range.push_front(RangeElementType(Start, End));
+    return;
+  }
+  InstNumberT CurrentEnd = Range.back().second;
+  assert(Start >= CurrentEnd);
+  // Check for merge opportunity.
+  if (Start == CurrentEnd) {
+    Range.back().second = End;
+    return;
+  }
+  Range.push_back(RangeElementType(Start, End));
+#endif
+}
+
+// Returns true if this live range ends before Other's live range
+// starts.  This means that the highest instruction number in this
+// live range is less than or equal to the lowest instruction number
+// of the Other live range.
+bool LiveRange::endsBefore(const LiveRange &Other) const {
+  // Neither range should be empty, but let's be graceful.
+  if (Range.empty() || Other.Range.empty())
+    return true;
+  InstNumberT MyEnd = (*Range.rbegin()).second;
+  InstNumberT OtherStart = (*Other.Range.begin()).first;
+  return MyEnd <= OtherStart;
+}
+
+// Returns true if there is any overlap between the two live ranges.
+bool LiveRange::overlaps(const LiveRange &Other) const {
+  // Do a two-finger walk through the two sorted lists of segments.
+  RangeType::const_iterator I1 = Range.begin(), I2 = Other.Range.begin();
+  RangeType::const_iterator E1 = Range.end(), E2 = Other.Range.end();
+  while (I1 != E1 && I2 != E2) {
+    if (I1->second <= I2->first) {
+      ++I1;
+      continue;
+    }
+    if (I2->second <= I1->first) {
+      ++I2;
+      continue;
+    }
+    return true;
+  }
+  return false;
+}
+
+bool LiveRange::overlaps(InstNumberT OtherBegin) const {
+  LiveRange Temp;
+  Temp.addSegment(OtherBegin, OtherBegin + 1);
+  return overlaps(Temp);
+}
+
+// Returns true if the live range contains the given instruction
+// number.  This is only used for validating the live range
+// calculation.
+bool LiveRange::containsValue(InstNumberT Value) const {
+  for (RangeType::const_iterator I = Range.begin(), E = Range.end(); I != E;
+       ++I) {
+    if (I->first <= Value && Value <= I->second)
+      return true;
+  }
+  return false;
+}
+
 void Variable::setUse(const Inst *Inst, const CfgNode *Node) {
   if (DefNode == NULL)
     return;
@@ -115,12 +217,12 @@
   }
 }
 
-void ConstantRelocatable::emit(const Cfg *Func) const {
-  Ostream &Str = Func->getContext()->getStrEmit();
+void ConstantRelocatable::emit(GlobalContext *Ctx) const {
+  Ostream &Str = Ctx->getStrEmit();
   if (SuppressMangling)
     Str << Name;
   else
-    Str << Func->getContext()->mangleName(Name);
+    Str << Ctx->mangleName(Name);
   if (Offset) {
     if (Offset > 0)
       Str << "+";
@@ -128,13 +230,28 @@
   }
 }
 
-void ConstantRelocatable::dump(const Cfg *Func) const {
-  Ostream &Str = Func->getContext()->getStrDump();
+void ConstantRelocatable::dump(GlobalContext *Ctx) const {
+  Ostream &Str = Ctx->getStrDump();
   Str << "@" << Name;
   if (Offset)
     Str << "+" << Offset;
 }
 
+void LiveRange::dump(Ostream &Str) const {
+  Str << "(weight=" << Weight << ") ";
+  for (RangeType::const_iterator I = Range.begin(), E = Range.end(); I != E;
+       ++I) {
+    if (I != Range.begin())
+      Str << ", ";
+    Str << "[" << (*I).first << ":" << (*I).second << ")";
+  }
+}
+
+Ostream &operator<<(Ostream &Str, const LiveRange &L) {
+  L.dump(Str);
+  return Str;
+}
+
 Ostream &operator<<(Ostream &Str, const RegWeight &W) {
   if (W.getWeight() == RegWeight::Inf)
     Str << "Inf";
diff --git a/src/IceOperand.h b/src/IceOperand.h
index ef4413f..54ac48b 100644
--- a/src/IceOperand.h
+++ b/src/IceOperand.h
@@ -81,8 +81,10 @@
 class Constant : public Operand {
 public:
   uint32_t getPoolEntryID() const { return PoolEntryID; }
-  virtual void emit(const Cfg *Func) const = 0;
-  virtual void dump(const Cfg *Func) const = 0;
+  virtual void emit(const Cfg *Func) const { emit(Func->getContext()); }
+  virtual void dump(const Cfg *Func) const { dump(Func->getContext()); }
+  virtual void emit(GlobalContext *Ctx) const = 0;
+  virtual void dump(GlobalContext *Ctx) const = 0;
 
   static bool classof(const Operand *Operand) {
     OperandKind Kind = Operand->getKind();
@@ -116,12 +118,14 @@
         ConstantPrimitive(Ty, Value, PoolEntryID);
   }
   T getValue() const { return Value; }
-  virtual void emit(const Cfg *Func) const {
-    Ostream &Str = Func->getContext()->getStrEmit();
+  using Constant::emit;
+  virtual void emit(GlobalContext *Ctx) const {
+    Ostream &Str = Ctx->getStrEmit();
     Str << getValue();
   }
-  virtual void dump(const Cfg *Func) const {
-    Ostream &Str = Func->getContext()->getStrDump();
+  using Constant::dump;
+  virtual void dump(GlobalContext *Ctx) const {
+    Ostream &Str = Ctx->getStrDump();
     Str << getValue();
   }
 
@@ -178,8 +182,10 @@
   IceString getName() const { return Name; }
   void setSuppressMangling(bool Value) { SuppressMangling = Value; }
   bool getSuppressMangling() const { return SuppressMangling; }
-  virtual void emit(const Cfg *Func) const;
-  virtual void dump(const Cfg *Func) const;
+  using Constant::emit;
+  using Constant::dump;
+  virtual void emit(GlobalContext *Ctx) const;
+  virtual void dump(GlobalContext *Ctx) const;
 
   static bool classof(const Operand *Operand) {
     OperandKind Kind = Operand->getKind();
@@ -228,6 +234,55 @@
 bool operator<=(const RegWeight &A, const RegWeight &B);
 bool operator==(const RegWeight &A, const RegWeight &B);
 
+// LiveRange is a set of instruction number intervals representing
+// a variable's live range.  Generally there is one interval per basic
+// block where the variable is live, but adjacent intervals get
+// coalesced into a single interval.  LiveRange also includes a
+// weight, in case e.g. we want a live range to have higher weight
+// inside a loop.
+class LiveRange {
+public:
+  LiveRange() : Weight(0) {}
+
+  void reset() {
+    Range.clear();
+    Weight.setWeight(0);
+  }
+  void addSegment(InstNumberT Start, InstNumberT End);
+
+  bool endsBefore(const LiveRange &Other) const;
+  bool overlaps(const LiveRange &Other) const;
+  bool overlaps(InstNumberT OtherBegin) const;
+  bool containsValue(InstNumberT Value) const;
+  bool isEmpty() const { return Range.empty(); }
+  InstNumberT getStart() const {
+    return Range.empty() ? -1 : Range.begin()->first;
+  }
+
+  RegWeight getWeight() const { return Weight; }
+  void setWeight(const RegWeight &NewWeight) { Weight = NewWeight; }
+  void addWeight(uint32_t Delta) { Weight.addWeight(Delta); }
+  void dump(Ostream &Str) const;
+
+  // Defining USE_SET uses std::set to hold the segments instead of
+  // std::list.  Using std::list will be slightly faster, but is more
+  // restrictive because new segments cannot be added in the middle.
+
+  //#define USE_SET
+
+private:
+  typedef std::pair<InstNumberT, InstNumberT> RangeElementType;
+#ifdef USE_SET
+  typedef std::set<RangeElementType> RangeType;
+#else
+  typedef std::list<RangeElementType> RangeType;
+#endif
+  RangeType Range;
+  RegWeight Weight;
+};
+
+Ostream &operator<<(Ostream &Str, const LiveRange &L);
+
 // Variable represents an operand that is register-allocated or
 // stack-allocated.  If it is register-allocated, it will ultimately
 // have a non-negative RegNum field.
@@ -263,6 +318,9 @@
     assert(!hasReg() || RegNum == NewRegNum);
     RegNum = NewRegNum;
   }
+  bool hasRegTmp() const { return getRegNumTmp() != NoRegister; }
+  int32_t getRegNumTmp() const { return RegNumTmp; }
+  void setRegNumTmp(int32_t NewRegNum) { RegNumTmp = NewRegNum; }
 
   RegWeight getWeight() const { return Weight; }
   void setWeight(uint32_t NewWeight) { Weight = NewWeight; }
@@ -275,6 +333,19 @@
     AllowRegisterOverlap = Overlap;
   }
 
+  const LiveRange &getLiveRange() const { return Live; }
+  void setLiveRange(const LiveRange &Range) { Live = Range; }
+  void resetLiveRange() { Live.reset(); }
+  void addLiveRange(InstNumberT Start, InstNumberT End, uint32_t WeightDelta) {
+    assert(WeightDelta != RegWeight::Inf);
+    Live.addSegment(Start, End);
+    if (Weight.isInf())
+      Live.setWeight(RegWeight::Inf);
+    else
+      Live.addWeight(WeightDelta * Weight.getWeight());
+  }
+  void setLiveRangeInfiniteWeight() { Live.setWeight(RegWeight::Inf); }
+
   Variable *getLo() const { return LoVar; }
   Variable *getHi() const { return HiVar; }
   void setLoHi(Variable *Lo, Variable *Hi) {
@@ -304,8 +375,8 @@
   Variable(Type Ty, const CfgNode *Node, SizeT Index, const IceString &Name)
       : Operand(kVariable, Ty), Number(Index), Name(Name), DefInst(NULL),
         DefNode(Node), IsArgument(false), StackOffset(0), RegNum(NoRegister),
-        Weight(1), RegisterPreference(NULL), AllowRegisterOverlap(false),
-        LoVar(NULL), HiVar(NULL) {
+        RegNumTmp(NoRegister), Weight(1), RegisterPreference(NULL),
+        AllowRegisterOverlap(false), LoVar(NULL), HiVar(NULL) {
     Vars = VarsReal;
     Vars[0] = this;
     NumVars = 1;
@@ -334,6 +405,8 @@
   // RegNum is the allocated register, or NoRegister if it isn't
   // register-allocated.
   int32_t RegNum;
+  // RegNumTmp is the tentative assignment during register allocation.
+  int32_t RegNumTmp;
   RegWeight Weight; // Register allocation priority
   // RegisterPreference says that if possible, the register allocator
   // should prefer the register that was assigned to this linked
@@ -345,6 +418,7 @@
   // RegisterPreference and "share" a register even if the two live
   // ranges overlap.
   bool AllowRegisterOverlap;
+  LiveRange Live;
   // LoVar and HiVar are needed for lowering from 64 to 32 bits.  When
   // lowering from I64 to I32 on a 32-bit architecture, we split the
   // variable into two machine-size pieces.  LoVar is the low-order
diff --git a/src/IceRegAlloc.cpp b/src/IceRegAlloc.cpp
new file mode 100644
index 0000000..f610d96
--- /dev/null
+++ b/src/IceRegAlloc.cpp
@@ -0,0 +1,461 @@
+//===- subzero/src/IceRegAlloc.cpp - Linear-scan implementation -----------===//
+//
+//                        The Subzero Code Generator
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LinearScan class, which performs the
+// linear-scan register allocation after liveness analysis has been
+// performed.
+//
+//===----------------------------------------------------------------------===//
+
+#include "IceCfg.h"
+#include "IceInst.h"
+#include "IceOperand.h"
+#include "IceRegAlloc.h"
+#include "IceTargetLowering.h"
+
+namespace Ice {
+
+// Implements the linear-scan algorithm.  Based on "Linear Scan
+// Register Allocation in the Context of SSA Form and Register
+// Constraints" by Hanspeter Mössenböck and Michael Pfeiffer,
+// ftp://ftp.ssw.uni-linz.ac.at/pub/Papers/Moe02.PDF .  This
+// implementation is modified to take affinity into account and allow
+// two interfering variables to share the same register in certain
+// cases.
+//
+// Requires running Cfg::liveness(Liveness_RangesFull) in
+// preparation.  Results are assigned to Variable::RegNum for each
+// Variable.
+void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) {
+  assert(RegMaskFull.any()); // Sanity check
+  Unhandled.clear();
+  Handled.clear();
+  Inactive.clear();
+  Active.clear();
+  Ostream &Str = Func->getContext()->getStrDump();
+  Func->setCurrentNode(NULL);
+
+  // Gather the live ranges of all variables and add them to the
+  // Unhandled set.  TODO: Unhandled is a set<> which is based on a
+  // balanced binary tree, so inserting live ranges for N variables is
+  // O(N log N) complexity.  N may be proportional to the number of
+  // instructions, thanks to temporary generation during lowering.  As
+  // a result, it may be useful to design a better data structure for
+  // storing Func->getVariables().
+  const VarList &Vars = Func->getVariables();
+  for (VarList::const_iterator I = Vars.begin(), E = Vars.end(); I != E; ++I) {
+    Variable *Var = *I;
+    // Explicitly don't consider zero-weight variables, which are
+    // meant to be spill slots.
+    if (Var->getWeight() == RegWeight::Zero)
+      continue;
+    // Don't bother if the variable has a null live range, which means
+    // it was never referenced.
+    if (Var->getLiveRange().isEmpty())
+      continue;
+    Unhandled.insert(LiveRangeWrapper(Var));
+    if (Var->hasReg()) {
+      Var->setRegNumTmp(Var->getRegNum());
+      Var->setLiveRangeInfiniteWeight();
+    }
+  }
+
+  // RegUses[I] is the number of live ranges (variables) that register
+  // I is currently assigned to.  It can be greater than 1 as a result
+  // of Variable::AllowRegisterOverlap.
+  std::vector<int> RegUses(RegMaskFull.size());
+  // Unhandled is already set to all ranges in increasing order of
+  // start points.
+  assert(Active.empty());
+  assert(Inactive.empty());
+  assert(Handled.empty());
+  UnorderedRanges::iterator Next;
+
+  while (!Unhandled.empty()) {
+    LiveRangeWrapper Cur = *Unhandled.begin();
+    Unhandled.erase(Unhandled.begin());
+    if (Func->getContext()->isVerbose(IceV_LinearScan)) {
+      Str << "\nConsidering  ";
+      Cur.dump(Func);
+      Str << "\n";
+    }
+    const llvm::SmallBitVector RegMask =
+        RegMaskFull &
+        Func->getTarget()->getRegisterSetForType(Cur.Var->getType());
+
+    // Check for precolored ranges.  If Cur is precolored, it
+    // definitely gets that register.  Previously processed live
+    // ranges would have avoided that register due to it being
+    // precolored.  Future processed live ranges won't evict that
+    // register because the live range has infinite weight.
+    if (Cur.Var->hasReg()) {
+      int32_t RegNum = Cur.Var->getRegNum();
+      // RegNumTmp should have already been set above.
+      assert(Cur.Var->getRegNumTmp() == RegNum);
+      if (Func->getContext()->isVerbose(IceV_LinearScan)) {
+        Str << "Precoloring  ";
+        Cur.dump(Func);
+        Str << "\n";
+      }
+      Active.push_back(Cur);
+      assert(RegUses[RegNum] >= 0);
+      ++RegUses[RegNum];
+      continue;
+    }
+
+    // Check for active ranges that have expired or become inactive.
+    for (UnorderedRanges::iterator I = Active.begin(), E = Active.end(); I != E;
+         I = Next) {
+      Next = I;
+      ++Next;
+      LiveRangeWrapper Item = *I;
+      bool Moved = false;
+      if (Item.endsBefore(Cur)) {
+        // Move Item from Active to Handled list.
+        if (Func->getContext()->isVerbose(IceV_LinearScan)) {
+          Str << "Expiring     ";
+          Item.dump(Func);
+          Str << "\n";
+        }
+        Active.erase(I);
+        Handled.push_back(Item);
+        Moved = true;
+      } else if (!Item.overlapsStart(Cur)) {
+        // Move Item from Active to Inactive list.
+        if (Func->getContext()->isVerbose(IceV_LinearScan)) {
+          Str << "Inactivating ";
+          Item.dump(Func);
+          Str << "\n";
+        }
+        Active.erase(I);
+        Inactive.push_back(Item);
+        Moved = true;
+      }
+      if (Moved) {
+        // Decrement Item from RegUses[].
+        assert(Item.Var->hasRegTmp());
+        int32_t RegNum = Item.Var->getRegNumTmp();
+        --RegUses[RegNum];
+        assert(RegUses[RegNum] >= 0);
+      }
+    }
+
+    // Check for inactive ranges that have expired or reactivated.
+    for (UnorderedRanges::iterator I = Inactive.begin(), E = Inactive.end();
+         I != E; I = Next) {
+      Next = I;
+      ++Next;
+      LiveRangeWrapper Item = *I;
+      if (Item.endsBefore(Cur)) {
+        // Move Item from Inactive to Handled list.
+        if (Func->getContext()->isVerbose(IceV_LinearScan)) {
+          Str << "Expiring     ";
+          Item.dump(Func);
+          Str << "\n";
+        }
+        Inactive.erase(I);
+        Handled.push_back(Item);
+      } else if (Item.overlapsStart(Cur)) {
+        // Move Item from Inactive to Active list.
+        if (Func->getContext()->isVerbose(IceV_LinearScan)) {
+          Str << "Reactivating ";
+          Item.dump(Func);
+          Str << "\n";
+        }
+        Inactive.erase(I);
+        Active.push_back(Item);
+        // Increment Item in RegUses[].
+        assert(Item.Var->hasRegTmp());
+        int32_t RegNum = Item.Var->getRegNumTmp();
+        assert(RegUses[RegNum] >= 0);
+        ++RegUses[RegNum];
+      }
+    }
+
+    // Calculate available registers into Free[].
+    llvm::SmallBitVector Free = RegMask;
+    for (SizeT i = 0; i < RegMask.size(); ++i) {
+      if (RegUses[i] > 0)
+        Free[i] = false;
+    }
+
+    // Remove registers from the Free[] list where an Inactive range
+    // overlaps with the current range.
+    for (UnorderedRanges::const_iterator I = Inactive.begin(),
+                                         E = Inactive.end();
+         I != E; ++I) {
+      LiveRangeWrapper Item = *I;
+      if (Item.overlaps(Cur)) {
+        int32_t RegNum = Item.Var->getRegNumTmp();
+        // Don't assert(Free[RegNum]) because in theory (though
+        // probably never in practice) there could be two inactive
+        // variables that were allowed marked with
+        // AllowRegisterOverlap.
+        Free[RegNum] = false;
+      }
+    }
+
+    // Remove registers from the Free[] list where an Unhandled range
+    // overlaps with the current range and is precolored.
+    // Cur.endsBefore(*I) is an early exit check that turns a
+    // guaranteed O(N^2) algorithm into expected linear complexity.
+    for (OrderedRanges::const_iterator I = Unhandled.begin(),
+                                       E = Unhandled.end();
+         I != E && !Cur.endsBefore(*I); ++I) {
+      LiveRangeWrapper Item = *I;
+      if (Item.Var->hasReg() && Item.overlaps(Cur)) {
+        Free[Item.Var->getRegNum()] = false; // Note: getRegNum not getRegNumTmp
+      }
+    }
+
+    // Print info about physical register availability.
+    if (Func->getContext()->isVerbose(IceV_LinearScan)) {
+      for (SizeT i = 0; i < RegMask.size(); ++i) {
+        if (RegMask[i]) {
+          Str << Func->getTarget()->getRegName(i, IceType_i32)
+              << "(U=" << RegUses[i] << ",F=" << Free[i] << ") ";
+        }
+      }
+      Str << "\n";
+    }
+
+    Variable *Prefer = Cur.Var->getPreferredRegister();
+    int32_t PreferReg = Prefer && Prefer->hasRegTmp() ? Prefer->getRegNumTmp()
+                                                      : Variable::NoRegister;
+    if (PreferReg != Variable::NoRegister &&
+        (Cur.Var->getRegisterOverlap() || Free[PreferReg])) {
+      // First choice: a preferred register that is either free or is
+      // allowed to overlap with its linked variable.
+      Cur.Var->setRegNumTmp(PreferReg);
+      if (Func->getContext()->isVerbose(IceV_LinearScan)) {
+        Str << "Preferring   ";
+        Cur.dump(Func);
+        Str << "\n";
+      }
+      assert(RegUses[PreferReg] >= 0);
+      ++RegUses[PreferReg];
+      Active.push_back(Cur);
+    } else if (Free.any()) {
+      // Second choice: any free register.  TODO: After explicit
+      // affinity is considered, is there a strategy better than just
+      // picking the lowest-numbered available register?
+      int32_t RegNum = Free.find_first();
+      Cur.Var->setRegNumTmp(RegNum);
+      if (Func->getContext()->isVerbose(IceV_LinearScan)) {
+        Str << "Allocating   ";
+        Cur.dump(Func);
+        Str << "\n";
+      }
+      assert(RegUses[RegNum] >= 0);
+      ++RegUses[RegNum];
+      Active.push_back(Cur);
+    } else {
+      // Fallback: there are no free registers, so we look for the
+      // lowest-weight register and see if Cur has higher weight.
+      std::vector<RegWeight> Weights(RegMask.size());
+      // Check Active ranges.
+      for (UnorderedRanges::const_iterator I = Active.begin(), E = Active.end();
+           I != E; ++I) {
+        LiveRangeWrapper Item = *I;
+        assert(Item.overlaps(Cur));
+        int32_t RegNum = Item.Var->getRegNumTmp();
+        assert(Item.Var->hasRegTmp());
+        Weights[RegNum].addWeight(Item.range().getWeight());
+      }
+      // Same as above, but check Inactive ranges instead of Active.
+      for (UnorderedRanges::const_iterator I = Inactive.begin(),
+                                           E = Inactive.end();
+           I != E; ++I) {
+        LiveRangeWrapper Item = *I;
+        int32_t RegNum = Item.Var->getRegNumTmp();
+        assert(Item.Var->hasRegTmp());
+        if (Item.overlaps(Cur))
+          Weights[RegNum].addWeight(Item.range().getWeight());
+      }
+      // Check Unhandled ranges that overlap Cur and are precolored.
+      // Cur.endsBefore(*I) is an early exit check that turns a
+      // guaranteed O(N^2) algorithm into expected linear complexity.
+      for (OrderedRanges::const_iterator I = Unhandled.begin(),
+                                         E = Unhandled.end();
+           I != E && !Cur.endsBefore(*I); ++I) {
+        LiveRangeWrapper Item = *I;
+        int32_t RegNum = Item.Var->getRegNumTmp();
+        if (RegNum < 0)
+          continue;
+        if (Item.overlaps(Cur))
+          Weights[RegNum].setWeight(RegWeight::Inf);
+      }
+
+      // All the weights are now calculated.  Find the register with
+      // smallest weight.
+      int32_t MinWeightIndex = RegMask.find_first();
+      // MinWeightIndex must be valid because of the initial
+      // RegMask.any() test.
+      assert(MinWeightIndex >= 0);
+      for (SizeT i = MinWeightIndex + 1; i < Weights.size(); ++i) {
+        if (RegMask[i] && Weights[i] < Weights[MinWeightIndex])
+          MinWeightIndex = i;
+      }
+
+      if (Cur.range().getWeight() <= Weights[MinWeightIndex]) {
+        // Cur doesn't have priority over any other live ranges, so
+        // don't allocate any register to it, and move it to the
+        // Handled state.
+        Handled.push_back(Cur);
+        if (Cur.range().getWeight().isInf()) {
+          Func->setError("Unable to find a physical register for an "
+                         "infinite-weight live range");
+        }
+      } else {
+        // Evict all live ranges in Active that register number
+        // MinWeightIndex is assigned to.
+        for (UnorderedRanges::iterator I = Active.begin(), E = Active.end();
+             I != E; I = Next) {
+          Next = I;
+          ++Next;
+          LiveRangeWrapper Item = *I;
+          if (Item.Var->getRegNumTmp() == MinWeightIndex) {
+            if (Func->getContext()->isVerbose(IceV_LinearScan)) {
+              Str << "Evicting     ";
+              Item.dump(Func);
+              Str << "\n";
+            }
+            --RegUses[MinWeightIndex];
+            assert(RegUses[MinWeightIndex] >= 0);
+            Item.Var->setRegNumTmp(Variable::NoRegister);
+            Active.erase(I);
+            Handled.push_back(Item);
+          }
+        }
+        // Do the same for Inactive.
+        for (UnorderedRanges::iterator I = Inactive.begin(), E = Inactive.end();
+             I != E; I = Next) {
+          Next = I;
+          ++Next;
+          LiveRangeWrapper Item = *I;
+          if (Item.Var->getRegNumTmp() == MinWeightIndex) {
+            if (Func->getContext()->isVerbose(IceV_LinearScan)) {
+              Str << "Evicting     ";
+              Item.dump(Func);
+              Str << "\n";
+            }
+            Item.Var->setRegNumTmp(Variable::NoRegister);
+            Inactive.erase(I);
+            Handled.push_back(Item);
+          }
+        }
+        // Assign the register to Cur.
+        Cur.Var->setRegNumTmp(MinWeightIndex);
+        assert(RegUses[MinWeightIndex] >= 0);
+        ++RegUses[MinWeightIndex];
+        Active.push_back(Cur);
+        if (Func->getContext()->isVerbose(IceV_LinearScan)) {
+          Str << "Allocating   ";
+          Cur.dump(Func);
+          Str << "\n";
+        }
+      }
+    }
+    dump(Func);
+  }
+  // Move anything Active or Inactive to Handled for easier handling.
+  for (UnorderedRanges::iterator I = Active.begin(), E = Active.end(); I != E;
+       I = Next) {
+    Next = I;
+    ++Next;
+    Handled.push_back(*I);
+    Active.erase(I);
+  }
+  for (UnorderedRanges::iterator I = Inactive.begin(), E = Inactive.end();
+       I != E; I = Next) {
+    Next = I;
+    ++Next;
+    Handled.push_back(*I);
+    Inactive.erase(I);
+  }
+  dump(Func);
+
+  // Finish up by assigning RegNumTmp->RegNum for each Variable.
+  for (UnorderedRanges::const_iterator I = Handled.begin(), E = Handled.end();
+       I != E; ++I) {
+    LiveRangeWrapper Item = *I;
+    int32_t RegNum = Item.Var->getRegNumTmp();
+    if (Func->getContext()->isVerbose(IceV_LinearScan)) {
+      if (!Item.Var->hasRegTmp()) {
+        Str << "Not assigning ";
+        Item.Var->dump(Func);
+        Str << "\n";
+      } else {
+        Str << (RegNum == Item.Var->getRegNum() ? "Reassigning " : "Assigning ")
+            << Func->getTarget()->getRegName(RegNum, IceType_i32) << "(r"
+            << RegNum << ") to ";
+        Item.Var->dump(Func);
+        Str << "\n";
+      }
+    }
+    Item.Var->setRegNum(Item.Var->getRegNumTmp());
+  }
+
+  // TODO: Consider running register allocation one more time, with
+  // infinite registers, for two reasons.  First, evicted live ranges
+  // get a second chance for a register.  Second, it allows coalescing
+  // of stack slots.  If there is no time budget for the second
+  // register allocation run, each unallocated variable just gets its
+  // own slot.
+  //
+  // Another idea for coalescing stack slots is to initialize the
+  // Unhandled list with just the unallocated variables, saving time
+  // but not offering second-chance opportunities.
+}
+
+// ======================== Dump routines ======================== //
+
+void LiveRangeWrapper::dump(const Cfg *Func) const {
+  Ostream &Str = Func->getContext()->getStrDump();
+  const static size_t BufLen = 30;
+  char buf[BufLen];
+  snprintf(buf, BufLen, "%2d", Var->getRegNumTmp());
+  Str << "R=" << buf << "  V=";
+  Var->dump(Func);
+  Str << "  Range=" << range();
+}
+
+void LinearScan::dump(Cfg *Func) const {
+  Ostream &Str = Func->getContext()->getStrDump();
+  if (!Func->getContext()->isVerbose(IceV_LinearScan))
+    return;
+  Func->setCurrentNode(NULL);
+  Str << "**** Current regalloc state:\n";
+  Str << "++++++ Handled:\n";
+  for (UnorderedRanges::const_iterator I = Handled.begin(), E = Handled.end();
+       I != E; ++I) {
+    I->dump(Func);
+    Str << "\n";
+  }
+  Str << "++++++ Unhandled:\n";
+  for (OrderedRanges::const_iterator I = Unhandled.begin(), E = Unhandled.end();
+       I != E; ++I) {
+    I->dump(Func);
+    Str << "\n";
+  }
+  Str << "++++++ Active:\n";
+  for (UnorderedRanges::const_iterator I = Active.begin(), E = Active.end();
+       I != E; ++I) {
+    I->dump(Func);
+    Str << "\n";
+  }
+  Str << "++++++ Inactive:\n";
+  for (UnorderedRanges::const_iterator I = Inactive.begin(), E = Inactive.end();
+       I != E; ++I) {
+    I->dump(Func);
+    Str << "\n";
+  }
+}
+
+} // end of namespace Ice
diff --git a/src/IceRegAlloc.h b/src/IceRegAlloc.h
new file mode 100644
index 0000000..edcf266
--- /dev/null
+++ b/src/IceRegAlloc.h
@@ -0,0 +1,81 @@
+//===- subzero/src/IceRegAlloc.h - Linear-scan reg. allocation --*- C++ -*-===//
+//
+//                        The Subzero Code Generator
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the data structures used during linear-scan
+// register allocation.  This includes LiveRangeWrapper which
+// encapsulates a variable and its live range, and LinearScan which
+// holds the various work queues for the linear-scan algorithm.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SUBZERO_SRC_ICEREGALLOC_H
+#define SUBZERO_SRC_ICEREGALLOC_H
+
+#include "IceDefs.h"
+#include "IceTypes.h"
+
+namespace Ice {
+
+// Currently this just wraps a Variable pointer, so in principle we
+// could use containers of Variable* instead of LiveRangeWrapper.  But
+// in the future, we may want to do more complex things such as live
+// range splitting, and keeping a wrapper should make that simpler.
+class LiveRangeWrapper {
+public:
+  LiveRangeWrapper(Variable *Var) : Var(Var) {}
+  const LiveRange &range() const { return Var->getLiveRange(); }
+  bool endsBefore(const LiveRangeWrapper &Other) const {
+    return range().endsBefore(Other.range());
+  }
+  bool overlaps(const LiveRangeWrapper &Other) const {
+    return range().overlaps(Other.range());
+  }
+  bool overlapsStart(const LiveRangeWrapper &Other) const {
+    return range().overlaps(Other.range().getStart());
+  }
+  Variable *const Var;
+  void dump(const Cfg *Func) const;
+
+private:
+  // LiveRangeWrapper(const LiveRangeWrapper &) LLVM_DELETED_FUNCTION;
+  LiveRangeWrapper &operator=(const LiveRangeWrapper &) LLVM_DELETED_FUNCTION;
+};
+
+class LinearScan {
+public:
+  LinearScan(Cfg *Func) : Func(Func) {}
+  void scan(const llvm::SmallBitVector &RegMask);
+  void dump(Cfg *Func) const;
+
+private:
+  Cfg *const Func;
+  // RangeCompare is the comparator for sorting an LiveRangeWrapper
+  // by starting point in a std::set<>.  Ties are broken by variable
+  // number so that sorting is stable.
+  struct RangeCompare {
+    bool operator()(const LiveRangeWrapper &L,
+                    const LiveRangeWrapper &R) const {
+      InstNumberT Lstart = L.Var->getLiveRange().getStart();
+      InstNumberT Rstart = R.Var->getLiveRange().getStart();
+      if (Lstart == Rstart)
+        return L.Var->getIndex() < R.Var->getIndex();
+      return Lstart < Rstart;
+    }
+  };
+  typedef std::set<LiveRangeWrapper, RangeCompare> OrderedRanges;
+  typedef std::list<LiveRangeWrapper> UnorderedRanges;
+  OrderedRanges Unhandled;
+  UnorderedRanges Active, Inactive, Handled;
+  LinearScan(const LinearScan &) LLVM_DELETED_FUNCTION;
+  LinearScan &operator=(const LinearScan &) LLVM_DELETED_FUNCTION;
+};
+
+} // end of namespace Ice
+
+#endif // SUBZERO_SRC_ICEREGALLOC_H
diff --git a/src/IceTargetLowering.cpp b/src/IceTargetLowering.cpp
index 0d07475..72a3e8c 100644
--- a/src/IceTargetLowering.cpp
+++ b/src/IceTargetLowering.cpp
@@ -18,6 +18,7 @@
 #include "IceCfg.h" // setError()
 #include "IceCfgNode.h"
 #include "IceOperand.h"
+#include "IceRegAlloc.h"
 #include "IceTargetLowering.h"
 #include "IceTargetLoweringX8632.h"
 
@@ -66,6 +67,15 @@
   return NULL;
 }
 
+void TargetLowering::doAddressOpt() {
+  if (llvm::isa<InstLoad>(*Context.getCur()))
+    doAddressOptLoad();
+  else if (llvm::isa<InstStore>(*Context.getCur()))
+    doAddressOptStore();
+  Context.advanceCur();
+  Context.advanceNext();
+}
+
 // Lowers a single instruction according to the information in
 // Context, by checking the Context.Cur instruction kind and calling
 // the appropriate lowering method.  The lowering method should insert
@@ -144,4 +154,21 @@
   Context.advanceNext();
 }
 
+// Drives register allocation, allowing all physical registers (except
+// perhaps for the frame pointer) to be allocated.  This set of
+// registers could potentially be parameterized if we want to restrict
+// registers e.g. for performance testing.
+void TargetLowering::regAlloc() {
+  LinearScan LinearScan(Func);
+  RegSetMask RegInclude = RegSet_None;
+  RegSetMask RegExclude = RegSet_None;
+  RegInclude |= RegSet_CallerSave;
+  RegInclude |= RegSet_CalleeSave;
+  RegExclude |= RegSet_StackPointer;
+  if (hasFramePointer())
+    RegExclude |= RegSet_FramePointer;
+  llvm::SmallBitVector RegMask = getRegisterSet(RegInclude, RegExclude);
+  LinearScan.scan(RegMask);
+}
+
 } // end of namespace Ice
diff --git a/src/IceTargetLowering.h b/src/IceTargetLowering.h
index 92a36af..7f798a8 100644
--- a/src/IceTargetLowering.h
+++ b/src/IceTargetLowering.h
@@ -109,6 +109,8 @@
     Func->setError("Target doesn't specify O2 lowering steps.");
   }
 
+  // Tries to do address mode optimization on a single instruction.
+  void doAddressOpt();
   // Lowers a single instruction.
   void lower();
 
@@ -173,6 +175,8 @@
   virtual void lowerSwitch(const InstSwitch *Inst) = 0;
   virtual void lowerUnreachable(const InstUnreachable *Inst) = 0;
 
+  virtual void doAddressOptLoad() {}
+  virtual void doAddressOptStore() {}
   // This gives the target an opportunity to post-process the lowered
   // expansion before returning.  The primary intention is to do some
   // Register Manager activity as necessary, specifically to eagerly
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index 4b8bf27..81973d0 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -210,9 +210,10 @@
   TypeToRegisterSet[IceType_f64] = FloatRegisters;
 }
 
-void TargetX8632::translateOm1() {
+void TargetX8632::translateO2() {
   GlobalContext *Context = Func->getContext();
-  Ostream &Str = Context->getStrDump();
+
+  // Lower Phi instructions.
   Timer T_placePhiLoads;
   Func->placePhiLoads();
   if (Func->hasError())
@@ -228,30 +229,108 @@
   if (Func->hasError())
     return;
   T_deletePhis.printElapsedUs(Context, "deletePhis()");
-  if (Context->isVerbose()) {
-    Str << "================ After Phi lowering ================\n";
-    Func->dump();
-  }
+  Func->dump("After Phi lowering");
+
+  // Address mode optimization.
+  Timer T_doAddressOpt;
+  Func->doAddressOpt();
+  T_doAddressOpt.printElapsedUs(Context, "doAddressOpt()");
+
+  // Target lowering.  This requires liveness analysis for some parts
+  // of the lowering decisions, such as compare/branch fusing.  If
+  // non-lightweight liveness analysis is used, the instructions need
+  // to be renumbered first.  TODO: This renumbering should only be
+  // necessary if we're actually calculating live intervals, which we
+  // only do for register allocation.
+  Timer T_renumber1;
+  Func->renumberInstructions();
+  if (Func->hasError())
+    return;
+  T_renumber1.printElapsedUs(Context, "renumberInstructions()");
+  // TODO: It should be sufficient to use the fastest liveness
+  // calculation, i.e. livenessLightweight().  However, for some
+  // reason that slows down the rest of the translation.  Investigate.
+  Timer T_liveness1;
+  Func->liveness(Liveness_Basic);
+  if (Func->hasError())
+    return;
+  T_liveness1.printElapsedUs(Context, "liveness()");
+  Func->dump("After x86 address mode opt");
+  Timer T_genCode;
+  Func->genCode();
+  if (Func->hasError())
+    return;
+  T_genCode.printElapsedUs(Context, "genCode()");
+
+  // Register allocation.  This requires instruction renumbering and
+  // full liveness analysis.
+  Timer T_renumber2;
+  Func->renumberInstructions();
+  if (Func->hasError())
+    return;
+  T_renumber2.printElapsedUs(Context, "renumberInstructions()");
+  Timer T_liveness2;
+  Func->liveness(Liveness_Intervals);
+  if (Func->hasError())
+    return;
+  T_liveness2.printElapsedUs(Context, "liveness()");
+  // Validate the live range computations.  Do it outside the timing
+  // code.  TODO: Put this under a flag.
+  bool ValidLiveness = Func->validateLiveness();
+  assert(ValidLiveness);
+  (void)ValidLiveness; // used only in assert()
+  ComputedLiveRanges = true;
+  // The post-codegen dump is done here, after liveness analysis and
+  // associated cleanup, to make the dump cleaner and more useful.
+  Func->dump("After initial x8632 codegen");
+  Timer T_regAlloc;
+  regAlloc();
+  if (Func->hasError())
+    return;
+  T_regAlloc.printElapsedUs(Context, "regAlloc()");
+  Func->dump("After linear scan regalloc");
+
+  // Stack frame mapping.
+  Timer T_genFrame;
+  Func->genFrame();
+  if (Func->hasError())
+    return;
+  T_genFrame.printElapsedUs(Context, "genFrame()");
+  Func->dump("After stack frame mapping");
+}
+
+void TargetX8632::translateOm1() {
+  GlobalContext *Context = Func->getContext();
+  Timer T_placePhiLoads;
+  Func->placePhiLoads();
+  if (Func->hasError())
+    return;
+  T_placePhiLoads.printElapsedUs(Context, "placePhiLoads()");
+  Timer T_placePhiStores;
+  Func->placePhiStores();
+  if (Func->hasError())
+    return;
+  T_placePhiStores.printElapsedUs(Context, "placePhiStores()");
+  Timer T_deletePhis;
+  Func->deletePhis();
+  if (Func->hasError())
+    return;
+  T_deletePhis.printElapsedUs(Context, "deletePhis()");
+  Func->dump("After Phi lowering");
 
   Timer T_genCode;
   Func->genCode();
   if (Func->hasError())
     return;
   T_genCode.printElapsedUs(Context, "genCode()");
-  if (Context->isVerbose()) {
-    Str << "================ After initial x8632 codegen ================\n";
-    Func->dump();
-  }
+  Func->dump("After initial x8632 codegen");
 
   Timer T_genFrame;
   Func->genFrame();
   if (Func->hasError())
     return;
   T_genFrame.printElapsedUs(Context, "genFrame()");
-  if (Context->isVerbose()) {
-    Str << "================ After stack frame mapping ================\n";
-    Func->dump();
-  }
+  Func->dump("After stack frame mapping");
 }
 
 IceString TargetX8632::RegNames[] = {
@@ -327,8 +406,8 @@
 // calls itself recursively on the components, taking care to handle
 // Lo first because of the little-endian architecture.
 void TargetX8632::setArgOffsetAndCopy(Variable *Arg, Variable *FramePtr,
-                                      int32_t BasicFrameOffset,
-                                      int32_t &InArgsSizeBytes) {
+                                      size_t BasicFrameOffset,
+                                      size_t &InArgsSizeBytes) {
   Variable *Lo = Arg->getLo();
   Variable *Hi = Arg->getHi();
   Type Ty = Arg->getType();
@@ -359,9 +438,9 @@
   // block 1 and C is local to block 2, then C may share a slot with A
   // or B.
   const bool SimpleCoalescing = true;
-  int32_t InArgsSizeBytes = 0;
-  int32_t RetIpSizeBytes = 4;
-  int32_t PreservedRegsSizeBytes = 0;
+  size_t InArgsSizeBytes = 0;
+  size_t RetIpSizeBytes = 4;
+  size_t PreservedRegsSizeBytes = 0;
   LocalsSizeBytes = 0;
   Context.init(Node);
   Context.setInsertPoint(Context.getCur());
@@ -380,8 +459,8 @@
   llvm::SmallBitVector CalleeSaves =
       getRegisterSet(RegSet_CalleeSave, RegSet_None);
 
-  int32_t GlobalsSize = 0;
-  std::vector<int> LocalsSize(Func->getNumNodes());
+  size_t GlobalsSize = 0;
+  std::vector<size_t> LocalsSize(Func->getNumNodes());
 
   // Prepass.  Compute RegsUsed, PreservedRegsSizeBytes, and
   // LocalsSizeBytes.
@@ -398,6 +477,9 @@
     // An argument passed on the stack already has a stack slot.
     if (Var->getIsArg())
       continue;
+    // An unreferenced variable doesn't need a stack slot.
+    if (ComputedLiveRanges && Var->getLiveRange().isEmpty())
+      continue;
     // A spill slot linked to a variable with a stack slot should reuse
     // that stack slot.
     if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) {
@@ -406,7 +488,7 @@
           continue;
       }
     }
-    int32_t Increment = typeWidthInBytesOnStack(Var->getType());
+    size_t Increment = typeWidthInBytesOnStack(Var->getType());
     if (SimpleCoalescing) {
       if (Var->isMultiblockLife()) {
         GlobalsSize += Increment;
@@ -461,7 +543,7 @@
   // and if they have no home register, home space will need to be
   // allocated on the stack to copy into.
   Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
-  int32_t BasicFrameOffset = PreservedRegsSizeBytes + RetIpSizeBytes;
+  size_t BasicFrameOffset = PreservedRegsSizeBytes + RetIpSizeBytes;
   if (!IsEbpBasedFrame)
     BasicFrameOffset += LocalsSizeBytes;
   for (SizeT i = 0; i < Args.size(); ++i) {
@@ -470,10 +552,10 @@
   }
 
   // Fill in stack offsets for locals.
-  int32_t TotalGlobalsSize = GlobalsSize;
+  size_t TotalGlobalsSize = GlobalsSize;
   GlobalsSize = 0;
   LocalsSize.assign(LocalsSize.size(), 0);
-  int32_t NextStackOffset = 0;
+  size_t NextStackOffset = 0;
   for (VarList::const_iterator I = Variables.begin(), E = Variables.end();
        I != E; ++I) {
     Variable *Var = *I;
@@ -483,6 +565,8 @@
     }
     if (Var->getIsArg())
       continue;
+    if (ComputedLiveRanges && Var->getLiveRange().isEmpty())
+      continue;
     if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) {
       if (Variable *Linked = Var->getPreferredRegister()) {
         if (!Linked->hasReg()) {
@@ -493,7 +577,7 @@
         }
       }
     }
-    int32_t Increment = typeWidthInBytesOnStack(Var->getType());
+    size_t Increment = typeWidthInBytesOnStack(Var->getType());
     if (SimpleCoalescing) {
       if (Var->isMultiblockLife()) {
         GlobalsSize += Increment;
@@ -1601,6 +1685,37 @@
   Operand *Src1 = legalize(Inst->getSrc(1));
   Variable *Dest = Inst->getDest();
 
+  // If Src1 is an immediate, or known to be a physical register, we can
+  // allow Src0 to be a memory operand.  Otherwise, Src0 must be copied into
+  // a physical register.  (Actually, either Src0 or Src1 can be chosen for
+  // the physical register, but unfortunately we have to commit to one or
+  // the other before register allocation.)
+  bool IsSrc1ImmOrReg = false;
+  if (llvm::isa<Constant>(Src1)) {
+    IsSrc1ImmOrReg = true;
+  } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) {
+    if (Var->hasReg())
+      IsSrc1ImmOrReg = true;
+  }
+
+  // Try to fuse a compare immediately followed by a conditional branch.  This
+  // is possible when the compare dest and the branch source operands are the
+  // same, and are their only uses.  TODO: implement this optimization for i64.
+  if (InstBr *NextBr = llvm::dyn_cast_or_null<InstBr>(Context.getNextInst())) {
+    if (Src0->getType() != IceType_i64 && !NextBr->isUnconditional() &&
+        Dest == NextBr->getSrc(0) && NextBr->isLastUse(Dest)) {
+      Operand *Src0New =
+          legalize(Src0, IsSrc1ImmOrReg ? Legal_All : Legal_Reg, true);
+      _cmp(Src0New, Src1);
+      _br(getIcmp32Mapping(Inst->getCondition()), NextBr->getTargetTrue(),
+          NextBr->getTargetFalse());
+      // Skip over the following branch instruction.
+      NextBr->setDeleted();
+      Context.advanceNext();
+      return;
+    }
+  }
+
   // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1:
   Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);
   Constant *One = Ctx->getConstantInt(IceType_i32, 1);
@@ -1637,19 +1752,6 @@
     return;
   }
 
-  // If Src1 is an immediate, or known to be a physical register, we can
-  // allow Src0 to be a memory operand.  Otherwise, Src0 must be copied into
-  // a physical register.  (Actually, either Src0 or Src1 can be chosen for
-  // the physical register, but unfortunately we have to commit to one or
-  // the other before register allocation.)
-  bool IsSrc1ImmOrReg = false;
-  if (llvm::isa<Constant>(Src1)) {
-    IsSrc1ImmOrReg = true;
-  } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) {
-    if (Var->hasReg())
-      IsSrc1ImmOrReg = true;
-  }
-
   // cmp b, c
   Operand *Src0New =
       legalize(Src0, IsSrc1ImmOrReg ? Legal_All : Legal_Reg, true);
@@ -1662,6 +1764,135 @@
   Context.insert(Label);
 }
 
+namespace {
+
+bool isAdd(const Inst *Inst) {
+  if (const InstArithmetic *Arith =
+          llvm::dyn_cast_or_null<const InstArithmetic>(Inst)) {
+    return (Arith->getOp() == InstArithmetic::Add);
+  }
+  return false;
+}
+
+void computeAddressOpt(Variable *&Base, Variable *&Index, int32_t &Shift,
+                       int32_t &Offset) {
+  (void)Offset; // TODO: pattern-match for non-zero offsets.
+  if (Base == NULL)
+    return;
+  // If the Base has more than one use or is live across multiple
+  // blocks, then don't go further.  Alternatively (?), never consider
+  // a transformation that would change a variable that is currently
+  // *not* live across basic block boundaries into one that *is*.
+  if (Base->isMultiblockLife() /* || Base->getUseCount() > 1*/)
+    return;
+
+  while (true) {
+    // Base is Base=Var ==>
+    //   set Base=Var
+    const Inst *BaseInst = Base->getDefinition();
+    Operand *BaseOperand0 = BaseInst ? BaseInst->getSrc(0) : NULL;
+    Variable *BaseVariable0 = llvm::dyn_cast_or_null<Variable>(BaseOperand0);
+    // TODO: Helper function for all instances of assignment
+    // transitivity.
+    if (BaseInst && llvm::isa<InstAssign>(BaseInst) && BaseVariable0 &&
+        // TODO: ensure BaseVariable0 stays single-BB
+        true) {
+      Base = BaseVariable0;
+      continue;
+    }
+
+    // Index is Index=Var ==>
+    //   set Index=Var
+
+    // Index==NULL && Base is Base=Var1+Var2 ==>
+    //   set Base=Var1, Index=Var2, Shift=0
+    Operand *BaseOperand1 =
+        BaseInst && BaseInst->getSrcSize() >= 2 ? BaseInst->getSrc(1) : NULL;
+    Variable *BaseVariable1 = llvm::dyn_cast_or_null<Variable>(BaseOperand1);
+    if (Index == NULL && isAdd(BaseInst) && BaseVariable0 && BaseVariable1 &&
+        // TODO: ensure BaseVariable0 and BaseVariable1 stay single-BB
+        true) {
+      Base = BaseVariable0;
+      Index = BaseVariable1;
+      Shift = 0; // should already have been 0
+      continue;
+    }
+
+    // Index is Index=Var*Const && log2(Const)+Shift<=3 ==>
+    //   Index=Var, Shift+=log2(Const)
+    const Inst *IndexInst = Index ? Index->getDefinition() : NULL;
+    if (const InstArithmetic *ArithInst =
+            llvm::dyn_cast_or_null<InstArithmetic>(IndexInst)) {
+      Operand *IndexOperand0 = ArithInst->getSrc(0);
+      Variable *IndexVariable0 = llvm::dyn_cast<Variable>(IndexOperand0);
+      Operand *IndexOperand1 = ArithInst->getSrc(1);
+      ConstantInteger *IndexConstant1 =
+          llvm::dyn_cast<ConstantInteger>(IndexOperand1);
+      if (ArithInst->getOp() == InstArithmetic::Mul && IndexVariable0 &&
+          IndexOperand1->getType() == IceType_i32 && IndexConstant1) {
+        uint64_t Mult = IndexConstant1->getValue();
+        uint32_t LogMult;
+        switch (Mult) {
+        case 1:
+          LogMult = 0;
+          break;
+        case 2:
+          LogMult = 1;
+          break;
+        case 4:
+          LogMult = 2;
+          break;
+        case 8:
+          LogMult = 3;
+          break;
+        default:
+          LogMult = 4;
+          break;
+        }
+        if (Shift + LogMult <= 3) {
+          Index = IndexVariable0;
+          Shift += LogMult;
+          continue;
+        }
+      }
+    }
+
+    // Index is Index=Var<<Const && Const+Shift<=3 ==>
+    //   Index=Var, Shift+=Const
+
+    // Index is Index=Const*Var && log2(Const)+Shift<=3 ==>
+    //   Index=Var, Shift+=log2(Const)
+
+    // Index && Shift==0 && Base is Base=Var*Const && log2(Const)+Shift<=3 ==>
+    //   swap(Index,Base)
+    // Similar for Base=Const*Var and Base=Var<<Const
+
+    // Base is Base=Var+Const ==>
+    //   set Base=Var, Offset+=Const
+
+    // Base is Base=Const+Var ==>
+    //   set Base=Var, Offset+=Const
+
+    // Base is Base=Var-Const ==>
+    //   set Base=Var, Offset-=Const
+
+    // Index is Index=Var+Const ==>
+    //   set Index=Var, Offset+=(Const<<Shift)
+
+    // Index is Index=Const+Var ==>
+    //   set Index=Var, Offset+=(Const<<Shift)
+
+    // Index is Index=Var-Const ==>
+    //   set Index=Var, Offset-=(Const<<Shift)
+
+    // TODO: consider overflow issues with respect to Offset.
+    // TODO: handle symbolic constants.
+    break;
+  }
+}
+
+} // anonymous namespace
+
 void TargetX8632::lowerLoad(const InstLoad *Inst) {
   // A Load instruction can be treated the same as an Assign
   // instruction, after the source operand is transformed into an
@@ -1679,10 +1910,64 @@
     Src0 = OperandX8632Mem::create(Func, Ty, Base, Offset);
   }
 
+  // Fuse this load with a subsequent Arithmetic instruction in the
+  // following situations:
+  //   a=[mem]; c=b+a ==> c=b+[mem] if last use of a and a not in b
+  //   a=[mem]; c=a+b ==> c=b+[mem] if commutative and above is true
+  //
+  // TODO: Clean up and test thoroughly.
+  //
+  // TODO: Why limit to Arithmetic instructions?  This could probably be
+  // applied to most any instruction type.  Look at all source operands
+  // in the following instruction, and if there is one instance of the
+  // load instruction's dest variable, and that instruction ends that
+  // variable's live range, then make the substitution.  Deal with
+  // commutativity optimization in the arithmetic instruction lowering.
+  InstArithmetic *NewArith = NULL;
+  if (InstArithmetic *Arith =
+          llvm::dyn_cast_or_null<InstArithmetic>(Context.getNextInst())) {
+    Variable *DestLoad = Inst->getDest();
+    Variable *Src0Arith = llvm::dyn_cast<Variable>(Arith->getSrc(0));
+    Variable *Src1Arith = llvm::dyn_cast<Variable>(Arith->getSrc(1));
+    if (Src1Arith == DestLoad && Arith->isLastUse(Src1Arith) &&
+        DestLoad != Src0Arith) {
+      NewArith = InstArithmetic::create(Func, Arith->getOp(), Arith->getDest(),
+                                        Arith->getSrc(0), Src0);
+    } else if (Src0Arith == DestLoad && Arith->isCommutative() &&
+               Arith->isLastUse(Src0Arith) && DestLoad != Src1Arith) {
+      NewArith = InstArithmetic::create(Func, Arith->getOp(), Arith->getDest(),
+                                        Arith->getSrc(1), Src0);
+    }
+    if (NewArith) {
+      Arith->setDeleted();
+      Context.advanceNext();
+      lowerArithmetic(NewArith);
+      return;
+    }
+  }
+
   InstAssign *Assign = InstAssign::create(Func, Inst->getDest(), Src0);
   lowerAssign(Assign);
 }
 
+void TargetX8632::doAddressOptLoad() {
+  Inst *Inst = *Context.getCur();
+  Variable *Dest = Inst->getDest();
+  Operand *Addr = Inst->getSrc(0);
+  Variable *Index = NULL;
+  int32_t Shift = 0;
+  int32_t Offset = 0; // TODO: make Constant
+  Variable *Base = llvm::dyn_cast<Variable>(Addr);
+  computeAddressOpt(Base, Index, Shift, Offset);
+  if (Base && Addr != Base) {
+    Constant *OffsetOp = Ctx->getConstantInt(IceType_i32, Offset);
+    Addr = OperandX8632Mem::create(Func, Dest->getType(), Base, OffsetOp, Index,
+                                   Shift);
+    Inst->setDeleted();
+    Context.insert(InstLoad::create(Func, Dest, Addr));
+  }
+}
+
 void TargetX8632::lowerPhi(const InstPhi * /*Inst*/) {
   Func->setError("Phi found in regular instruction list");
 }
@@ -1781,6 +2066,24 @@
   }
 }
 
+void TargetX8632::doAddressOptStore() {
+  InstStore *Inst = llvm::cast<InstStore>(*Context.getCur());
+  Operand *Data = Inst->getData();
+  Operand *Addr = Inst->getAddr();
+  Variable *Index = NULL;
+  int32_t Shift = 0;
+  int32_t Offset = 0; // TODO: make Constant
+  Variable *Base = llvm::dyn_cast<Variable>(Addr);
+  computeAddressOpt(Base, Index, Shift, Offset);
+  if (Base && Addr != Base) {
+    Constant *OffsetOp = Ctx->getConstantInt(IceType_i32, Offset);
+    Addr = OperandX8632Mem::create(Func, Data->getType(), Base, OffsetOp, Index,
+                                   Shift);
+    Inst->setDeleted();
+    Context.insert(InstStore::create(Func, Data, Addr));
+  }
+}
+
 void TargetX8632::lowerSwitch(const InstSwitch *Inst) {
   // This implements the most naive possible lowering.
   // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default
@@ -1904,11 +2207,10 @@
       continue;
     if (llvm::isa<InstFakeKill>(Inst))
       continue;
-    SizeT VarIndex = 0;
     for (SizeT SrcNum = 0; SrcNum < Inst->getSrcSize(); ++SrcNum) {
       Operand *Src = Inst->getSrc(SrcNum);
       SizeT NumVars = Src->getNumVars();
-      for (SizeT J = 0; J < NumVars; ++J, ++VarIndex) {
+      for (SizeT J = 0; J < NumVars; ++J) {
         const Variable *Var = Src->getVar(J);
         if (!Var->hasReg())
           continue;
@@ -1923,11 +2225,10 @@
     const Inst *Inst = *I;
     if (Inst->isDeleted())
       continue;
-    SizeT VarIndex = 0;
     for (SizeT SrcNum = 0; SrcNum < Inst->getSrcSize(); ++SrcNum) {
       Operand *Src = Inst->getSrc(SrcNum);
       SizeT NumVars = Src->getNumVars();
-      for (SizeT J = 0; J < NumVars; ++J, ++VarIndex) {
+      for (SizeT J = 0; J < NumVars; ++J) {
         Variable *Var = Src->getVar(J);
         if (Var->hasReg())
           continue;
@@ -1952,15 +2253,15 @@
   }
 }
 
-template <> void ConstantFloat::emit(const Cfg *Func) const {
-  Ostream &Str = Func->getContext()->getStrEmit();
+template <> void ConstantFloat::emit(GlobalContext *Ctx) const {
+  Ostream &Str = Ctx->getStrEmit();
   // It would be better to prefix with ".L$" instead of "L$", but
   // llvm-mc doesn't parse "dword ptr [.L$foo]".
   Str << "dword ptr [L$" << IceType_f32 << "$" << getPoolEntryID() << "]";
 }
 
-template <> void ConstantDouble::emit(const Cfg *Func) const {
-  Ostream &Str = Func->getContext()->getStrEmit();
+template <> void ConstantDouble::emit(GlobalContext *Ctx) const {
+  Ostream &Str = Ctx->getStrEmit();
   Str << "qword ptr [L$" << IceType_f64 << "$" << getPoolEntryID() << "]";
 }
 
diff --git a/src/IceTargetLoweringX8632.h b/src/IceTargetLoweringX8632.h
index e5f8be2..3ca9ca3 100644
--- a/src/IceTargetLoweringX8632.h
+++ b/src/IceTargetLoweringX8632.h
@@ -27,6 +27,7 @@
   static TargetX8632 *create(Cfg *Func) { return new TargetX8632(Func); }
 
   virtual void translateOm1();
+  virtual void translateO2();
 
   virtual Variable *getPhysicalRegister(SizeT RegNum);
   virtual IceString getRegName(SizeT RegNum, Type Ty) const;
@@ -56,7 +57,7 @@
   // latter could be done by directly writing to the stack).
   void split64(Variable *Var);
   void setArgOffsetAndCopy(Variable *Arg, Variable *FramePtr,
-                           int32_t BasicFrameOffset, int32_t &InArgsSizeBytes);
+                           size_t BasicFrameOffset, size_t &InArgsSizeBytes);
   Operand *loOperand(Operand *Operand);
   Operand *hiOperand(Operand *Operand);
 
@@ -89,6 +90,8 @@
   virtual void lowerStore(const InstStore *Inst);
   virtual void lowerSwitch(const InstSwitch *Inst);
   virtual void lowerUnreachable(const InstUnreachable *Inst);
+  virtual void doAddressOptLoad();
+  virtual void doAddressOptStore();
 
   // Operand legalization helpers.  To deal with address mode
   // constraints, the helpers will create a new Operand and emit
@@ -248,8 +251,8 @@
   }
 
   bool IsEbpBasedFrame;
-  int32_t FrameSizeLocals;
-  int32_t LocalsSizeBytes;
+  size_t FrameSizeLocals;
+  size_t LocalsSizeBytes;
   llvm::SmallBitVector TypeToRegisterSet[IceType_NUM];
   llvm::SmallBitVector ScratchRegs;
   llvm::SmallBitVector RegsUsed;
@@ -265,8 +268,8 @@
   template <typename T> void emitConstantPool() const;
 };
 
-template <> void ConstantFloat::emit(const Cfg *Func) const;
-template <> void ConstantDouble::emit(const Cfg *Func) const;
+template <> void ConstantFloat::emit(GlobalContext *Ctx) const;
+template <> void ConstantDouble::emit(GlobalContext *Ctx) const;
 
 } // end of namespace Ice
 
diff --git a/src/llvm2ice.cpp b/src/llvm2ice.cpp
index e29637d..374d753 100644
--- a/src/llvm2ice.cpp
+++ b/src/llvm2ice.cpp
@@ -100,6 +100,29 @@
     return Func;
   }
 
+  // convertConstant() does not use Func or require it to be a valid
+  // Ice::Cfg pointer.  As such, it's suitable for e.g. constructing
+  // global initializers.
+  Ice::Constant *convertConstant(const Constant *Const) {
+    if (const GlobalValue *GV = dyn_cast<GlobalValue>(Const)) {
+      return Ctx->getConstantSym(convertType(GV->getType()), 0, GV->getName());
+    } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(Const)) {
+      return Ctx->getConstantInt(convertIntegerType(CI->getType()),
+                                 CI->getZExtValue());
+    } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(Const)) {
+      Ice::Type Type = convertType(CFP->getType());
+      if (Type == Ice::IceType_f32)
+        return Ctx->getConstantFloat(CFP->getValueAPF().convertToFloat());
+      else if (Type == Ice::IceType_f64)
+        return Ctx->getConstantDouble(CFP->getValueAPF().convertToDouble());
+      llvm_unreachable("Unexpected floating point type");
+      return NULL;
+    } else {
+      llvm_unreachable("Unhandled constant type");
+      return NULL;
+    }
+  }
+
 private:
   // LLVM values (instructions, etc.) are mapped directly to ICE variables.
   // mapValueToIceVar has a version that forces an ICE type on the variable,
@@ -180,24 +203,7 @@
 
   Ice::Operand *convertValue(const Value *Op) {
     if (const Constant *Const = dyn_cast<Constant>(Op)) {
-      if (const GlobalValue *GV = dyn_cast<GlobalValue>(Const)) {
-        return Ctx->getConstantSym(convertType(GV->getType()), 0,
-                                   GV->getName());
-      } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(Const)) {
-        return Ctx->getConstantInt(convertIntegerType(CI->getType()),
-                                   CI->getZExtValue());
-      } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(Const)) {
-        Ice::Type Type = convertType(CFP->getType());
-        if (Type == Ice::IceType_f32)
-          return Ctx->getConstantFloat(CFP->getValueAPF().convertToFloat());
-        else if (Type == Ice::IceType_f64)
-          return Ctx->getConstantDouble(CFP->getValueAPF().convertToDouble());
-        llvm_unreachable("Unexpected floating point type");
-        return NULL;
-      } else {
-        llvm_unreachable("Unhandled constant type");
-        return NULL;
-      }
+      return convertConstant(Const);
     } else {
       return mapValueToIceVar(Op);
     }
diff --git a/tests_lit/llvm2ice_tests/64bit.pnacl.ll b/tests_lit/llvm2ice_tests/64bit.pnacl.ll
index 1ca8077..f942d51 100644
--- a/tests_lit/llvm2ice_tests/64bit.pnacl.ll
+++ b/tests_lit/llvm2ice_tests/64bit.pnacl.ll
@@ -2,7 +2,7 @@
 ; particular the patterns for lowering i64 operations into constituent
 ; i32 operations on x86-32.
 
-; RUIN: %llvm2ice -O2 --verbose none %s | FileCheck %s
+; RUN: %llvm2ice -O2 --verbose none %s | FileCheck %s
 ; RUN: %llvm2ice -Om1 --verbose none %s | FileCheck --check-prefix=OPTM1 %s
 ; RUN: %llvm2ice --verbose none %s | FileCheck --check-prefix=ERRORS %s
 ; RUN: %llvm2iceinsts %s | %szdiff %s | FileCheck --check-prefix=DUMP %s
diff --git a/tests_lit/llvm2ice_tests/alloc.ll b/tests_lit/llvm2ice_tests/alloc.ll
index d9da106..ff92342 100644
--- a/tests_lit/llvm2ice_tests/alloc.ll
+++ b/tests_lit/llvm2ice_tests/alloc.ll
@@ -1,7 +1,7 @@
 ; This is a basic test of the alloca instruction - one test for alloca
 ; of a fixed size, and one test for variable size.
 
-; RUIN: %llvm2ice -O2 --verbose none %s | FileCheck %s
+; RUN: %llvm2ice -O2 --verbose none %s | FileCheck %s
 ; RUN: %llvm2ice -Om1 --verbose none %s | FileCheck --check-prefix=OPTM1 %s
 ; RUN: %llvm2ice --verbose none %s | FileCheck --check-prefix=ERRORS %s
 ; RUN: %llvm2iceinsts %s | %szdiff %s | FileCheck --check-prefix=DUMP %s
diff --git a/tests_lit/llvm2ice_tests/bitcast.ll b/tests_lit/llvm2ice_tests/bitcast.ll
index 8c9a936..cab2e43 100644
--- a/tests_lit/llvm2ice_tests/bitcast.ll
+++ b/tests_lit/llvm2ice_tests/bitcast.ll
@@ -1,6 +1,6 @@
 ; Trivial smoke test of bitcast between integer and FP types.
 
-; RUN: %llvm2ice --verbose inst %s | FileCheck %s
+; RUN: %llvm2ice -O2 --verbose none %s | FileCheck %s
 ; RUN: %llvm2ice --verbose none %s | FileCheck --check-prefix=ERRORS %s
 ; RUN: %llvm2iceinsts %s | %szdiff %s | FileCheck --check-prefix=DUMP %s
 ; RUN: %llvm2iceinsts --pnacl %s | %szdiff %s \
@@ -9,30 +9,38 @@
 define internal i32 @cast_f2i(float %f) {
 entry:
   %v0 = bitcast float %f to i32
-; CHECK: bitcast
   ret i32 %v0
 }
 
+; CHECK: mov eax
+; CHECK: ret
+
 define internal float @cast_i2f(i32 %i) {
 entry:
   %v0 = bitcast i32 %i to float
-; CHECK: bitcast
   ret float %v0
 }
 
+; CHECK: fld dword ptr
+; CHECK: ret
+
 define internal i64 @cast_d2ll(double %d) {
 entry:
   %v0 = bitcast double %d to i64
-; CHECK: bitcast
   ret i64 %v0
 }
 
+; CHECK: mov edx
+; CHECK: ret
+
 define internal double @cast_ll2d(i64 %ll) {
 entry:
   %v0 = bitcast i64 %ll to double
-; CHECK: bitcast
   ret double %v0
 }
 
+; CHECK: fld qword ptr
+; CHECK: ret
+
 ; ERRORS-NOT: ICE translation error
 ; DUMP-NOT: SZ
diff --git a/tests_lit/llvm2ice_tests/callindirect.pnacl.ll b/tests_lit/llvm2ice_tests/callindirect.pnacl.ll
index 0112a5b..3f9ba8d 100644
--- a/tests_lit/llvm2ice_tests/callindirect.pnacl.ll
+++ b/tests_lit/llvm2ice_tests/callindirect.pnacl.ll
@@ -2,7 +2,7 @@
 ; should be to the same operand, whether it's in a register or on the
 ; stack.
 
-; RUIN: %llvm2ice -O2 --verbose none %s | FileCheck %s
+; RUN: %llvm2ice -O2 --verbose none %s | FileCheck %s
 ; RUN: %llvm2ice -Om1 --verbose none %s | FileCheck --check-prefix=OPTM1 %s
 ; RUN: %llvm2ice --verbose none %s | FileCheck --check-prefix=ERRORS %s
 ; RUN: %llvm2iceinsts %s | %szdiff %s | FileCheck --check-prefix=DUMP %s
diff --git a/tests_lit/llvm2ice_tests/casts.ll b/tests_lit/llvm2ice_tests/casts.ll
index a75cd51..7016cfd 100644
--- a/tests_lit/llvm2ice_tests/casts.ll
+++ b/tests_lit/llvm2ice_tests/casts.ll
@@ -1,4 +1,4 @@
-; RUIN: %llvm2ice -O2 --verbose none %s | FileCheck %s
+; RUN: %llvm2ice --verbose inst %s | FileCheck %s
 ; RUN: %llvm2ice --verbose none %s | FileCheck --check-prefix=ERRORS %s
 ; RUN: %llvm2iceinsts %s | %szdiff %s | FileCheck --check-prefix=DUMP %s
 ; RUN: %llvm2iceinsts --pnacl %s | %szdiff %s \
diff --git a/tests_lit/llvm2ice_tests/cmp-opt.ll b/tests_lit/llvm2ice_tests/cmp-opt.ll
index 0dd2a80..342dfac 100644
--- a/tests_lit/llvm2ice_tests/cmp-opt.ll
+++ b/tests_lit/llvm2ice_tests/cmp-opt.ll
@@ -1,6 +1,6 @@
 ; Simple test of non-fused compare/branch.
 
-; RUIN: %llvm2ice -O2 --verbose none %s | FileCheck %s
+; RUN: %llvm2ice -O2 --verbose none %s | FileCheck %s
 ; RUN: %llvm2ice -Om1 --verbose none %s | FileCheck --check-prefix=OPTM1 %s
 ; RUN: %llvm2ice --verbose none %s | FileCheck --check-prefix=ERRORS %s
 ; RUN: %llvm2iceinsts %s | %szdiff %s | FileCheck --check-prefix=DUMP %s
diff --git a/tests_lit/llvm2ice_tests/convert.ll b/tests_lit/llvm2ice_tests/convert.ll
index 1b61db3..144d38a 100644
--- a/tests_lit/llvm2ice_tests/convert.ll
+++ b/tests_lit/llvm2ice_tests/convert.ll
@@ -1,7 +1,7 @@
 ; Simple test of signed and unsigned integer conversions.
 
-; RUIN: %llvm2ice -O2 --verbose none %s | FileCheck %s
-; RUN: %llvm2ice -Om1 --verbose none %s | FileCheck --check-prefix=OPTM1 %s
+; RUN: %llvm2ice -O2 --verbose none %s | FileCheck %s
+; RUN: %llvm2ice -Om1 --verbose none %s | FileCheck %s
 ; RUN: %llvm2ice --verbose none %s | FileCheck --check-prefix=ERRORS %s
 ; RUN: %llvm2iceinsts %s | %szdiff %s | FileCheck --check-prefix=DUMP %s
 ; RUN: %llvm2iceinsts --pnacl %s | %szdiff %s \
@@ -32,25 +32,14 @@
   ret void
 }
 ; CHECK: from_int8:
-; CHECK: mov al, byte ptr [
-; CHECK-NEXT: movsx cx, al
-; CHECK-NEXT: mov word ptr [
-; CHECK-NEXT: movsx ecx, al
-; CHECK-NEXT: mov dword ptr [
-; CHECK-NEXT: movsx ecx, al
-; CHECK-NEXT: sar eax, 31
-; CHECK-NEXT: mov dword ptr [i64v+4],
-; CHECK-NEXT: mov dword ptr [i64v],
-;
-; OPTM1: from_int8:
-; OPTM1: mov {{.*}}, byte ptr [
-; OPTM1: movsx
-; OPTM1: mov word ptr [
-; OPTM1: movsx
-; OPTM1: mov dword ptr [
-; OPTM1: movsx
-; OPTM1: sar {{.*}}, 31
-; OPTM1: i64v
+; CHECK: mov {{.*}}, byte ptr [
+; CHECK: movsx
+; CHECK: mov word ptr [
+; CHECK: movsx
+; CHECK: mov dword ptr [
+; CHECK: movsx
+; CHECK: sar {{.*}}, 31
+; CHECK: i64v
 
 define void @from_int16() {
 entry:
@@ -68,24 +57,13 @@
   ret void
 }
 ; CHECK: from_int16:
-; CHECK: mov ax, word ptr [
-; CHECK-NEXT: mov cx, ax
-; CHECK-NEXT: mov byte ptr [
-; CHECK-NEXT: movsx ecx, ax
-; CHECK-NEXT: mov dword ptr [
-; CHECK-NEXT: movsx ecx, ax
-; CHECK-NEXT: sar eax, 31
-; CHECK-NEXT: mov dword ptr [i64v+4],
-; CHECK-NEXT: mov dword ptr [i64v],
-;
-; OPTM1: from_int16:
-; OPTM1: mov {{.*}}, word ptr [
-; OPTM1: i8v
-; OPTM1: movsx
-; OPTM1: i32v
-; OPTM1: movsx
-; OPTM1: sar {{.*}}, 31
-; OPTM1: i64v
+; CHECK: mov {{.*}}, word ptr [
+; CHECK: i8v
+; CHECK: movsx
+; CHECK: i32v
+; CHECK: movsx
+; CHECK: sar {{.*}}, 31
+; CHECK: i64v
 
 define void @from_int32() {
 entry:
@@ -103,22 +81,11 @@
   ret void
 }
 ; CHECK: from_int32:
-; CHECK: mov eax, dword ptr [
-; CHECK-NEXT: mov ecx, eax
-; CHECK-NEXT: mov byte ptr [
-; CHECK-NEXT: mov ecx, eax
-; CHECK-NEXT: mov word ptr [
-; CHECK-NEXT: mov ecx, eax
-; CHECK-NEXT: sar eax, 31
-; CHECK-NEXT: mov dword ptr [i64v+4],
-; CHECK-NEXT: mov dword ptr [i64v],
-;
-; OPTM1: from_int32:
-; OPTM1: i32v
-; OPTM1: i8v
-; OPTM1: i16v
-; OPTM1: sar {{.*}}, 31
-; OPTM1: i64v
+; CHECK: i32v
+; CHECK: i8v
+; CHECK: i16v
+; CHECK: sar {{.*}}, 31
+; CHECK: i64v
 
 define void @from_int64() {
 entry:
@@ -136,18 +103,10 @@
   ret void
 }
 ; CHECK: from_int64:
-; CHECK: mov eax, dword ptr [
-; CHECK-NEXT: mov ecx, eax
-; CHECK-NEXT: mov byte ptr [
-; CHECK-NEXT: mov ecx, eax
-; CHECK-NEXT: mov word ptr [
-; CHECK-NEXT: mov dword ptr [
-;
-; OPTM1: from_int64:
-; OPTM1: i64v
-; OPTM1: i8v
-; OPTM1: i16v
-; OPTM1: i32v
+; CHECK: i64v
+; CHECK: i8v
+; CHECK: i16v
+; CHECK: i32v
 
 define void @from_uint8() {
 entry:
@@ -165,25 +124,14 @@
   ret void
 }
 ; CHECK: from_uint8:
-; CHECK: mov al, byte ptr [
-; CHECK-NEXT: movzx cx, al
-; CHECK-NEXT: mov word ptr [
-; CHECK-NEXT: movzx ecx, al
-; CHECK-NEXT: mov dword ptr [
-; CHECK-NEXT: movzx eax, al
-; CHECK-NEXT: mov ecx, 0
-; CHECK-NEXT: mov dword ptr [i64v+4],
-; CHECK-NEXT: mov dword ptr [i64v],
-;
-; OPTM1: from_uint8:
-; OPTM1: u8v
-; OPTM1: movzx
-; OPTM1: i16v
-; OPTM1: movzx
-; OPTM1: i32v
-; OPTM1: movzx
-; OPTM1: mov {{.*}}, 0
-; OPTM1: i64v
+; CHECK: u8v
+; CHECK: movzx
+; CHECK: i16v
+; CHECK: movzx
+; CHECK: i32v
+; CHECK: movzx
+; CHECK: mov {{.*}}, 0
+; CHECK: i64v
 
 define void @from_uint16() {
 entry:
@@ -201,24 +149,13 @@
   ret void
 }
 ; CHECK: from_uint16:
-; CHECK: mov ax, word ptr [
-; CHECK-NEXT: mov cx, ax
-; CHECK-NEXT: mov byte ptr [
-; CHECK-NEXT: movzx ecx, ax
-; CHECK-NEXT: mov dword ptr [
-; CHECK-NEXT: movzx eax, ax
-; CHECK-NEXT: mov ecx, 0
-; CHECK-NEXT: mov dword ptr [i64v+4],
-; CHECK-NEXT: mov dword ptr [i64v],
-;
-; OPTM1: from_uint16:
-; OPTM1: u16v
-; OPTM1: i8v
-; OPTM1: movzx
-; OPTM1: i32v
-; OPTM1: movzx
-; OPTM1: mov {{.*}}, 0
-; OPTM1: i64v
+; CHECK: u16v
+; CHECK: i8v
+; CHECK: movzx
+; CHECK: i32v
+; CHECK: movzx
+; CHECK: mov {{.*}}, 0
+; CHECK: i64v
 
 define void @from_uint32() {
 entry:
@@ -236,21 +173,11 @@
   ret void
 }
 ; CHECK: from_uint32:
-; CHECK: mov eax, dword ptr [
-; CHECK-NEXT: mov ecx, eax
-; CHECK-NEXT: mov byte ptr [
-; CHECK-NEXT: mov ecx, eax
-; CHECK-NEXT: mov word ptr [
-; CHECK-NEXT: mov ecx, 0
-; CHECK-NEXT: mov dword ptr [i64v+4],
-; CHECK-NEXT: mov dword ptr [i64v],
-;
-; OPTM1: from_uint32:
-; OPTM1: u32v
-; OPTM1: i8v
-; OPTM1: i16v
-; OPTM1: mov {{.*}}, 0
-; OPTM1: i64v
+; CHECK: u32v
+; CHECK: i8v
+; CHECK: i16v
+; CHECK: mov {{.*}}, 0
+; CHECK: i64v
 
 define void @from_uint64() {
 entry:
@@ -268,18 +195,10 @@
   ret void
 }
 ; CHECK: from_uint64:
-; CHECK: mov eax, dword ptr [
-; CHECK-NEXT: mov ecx, eax
-; CHECK-NEXT: mov byte ptr [
-; CHECK-NEXT: mov ecx, eax
-; CHECK-NEXT: mov word ptr [
-; CHECK-NEXT: mov dword ptr [
-;
-; OPTM1: from_uint64:
-; OPTM1: u64v
-; OPTM1: i8v
-; OPTM1: i16v
-; OPTM1: i32v
+; CHECK: u64v
+; CHECK: i8v
+; CHECK: i16v
+; CHECK: i32v
 
 ; ERRORS-NOT: ICE translation error
 ; DUMP-NOT: SZ
diff --git a/tests_lit/llvm2ice_tests/fp.pnacl.ll b/tests_lit/llvm2ice_tests/fp.pnacl.ll
index c31725a..f8b7228 100644
--- a/tests_lit/llvm2ice_tests/fp.pnacl.ll
+++ b/tests_lit/llvm2ice_tests/fp.pnacl.ll
@@ -3,7 +3,7 @@
 ; that should be present regardless of the optimization level, so
 ; there are no special OPTM1 match lines.
 
-; RUIN: %llvm2ice -O2 --verbose none %s | FileCheck %s
+; RUN: %llvm2ice -O2 --verbose none %s | FileCheck %s
 ; RUN: %llvm2ice -Om1 --verbose none %s | FileCheck %s
 ; RUN: %llvm2ice --verbose none %s | FileCheck --check-prefix=ERRORS %s
 ; RUN: %llvm2iceinsts %s | %szdiff %s | FileCheck --check-prefix=DUMP %s
diff --git a/tests_lit/llvm2ice_tests/fpconst.pnacl.ll b/tests_lit/llvm2ice_tests/fpconst.pnacl.ll
index 9bbfe3b..9bcf178 100644
--- a/tests_lit/llvm2ice_tests/fpconst.pnacl.ll
+++ b/tests_lit/llvm2ice_tests/fpconst.pnacl.ll
@@ -6,6 +6,7 @@
 ; number in a reasonable number of digits".  See
 ; http://llvm.org/docs/LangRef.html#simple-constants .
 
+; RUN: %llvm2ice -O2 --verbose none %s | FileCheck %s
 ; RUN: %llvm2ice -Om1 --verbose none %s | FileCheck %s
 ; RUN: %llvm2ice --verbose none %s | FileCheck --check-prefix=ERRORS %s
 ; RUN: %llvm2iceinsts %s | %szdiff %s | FileCheck --check-prefix=DUMP %s
diff --git a/tests_lit/llvm2ice_tests/select-opt.ll b/tests_lit/llvm2ice_tests/select-opt.ll
index c0358fb..3ae2216 100644
--- a/tests_lit/llvm2ice_tests/select-opt.ll
+++ b/tests_lit/llvm2ice_tests/select-opt.ll
@@ -3,7 +3,7 @@
 ; regardless of the optimization level, so there are no special OPTM1
 ; match lines.
 
-; RUIN: %llvm2ice -O2 --verbose none %s | FileCheck %s
+; RUN: %llvm2ice -O2 --verbose none %s | FileCheck %s
 ; RUN: %llvm2ice -Om1 --verbose none %s | FileCheck %s
 ; RUN: %llvm2ice --verbose none %s | FileCheck --check-prefix=ERRORS %s
 ; RUN: %llvm2iceinsts %s | %szdiff %s | FileCheck --check-prefix=DUMP %s
diff --git a/tests_lit/llvm2ice_tests/shift.ll b/tests_lit/llvm2ice_tests/shift.ll
index c1a071f..2ec187d 100644
--- a/tests_lit/llvm2ice_tests/shift.ll
+++ b/tests_lit/llvm2ice_tests/shift.ll
@@ -1,7 +1,7 @@
 ; This is a test of C-level conversion operations that clang lowers
 ; into pairs of shifts.
 
-; RUIN: %llvm2ice -O2 --verbose none %s | FileCheck %s
+; RUN: %llvm2ice -O2 --verbose none %s | FileCheck %s
 ; RUN: %llvm2ice -Om1 --verbose none %s | FileCheck %s
 ; RUN: %llvm2ice --verbose none %s | FileCheck --check-prefix=ERRORS %s
 ; RUN: %llvm2iceinsts %s | %szdiff %s | FileCheck --check-prefix=DUMP %s
diff --git a/tests_lit/llvm2ice_tests/simple-loop.ll b/tests_lit/llvm2ice_tests/simple-loop.ll
index b983d14c..9402064 100644
--- a/tests_lit/llvm2ice_tests/simple-loop.ll
+++ b/tests_lit/llvm2ice_tests/simple-loop.ll
@@ -1,7 +1,7 @@
 ; This tests a simple loop that sums the elements of an input array.
 ; The O2 check patterns represent the best code currently achieved.
 
-; RUIN: %llvm2ice -O2 --verbose none %s | FileCheck %s
+; RUN: %llvm2ice -O2 --verbose none %s | FileCheck %s
 ; RUN: %llvm2ice -Om1 --verbose none %s | FileCheck --check-prefix=OPTM1 %s
 ; RUN: %llvm2ice --verbose none %s | FileCheck --check-prefix=ERRORS %s
 ; RUN: %llvm2iceinsts %s | %szdiff %s | FileCheck --check-prefix=DUMP %s
diff --git a/tests_lit/llvm2ice_tests/unreachable.ll b/tests_lit/llvm2ice_tests/unreachable.ll
index ab54704..b862625 100644
--- a/tests_lit/llvm2ice_tests/unreachable.ll
+++ b/tests_lit/llvm2ice_tests/unreachable.ll
@@ -20,5 +20,11 @@
   ret i32 %div
 }
 
+; CHECK: cmp
+; CHECK: call ice_unreachable
+; CHECK: cdq
+; CHECK: idiv
+; CHECK: ret
+
 ; ERRORS-NOT: ICE translation error
 ; DUMP-NOT: SZ
