Reflow comments to use the full width.

BUG=
R=stichnot@chromium.org

Review URL: https://codereview.chromium.org/1341423002 .
diff --git a/src/IceAPFloat.h b/src/IceAPFloat.h
index ccfb7f3..9aed889 100644
--- a/src/IceAPFloat.h
+++ b/src/IceAPFloat.h
@@ -11,8 +11,8 @@
 /// \brief This file implements a class to represent Subzero float and double
 /// values.
 ///
-/// Note: This is a simplified version of
-/// llvm/include/llvm/ADT/APFloat.h for use with Subzero.
+/// Note: This is a simplified version of llvm/include/llvm/ADT/APFloat.h for
+/// use with Subzero.
 //===----------------------------------------------------------------------===//
 
 #ifndef SUBZERO_SRC_ICEAPFLOAT_H
diff --git a/src/IceAssembler.cpp b/src/IceAssembler.cpp
index 5c1760b..9c77dce 100644
--- a/src/IceAssembler.cpp
+++ b/src/IceAssembler.cpp
@@ -48,13 +48,13 @@
 }
 
 void AssemblerBuffer::EnsureCapacity::validate(AssemblerBuffer *buffer) {
-  // In debug mode, we save the assembler buffer along with the gap
-  // size before we start emitting to the buffer. This allows us to
-  // check that any single generated instruction doesn't overflow the
-  // limit implied by the minimum gap size.
+  // In debug mode, we save the assembler buffer along with the gap size before
+  // we start emitting to the buffer. This allows us to check that any single
+  // generated instruction doesn't overflow the limit implied by the minimum
+  // gap size.
   Gap = computeGap();
-  // Make sure that extending the capacity leaves a big enough gap
-  // for any kind of instruction.
+  // Make sure that extending the capacity leaves a big enough gap for any kind
+  // of instruction.
   assert(Gap >= kMinimumGap);
   // Mark the buffer as having ensured the capacity.
   assert(!buffer->hasEnsuredCapacity()); // Cannot nest.
@@ -64,8 +64,8 @@
 AssemblerBuffer::EnsureCapacity::~EnsureCapacity() {
   // Unmark the buffer, so we cannot emit after this.
   Buffer->HasEnsuredCapacity = false;
-  // Make sure the generated instruction doesn't take up more
-  // space than the minimum gap.
+  // Make sure the generated instruction doesn't take up more space than the
+  // minimum gap.
   intptr_t delta = Gap - computeGap();
   (void)delta;
   assert(delta <= kMinimumGap);
@@ -133,9 +133,9 @@
     }
     Str << "\t.long ";
     // For PCRel fixups, we write the pc-offset from a symbol into the Buffer
-    // (e.g., -4), but we don't represent that in the fixup's offset.
-    // Otherwise the fixup holds the true offset, and so does the Buffer.
-    // Just load the offset from the buffer.
+    // (e.g., -4), but we don't represent that in the fixup's offset. Otherwise
+    // the fixup holds the true offset, and so does the Buffer. Just load the
+    // offset from the buffer.
     NextFixup->emit(Ctx, Buffer.load<RelocOffsetT>(NextFixupLoc));
     if (fixupIsPCRel(NextFixup->kind()))
       Str << " - .";
diff --git a/src/IceAssembler.h b/src/IceAssembler.h
index 5b07975..8247e66 100644
--- a/src/IceAssembler.h
+++ b/src/IceAssembler.h
@@ -15,9 +15,9 @@
 //===----------------------------------------------------------------------===//
 ///
 /// \file
-/// This file declares the Assembler base class.  Instructions are assembled
-/// by architecture-specific assemblers that derive from this base class.
-/// This base class manages buffers and fixups for emitting code, etc.
+/// This file declares the Assembler base class. Instructions are assembled by
+/// architecture-specific assemblers that derive from this base class. This base
+/// class manages buffers and fixups for emitting code, etc.
 ///
 //===----------------------------------------------------------------------===//
 
@@ -55,7 +55,7 @@
   }
 
   /// Returns the position of an earlier branch instruction that was linked to
-  /// this label (branches that use this are considered forward branches).  The
+  /// this label (branches that use this are considered forward branches). The
   /// linked instructions form a linked list, of sorts, using the instruction's
   /// displacement field for the location of the next instruction that is also
   /// linked to this label.
@@ -200,8 +200,8 @@
     return (Limit - Contents) + kMinimumGap;
   }
 
-  /// Compute the limit based on the data area and the capacity. See
-  /// description of kMinimumGap for the reasoning behind the value.
+  /// Compute the limit based on the data area and the capacity. See description
+  /// of kMinimumGap for the reasoning behind the value.
   static uintptr_t computeLimit(uintptr_t Data, intptr_t Capacity) {
     return Data + Capacity - kMinimumGap;
   }
@@ -226,12 +226,12 @@
 
   /// Allocate a chunk of bytes using the per-Assembler allocator.
   uintptr_t allocateBytes(size_t bytes) {
-    // For now, alignment is not related to NaCl bundle alignment, since
-    // the buffer's GetPosition is relative to the base. So NaCl bundle
-    // alignment checks can be relative to that base. Later, the buffer
-    // will be copied out to a ".text" section (or an in memory-buffer
-    // that can be mprotect'ed with executable permission), and that
-    // second buffer should be aligned for NaCl.
+    // For now, alignment is not related to NaCl bundle alignment, since the
+    // buffer's GetPosition is relative to the base. So NaCl bundle alignment
+    // checks can be relative to that base. Later, the buffer will be copied
+    // out to a ".text" section (or an in-memory buffer that can be mprotect'ed
+    // with executable permission), and that second buffer should be aligned
+    // for NaCl.
     const size_t Alignment = 16;
     return reinterpret_cast<uintptr_t>(Allocator.Allocate(bytes, Alignment));
   }
@@ -257,8 +257,8 @@
 
   /// Get the label for a CfgNode.
   virtual Label *getCfgNodeLabel(SizeT NodeNumber) = 0;
-  /// Mark the current text location as the start of a CFG node
-  /// (represented by NodeNumber).
+  /// Mark the current text location as the start of a CFG node (represented by
+  /// NodeNumber).
   virtual void bindCfgNodeLabel(SizeT NodeNumber) = 0;
 
   virtual bool fixupIsPCRel(FixupKind Kind) const = 0;
@@ -293,15 +293,15 @@
   const AssemblerKind Kind;
 
   ArenaAllocator<32 * 1024> Allocator;
-  /// FunctionName and IsInternal are transferred from the original Cfg
-  /// object, since the Cfg object may be deleted by the time the
-  /// assembler buffer is emitted.
+  /// FunctionName and IsInternal are transferred from the original Cfg object,
+  /// since the Cfg object may be deleted by the time the assembler buffer is
+  /// emitted.
   IceString FunctionName = "";
   bool IsInternal = false;
-  /// Preliminary indicates whether a preliminary pass is being made
-  /// for calculating bundle padding (Preliminary=true), versus the
-  /// final pass where all changes to label bindings, label links, and
-  /// relocation fixups are fully committed (Preliminary=false).
+  /// Preliminary indicates whether a preliminary pass is being made for
+  /// calculating bundle padding (Preliminary=true), versus the final pass where
+  /// all changes to label bindings, label links, and relocation fixups are
+  /// fully committed (Preliminary=false).
   bool Preliminary = false;
 
 protected:
diff --git a/src/IceAssemblerX86Base.h b/src/IceAssemblerX86Base.h
index ab1a29d..7c4be6f 100644
--- a/src/IceAssemblerX86Base.h
+++ b/src/IceAssemblerX86Base.h
@@ -195,8 +195,8 @@
       Type, typename Traits::GPRRegister, typename Traits::GPRRegister,
       const Immediate &);
   struct GPREmitterShiftD {
-    // Technically AddrGPR and AddrGPRImm are also allowed, but in practice
-    // we always normalize Dest to a Register first.
+    // Technically AddrGPR and AddrGPRImm are also allowed, but in practice we
+    // always normalize Dest to a Register first.
     TypedEmitGPRGPR GPRGPR;
     TypedEmitGPRGPRImm GPRGPRImm;
   };
@@ -252,8 +252,8 @@
     TypedEmitAddr RegAddr;
   };
 
-  // Three operand (potentially) cross Xmm/GPR instructions.
-  // The last operand must be an immediate.
+  // Three operand (potentially) cross Xmm/GPR instructions. The last operand
+  // must be an immediate.
   template <typename DReg_t, typename SReg_t> struct ThreeOpImmEmitter {
     using TypedEmitRegRegImm = void (AssemblerX86Base::*)(Type, DReg_t, SReg_t,
                                                           const Immediate &);
@@ -906,8 +906,8 @@
 
   Label *getOrCreateLabel(SizeT Number, LabelVector &Labels);
 
-  // The arith_int() methods factor out the commonality between the encodings of
-  // add(), Or(), adc(), sbb(), And(), sub(), Xor(), and cmp().  The Tag
+  // The arith_int() methods factor out the commonality between the encodings
+  // of add(), Or(), adc(), sbb(), And(), sub(), Xor(), and cmp(). The Tag
   // parameter is statically asserted to be less than 8.
   template <uint32_t Tag>
   void arith_int(Type Ty, typename Traits::GPRRegister reg,
@@ -957,10 +957,10 @@
            isByteSizedType(Ty);
   }
 
-  // assembleAndEmitRex is used for determining which (if any) rex prefix should
-  // be emitted for the current instruction. It allows different types for Reg
-  // and Rm because they could be of different types (e.g., in mov[sz]x
-  // instrutions.) If Addr is not nullptr, then Rm is ignored, and Rex.B is
+  // assembleAndEmitRex is used for determining which (if any) rex prefix
+  // should be emitted for the current instruction. It allows different types
+  // for Reg and Rm because they could be of different types (e.g., in mov[sz]x
+  // instructions.) If Addr is not nullptr, then Rm is ignored, and Rex.B is
   // determined by Addr instead. TyRm is still used to determine Addr's size.
   template <typename RegType, typename RmType, typename T = Traits>
   typename std::enable_if<T::Is64Bit, void>::type
@@ -1005,9 +1005,9 @@
     assembleAndEmitRex(TyReg, Reg, TyRm, Rm);
   }
 
-  // emitRexB is used for emitting a Rex prefix if one is needed on encoding the
-  // Reg field in an x86 instruction. It is invoked by the template when Reg is
-  // the single register operand in the instruction (e.g., push Reg.)
+  // emitRexB is used for emitting a Rex prefix if one is needed on encoding
+  // the Reg field in an x86 instruction. It is invoked by the template when
+  // Reg is the single register operand in the instruction (e.g., push Reg.)
   template <typename RmType> void emitRexB(const Type Ty, const RmType Rm) {
     emitRexRB(Ty, RexRegIrrelevant, Ty, Rm);
   }
diff --git a/src/IceAssemblerX86BaseImpl.h b/src/IceAssemblerX86BaseImpl.h
index f449dae..b1013d6 100644
--- a/src/IceAssemblerX86BaseImpl.h
+++ b/src/IceAssemblerX86BaseImpl.h
@@ -1068,9 +1068,8 @@
 }
 
 // {add,sub,mul,div}ps are given a Ty parameter for consistency with
-// {add,sub,mul,div}ss. In the future, when the PNaCl ABI allows
-// addpd, etc., we can use the Ty parameter to decide on adding
-// a 0x66 prefix.
+// {add,sub,mul,div}ss. In the future, when the PNaCl ABI allows addpd, etc.,
+// we can use the Ty parameter to decide on adding a 0x66 prefix.
 template <class Machine>
 void AssemblerX86Base<Machine>::addps(Type /* Ty */,
                                       typename Traits::XmmRegister dst,
@@ -1836,8 +1835,8 @@
     emitUint8(0x0F);
     emitUint8(0x3A);
     emitUint8(isByteSizedType(Ty) ? 0x14 : 0x16);
-    // SSE 4.1 versions are "MRI" because dst can be mem, while
-    // pextrw (SSE2) is RMI because dst must be reg.
+    // SSE 4.1 versions are "MRI" because dst can be mem, while pextrw (SSE2)
+    // is RMI because dst must be reg.
     emitXmmRegisterOperand(src, dst);
     emitUint8(imm.value());
   }
@@ -2147,11 +2146,11 @@
 void AssemblerX86Base<Machine>::test(Type Ty, typename Traits::GPRRegister reg,
                                      const Immediate &immediate) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  // For registers that have a byte variant (EAX, EBX, ECX, and EDX)
-  // we only test the byte register to keep the encoding short.
-  // This is legal even if the register had high bits set since
-  // this only sets flags registers based on the "AND" of the two operands,
-  // and the immediate had zeros at those high bits.
+  // For registers that have a byte variant (EAX, EBX, ECX, and EDX) we only
+  // test the byte register to keep the encoding short. This is legal even if
+  // the register had high bits set since this only sets flags registers based
+  // on the "AND" of the two operands, and the immediate had zeros at those
+  // high bits.
   if (immediate.is_uint8() && reg <= Traits::Last8BitGPR) {
     // Use zero-extended 8-bit immediate.
     emitRexB(Ty, reg);
@@ -2183,8 +2182,8 @@
                                      const typename Traits::Address &addr,
                                      const Immediate &immediate) {
   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
-  // If the immediate is short, we only test the byte addr to keep the
-  // encoding short.
+  // If the immediate is short, we only test the byte addr to keep the encoding
+  // short.
   if (immediate.is_uint8()) {
     // Use zero-extended 8-bit immediate.
     emitRex(Ty, addr, RexRegIrrelevant);
@@ -3016,10 +3015,10 @@
       // TODO(stichnot): Here and in jmp(), we may need to be more
       // conservative about the backward branch distance if the branch
       // instruction is within a bundle_lock sequence, because the
-      // distance may increase when padding is added.  This isn't an
-      // issue for branches outside a bundle_lock, because if padding
-      // is added, the retry may change it to a long backward branch
-      // without affecting any of the bookkeeping.
+      // distance may increase when padding is added. This isn't an issue for
+      // branches outside a bundle_lock, because if padding is added, the retry
+      // may change it to a long backward branch without affecting any of the
+      // bookkeeping.
       emitUint8(0x70 + condition);
       emitUint8((offset - kShortSize) & 0xFF);
     } else {
diff --git a/src/IceBrowserCompileServer.cpp b/src/IceBrowserCompileServer.cpp
index 03127a9..4d5705f 100644
--- a/src/IceBrowserCompileServer.cpp
+++ b/src/IceBrowserCompileServer.cpp
@@ -106,14 +106,13 @@
   BrowserCompileServer *Server =
       reinterpret_cast<BrowserCompileServer *>(UserData);
   Server->setFatalError(Reason);
-  // Only kill the current thread instead of the whole process.
-  // We need the server thread to remain alive in order to respond with the
-  // error message.
+  // Only kill the current thread instead of the whole process. We need the
+  // server thread to remain alive in order to respond with the error message.
   // We could also try to pthread_kill all other worker threads, but
-  // pthread_kill / raising signals is not supported by NaCl.
-  // We'll have to assume that the worker/emitter threads will be well behaved
-  // after a fatal error in other threads, and either get stuck waiting
-  // on input from a previous stage, or also call report_fatal_error.
+  // pthread_kill / raising signals is not supported by NaCl. We'll have to
+  // assume that the worker/emitter threads will be well behaved after a fatal
+  // error in other threads, and either get stuck waiting on input from a
+  // previous stage, or also call report_fatal_error.
   pthread_exit(0);
 }
 
@@ -143,8 +142,8 @@
 }
 
 bool BrowserCompileServer::pushInputBytes(const void *Data, size_t NumBytes) {
-  // If there was an earlier error, do not attempt to push bytes to
-  // the QueueStreamer. Otherwise the thread could become blocked.
+  // If there was an earlier error, do not attempt to push bytes to the
+  // QueueStreamer. Otherwise the thread could become blocked.
   if (HadError.load())
     return true;
   return InputStream->PutBytes(
@@ -163,8 +162,8 @@
 ErrorCode &BrowserCompileServer::getErrorCode() {
   if (HadError.load()) {
     // HadError means report_fatal_error is called. Make sure that the
-    // LastError is not EC_None. We don't know the type of error so
-    // just pick some error category.
+    // LastError is not EC_None. We don't know the type of error so just pick
+    // some error category.
     LastError.assign(EC_Translation);
   }
   return LastError;
diff --git a/src/IceBrowserCompileServer.h b/src/IceBrowserCompileServer.h
index e76b820..f23ab17 100644
--- a/src/IceBrowserCompileServer.h
+++ b/src/IceBrowserCompileServer.h
@@ -31,12 +31,11 @@
 
 namespace Ice {
 
-/// The browser variant of the compile server.
-/// Compared to the commandline version, this version gets compile
-/// requests over IPC. Each compile request will have a slimmed down
-/// version of argc, argv while other flags are set to defaults that
-/// make sense in the browser case. The output file is specified via
-/// a posix FD, and input bytes are pushed to the server.
+/// The browser variant of the compile server. Compared to the commandline
+/// version, this version gets compile requests over IPC. Each compile request
+/// will have a slimmed down version of argc, argv while other flags are set to
+/// defaults that make sense in the browser case. The output file is specified
+/// via a posix FD, and input bytes are pushed to the server.
 class BrowserCompileServer : public CompileServer {
   BrowserCompileServer() = delete;
   BrowserCompileServer(const BrowserCompileServer &) = delete;
@@ -56,12 +55,12 @@
   /// Parse and set up the flags for compile jobs.
   void getParsedFlags(uint32_t NumThreads, int argc, char **argv);
 
-  /// Creates the streams + context and starts the compile thread,
-  /// handing off the streams + context.
+  /// Creates the streams + context and starts the compile thread, handing off
+  /// the streams + context.
   void startCompileThread(int OutFD);
 
-  /// Call to push more bytes to the current input stream.
-  /// Returns false on success and true on error.
+  /// Call to push more bytes to the current input stream. Returns false on
+  /// success and true on error.
   bool pushInputBytes(const void *Data, size_t NumBytes);
 
   /// Notify the input stream of EOF.
@@ -72,9 +71,8 @@
     CompileThread.join();
     if (Ctx->getErrorStatus()->value())
       LastError.assign(Ctx->getErrorStatus()->value());
-    // Reset some state. The InputStream is deleted by the compiler
-    // so only reset this to nullptr. Free and flush the rest
-    // of the streams.
+    // Reset some state. The InputStream is deleted by the compiler so only
+    // reset this to nullptr. Free and flush the rest of the streams.
     InputStream = nullptr;
     EmitStream.reset(nullptr);
     ELFStream.reset(nullptr);
@@ -95,12 +93,12 @@
     std::string Buffer;
     llvm::raw_string_ostream StrBuf;
   };
-  /// This currently only handles a single compile request, hence one copy
-  /// of the state.
+  /// This currently only handles a single compile request, hence one copy of
+  /// the state.
   std::unique_ptr<GlobalContext> Ctx;
-  /// A borrowed reference to the current InputStream. The compiler owns
-  /// the actual reference so the server must be careful not to access
-  /// after the compiler is done.
+  /// A borrowed reference to the current InputStream. The compiler owns the
+  /// actual reference so the server must be careful not to access it after the
+  /// compiler is done.
   llvm::QueueStreamer *InputStream = nullptr;
   std::unique_ptr<Ostream> LogStream;
   std::unique_ptr<llvm::raw_fd_ostream> EmitStream;
diff --git a/src/IceCfg.cpp b/src/IceCfg.cpp
index ed60abb..4c703cf 100644
--- a/src/IceCfg.cpp
+++ b/src/IceCfg.cpp
@@ -8,8 +8,7 @@
 //===----------------------------------------------------------------------===//
 ///
 /// \file
-/// This file implements the Cfg class, including constant pool
-/// management.
+/// This file implements the Cfg class, including constant pool management.
 ///
 //===----------------------------------------------------------------------===//
 
@@ -46,8 +45,8 @@
       TargetAssembler(TargetLowering::createAssembler(
           Ctx->getFlags().getTargetArch(), this)) {
   if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Randomize) {
-    // If -randomize-pool-immediates=randomize, create a random number generator
-    // to generate a cookie for constant blinding.
+    // If -randomize-pool-immediates=randomize, create a random number
+    // generator to generate a cookie for constant blinding.
     RandomNumberGenerator RNG(Ctx->getFlags().getRandomSeed(),
                               RPE_ConstantBlinding, this->SequenceNumber);
     ConstantBlindingCookie =
@@ -86,8 +85,8 @@
   ImplicitArgs.push_back(Arg);
 }
 
-// Returns whether the stack frame layout has been computed yet.  This
-// is used for dumping the stack frame location of Variables.
+// Returns whether the stack frame layout has been computed yet. This is used
+// for dumping the stack frame location of Variables.
 bool Cfg::hasComputedFrame() const { return getTarget()->hasComputedFrame(); }
 
 namespace {
@@ -157,8 +156,8 @@
 void Cfg::translate() {
   if (hasError())
     return;
-  // FunctionTimer conditionally pushes/pops a TimerMarker if
-  // TimeEachFunction is enabled.
+  // FunctionTimer conditionally pushes/pops a TimerMarker if TimeEachFunction
+  // is enabled.
   std::unique_ptr<TimerMarker> FunctionTimer;
   if (BuildDefs::dump()) {
     const IceString &TimingFocusOn =
@@ -180,16 +179,16 @@
 
   if (getContext()->getFlags().getEnableBlockProfile()) {
     profileBlocks();
-    // TODO(jpp): this is fragile, at best. Figure out a better way of detecting
-    // exit functions.
+    // TODO(jpp): this is fragile, at best. Figure out a better way of
+    // detecting exit functions.
     if (GlobalContext::matchSymbolName(getFunctionName(), "exit")) {
       addCallToProfileSummary();
     }
     dump("Profiled CFG");
   }
 
-  // The set of translation passes and their order are determined by
-  // the target.
+  // The set of translation passes and their order are determined by the
+  // target.
   getTarget()->translate();
 
   dump("Final output");
@@ -273,8 +272,8 @@
   for (Variable *Var : Variables) {
     Var->getLiveRange().reset();
   }
-  // This splits edges and appends new nodes to the end of the node
-  // list.  This can invalidate iterators, so don't use an iterator.
+  // This splits edges and appends new nodes to the end of the node list. This
+  // can invalidate iterators, so don't use an iterator.
   SizeT NumNodes = getNumNodes();
   SizeT NumVars = getNumVariables();
   for (SizeT I = 0; I < NumNodes; ++I)
@@ -282,8 +281,8 @@
 
   TimerMarker TT(TimerStack::TT_lowerPhiAssignments, this);
   if (true) {
-    // The following code does an in-place update of liveness and live ranges as
-    // a result of adding the new phi edge split nodes.
+    // The following code does an in-place update of liveness and live ranges
+    // as a result of adding the new phi edge split nodes.
     getLiveness()->initPhiEdgeSplits(Nodes.begin() + NumNodes,
                                      Variables.begin() + NumVars);
     TimerMarker TTT(TimerStack::TT_liveness, this);
@@ -297,7 +296,7 @@
     }
   } else {
     // The following code does a brute-force recalculation of live ranges as a
-    // result of adding the new phi edge split nodes.  The liveness calculation
+    // result of adding the new phi edge split nodes. The liveness calculation
     // is particularly expensive because the new nodes are not yet in a proper
     // topological order and so convergence is slower.
     //
@@ -310,27 +309,25 @@
   Target->regAlloc(RAK_Phi);
 }
 
-// Find a reasonable placement for nodes that have not yet been
-// placed, while maintaining the same relative ordering among already
-// placed nodes.
+// Find a reasonable placement for nodes that have not yet been placed, while
+// maintaining the same relative ordering among already placed nodes.
 void Cfg::reorderNodes() {
-  // TODO(ascull): it would be nice if the switch tests were always followed
-  // by the default case to allow for fall through.
+  // TODO(ascull): it would be nice if the switch tests were always followed by
+  // the default case to allow for fall through.
   using PlacedList = std::list<CfgNode *>;
   PlacedList Placed;      // Nodes with relative placement locked down
   PlacedList Unreachable; // Unreachable nodes
   PlacedList::iterator NoPlace = Placed.end();
-  // Keep track of where each node has been tentatively placed so that
-  // we can manage insertions into the middle.
+  // Keep track of where each node has been tentatively placed so that we can
+  // manage insertions into the middle.
   std::vector<PlacedList::iterator> PlaceIndex(Nodes.size(), NoPlace);
   for (CfgNode *Node : Nodes) {
-    // The "do ... while(0);" construct is to factor out the
-    // --PlaceIndex and assert() statements before moving to the next
-    // node.
+    // The "do ... while(0);" construct is to factor out the --PlaceIndex and
+    // assert() statements before moving to the next node.
     do {
       if (Node != getEntryNode() && Node->getInEdges().empty()) {
-        // The node has essentially been deleted since it is not a
-        // successor of any other node.
+        // The node has essentially been deleted since it is not a successor of
+        // any other node.
         Unreachable.push_back(Node);
         PlaceIndex[Node->getIndex()] = Unreachable.end();
         Node->setNeedsPlacement(false);
@@ -343,10 +340,10 @@
         continue;
       }
       Node->setNeedsPlacement(false);
-      // Assume for now that the unplaced node is from edge-splitting
-      // and therefore has 1 in-edge and 1 out-edge (actually, possibly
-      // more than 1 in-edge if the predecessor node was contracted).
-      // If this changes in the future, rethink the strategy.
+      // Assume for now that the unplaced node is from edge-splitting and
+      // therefore has 1 in-edge and 1 out-edge (actually, possibly more than 1
+      // in-edge if the predecessor node was contracted). If this changes in
+      // the future, rethink the strategy.
       assert(Node->getInEdges().size() >= 1);
       assert(Node->getOutEdges().size() == 1);
 
@@ -363,8 +360,8 @@
       // Otherwise, place it after the (first) predecessor.
       CfgNode *Pred = Node->getInEdges().front();
       auto PredPosition = PlaceIndex[Pred->getIndex()];
-      // It shouldn't be the case that PredPosition==NoPlace, but if
-      // that somehow turns out to be true, we just insert Node before
+      // It shouldn't be the case that PredPosition==NoPlace, but if that
+      // somehow turns out to be true, we just insert Node before
       // PredPosition=NoPlace=Placed.end() .
       if (PredPosition != NoPlace)
         ++PredPosition;
@@ -475,9 +472,9 @@
   LA.computeLoopNestDepth();
 }
 
-// This is a lightweight version of live-range-end calculation.  Marks the last
+// This is a lightweight version of live-range-end calculation. Marks the last
 // use of only those variables whose definition and uses are completely with a
-// single block.  It is a quick single pass and doesn't need to iterate until
+// single block. It is a quick single pass and doesn't need to iterate until
 // convergence.
 void Cfg::livenessLightweight() {
   TimerMarker T(TimerStack::TT_livenessLightweight, this);
@@ -513,9 +510,9 @@
     for (Variable *Var : Variables)
       Var->resetLiveRange();
   }
-  // Make a final pass over each node to delete dead instructions,
-  // collect the first and last instruction numbers, and add live
-  // range segments for that node.
+  // Make a final pass over each node to delete dead instructions, collect the
+  // first and last instruction numbers, and add live range segments for that
+  // node.
   for (CfgNode *Node : Nodes) {
     InstNumberT FirstInstNum = Inst::NumberSentinel;
     InstNumberT LastInstNum = Inst::NumberSentinel;
@@ -538,19 +535,18 @@
       }
     }
     if (Mode == Liveness_Intervals) {
-      // Special treatment for live in-args.  Their liveness needs to
-      // extend beyond the beginning of the function, otherwise an arg
-      // whose only use is in the first instruction will end up having
-      // the trivial live range [2,2) and will *not* interfere with
-      // other arguments.  So if the first instruction of the method
-      // is "r=arg1+arg2", both args may be assigned the same
-      // register.  This is accomplished by extending the entry
-      // block's instruction range from [2,n) to [1,n) which will
-      // transform the problematic [2,2) live ranges into [1,2).
+      // Special treatment for live in-args. Their liveness needs to extend
+      // beyond the beginning of the function, otherwise an arg whose only use
+      // is in the first instruction will end up having the trivial live range
+      // [2,2) and will *not* interfere with other arguments. So if the first
+      // instruction of the method is "r=arg1+arg2", both args may be assigned
+      // the same register. This is accomplished by extending the entry block's
+      // instruction range from [2,n) to [1,n) which will transform the
+      // problematic [2,2) live ranges into [1,2).
       if (Node == getEntryNode()) {
-        // TODO(stichnot): Make it a strict requirement that the entry
-        // node gets the lowest instruction numbers, so that extending
-        // the live range for in-args is guaranteed to work.
+        // TODO(stichnot): Make it a strict requirement that the entry node
+        // gets the lowest instruction numbers, so that extending the live
+        // range for in-args is guaranteed to work.
         FirstInstNum = Inst::NumberExtended;
       }
       Node->livenessAddIntervals(getLiveness(), FirstInstNum, LastInstNum);
@@ -558,8 +554,8 @@
   }
 }
 
-// Traverse every Variable of every Inst and verify that it
-// appears within the Variable's computed live range.
+// Traverse every Variable of every Inst and verify that it appears within the
+// Variable's computed live range.
 bool Cfg::validateLiveness() const {
   TimerMarker T(TimerStack::TT_validateLiveness, this);
   bool Valid = true;
@@ -579,13 +575,12 @@
           const bool IsDest = true;
           if (!Dest->getLiveRange().containsValue(InstNumber, IsDest))
             Invalid = true;
-          // Check that this instruction actually *begins* Dest's live
-          // range, by checking that Dest is not live in the previous
-          // instruction.  As a special exception, we don't check this
-          // for the first instruction of the block, because a Phi
-          // temporary may be live at the end of the previous block,
-          // and if it is also assigned in the first instruction of
-          // this block, the adjacent live ranges get merged.
+          // Check that this instruction actually *begins* Dest's live range,
+          // by checking that Dest is not live in the previous instruction. As
+          // a special exception, we don't check this for the first instruction
+          // of the block, because a Phi temporary may be live at the end of
+          // the previous block, and if it is also assigned in the first
+          // instruction of this block, the adjacent live ranges get merged.
           if (static_cast<class Inst *>(&Inst) != FirstInst &&
               !Inst.isDestNonKillable() &&
               Dest->getLiveRange().containsValue(InstNumber - 1, IsDest))
@@ -642,9 +637,9 @@
 
 // ======================== Dump routines ======================== //
 
-// emitTextHeader() is not target-specific (apart from what is
-// abstracted by the Assembler), so it is defined here rather than in
-// the target lowering class.
+// emitTextHeader() is not target-specific (apart from what is abstracted by
+// the Assembler), so it is defined here rather than in the target lowering
+// class.
 void Cfg::emitTextHeader(const IceString &MangledName, GlobalContext *Ctx,
                          const Assembler *Asm) {
   if (!BuildDefs::dump())
@@ -674,8 +669,8 @@
   switch (Ctx->getFlags().getOutFileType()) {
   case FT_Elf:
   case FT_Iasm: {
-    // The emission needs to be delayed until the after the text section so save
-    // the offsets in the global context.
+    // The emission needs to be delayed until after the text section so save
+    // the offsets in the global context.
     IceString MangledName = Ctx->mangleName(getFunctionName());
     for (const InstJumpTable *JumpTable : JumpTables) {
       SizeT NumTargets = JumpTable->getNumTargets();
@@ -726,8 +721,8 @@
 
 void Cfg::emitIAS() {
   TimerMarker T(TimerStack::TT_emit, this);
-  // The emitIAS() routines emit into the internal assembler buffer,
-  // so there's no need to lock the streams.
+  // The emitIAS() routines emit into the internal assembler buffer, so there's
+  // no need to lock the streams.
   deleteJumpTableInsts();
   const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing();
   for (CfgNode *Node : Nodes) {
diff --git a/src/IceCfg.h b/src/IceCfg.h
index ca9d706..4147dd9 100644
--- a/src/IceCfg.h
+++ b/src/IceCfg.h
@@ -8,8 +8,8 @@
 //===----------------------------------------------------------------------===//
 ///
 /// \file
-/// This file declares the Cfg class, which represents the control flow
-/// graph and the overall per-function compilation context.
+/// This file declares the Cfg class, which represents the control flow graph
+/// and the overall per-function compilation context.
 ///
 //===----------------------------------------------------------------------===//
 
@@ -50,9 +50,9 @@
   GlobalContext *getContext() const { return Ctx; }
   uint32_t getSequenceNumber() const { return SequenceNumber; }
 
-  /// Returns true if any of the specified options in the verbose mask
-  /// are set.  If the argument is omitted, it checks if any verbose
-  /// options at all are set.
+  /// Returns true if any of the specified options in the verbose mask are set.
+  /// If the argument is omitted, it checks if any verbose options at all are
+  /// set.
   bool isVerbose(VerboseMask Mask = IceV_All) const { return VMask & Mask; }
   void setVerbose(VerboseMask Mask) { VMask = Mask; }
 
@@ -72,11 +72,10 @@
   /// \name Manage errors.
   /// @{
 
-  /// Translation error flagging.  If support for some construct is
-  /// known to be missing, instead of an assertion failure, setError()
-  /// should be called and the error should be propagated back up.
-  /// This way, we can gracefully fail to translate and let a fallback
-  /// translator handle the function.
+  /// Translation error flagging. If support for some construct is known to be
+  /// missing, instead of an assertion failure, setError() should be called and
+  /// the error should be propagated back up. This way, we can gracefully fail
+  /// to translate and let a fallback translator handle the function.
   void setError(const IceString &Message);
   bool hasError() const { return HasError; }
   IceString getError() const { return ErrorMessage; }
@@ -98,11 +97,10 @@
   /// @}
 
   using IdentifierIndexType = int32_t;
-  /// Adds a name to the list and returns its index, suitable for the
-  /// argument to getIdentifierName().  No checking for duplicates is
-  /// done.  This is generally used for node names and variable names
-  /// to avoid embedding a std::string inside an arena-allocated
-  /// object.
+  /// Adds a name to the list and returns its index, suitable for the argument
+  /// to getIdentifierName(). No checking for duplicates is done. This is
+  /// generally used for node names and variable names to avoid embedding a
+  /// std::string inside an arena-allocated object.
   IdentifierIndexType addIdentifierName(const IceString &Name) {
     IdentifierIndexType Index = IdentifierNames.size();
     IdentifierNames.push_back(Name);
@@ -122,8 +120,8 @@
   /// \name Manage Variables.
   /// @{
 
-  /// Create a new Variable with a particular type and an optional
-  /// name.  The Node argument is the node where the variable is defined.
+  /// Create a new Variable with a particular type and an optional name. The
+  /// Node argument is the node where the variable is defined.
   // TODO(jpp): untemplate this with separate methods: makeVariable,
   // makeSpillVariable, and makeStackVariable.
   template <typename T = Variable> T *makeVariable(Type Ty) {
@@ -176,9 +174,9 @@
 
   /// Passes over the CFG.
   void translate();
-  /// After the CFG is fully constructed, iterate over the nodes and
-  /// compute the predecessor and successor edges, in the form of
-  /// CfgNode::InEdges[] and CfgNode::OutEdges[].
+  /// After the CFG is fully constructed, iterate over the nodes and compute the
+  /// predecessor and successor edges, in the form of CfgNode::InEdges[] and
+  /// CfgNode::OutEdges[].
   void computeInOutEdges();
   void renumberInstructions();
   void placePhiLoads();
@@ -277,16 +275,15 @@
   std::unique_ptr<VariableDeclarationList> GlobalInits;
   std::vector<InstJumpTable *> JumpTables;
 
-  /// CurrentNode is maintained during dumping/emitting just for
-  /// validating Variable::DefNode.  Normally, a traversal over
-  /// CfgNodes maintains this, but before global operations like
-  /// register allocation, resetCurrentNode() should be called to avoid
-  /// spurious validation failures.
+  /// CurrentNode is maintained during dumping/emitting just for validating
+  /// Variable::DefNode. Normally, a traversal over CfgNodes maintains this, but
+  /// before global operations like register allocation, resetCurrentNode()
+  /// should be called to avoid spurious validation failures.
   const CfgNode *CurrentNode = nullptr;
 
-  /// Maintain a pointer in TLS to the current Cfg being translated.
-  /// This is primarily for accessing its allocator statelessly, but
-  /// other uses are possible.
+  /// Maintain a pointer in TLS to the current Cfg being translated. This is
+  /// primarily for accessing its allocator statelessly, but other uses are
+  /// possible.
   ICE_TLS_DECLARE_FIELD(const Cfg *, CurrentCfg);
 
 public:
diff --git a/src/IceCfgNode.cpp b/src/IceCfgNode.cpp
index 0ccc6ea..31a6e8a 100644
--- a/src/IceCfgNode.cpp
+++ b/src/IceCfgNode.cpp
@@ -8,8 +8,8 @@
 //===----------------------------------------------------------------------===//
 ///
 /// \file
-/// This file implements the CfgNode class, including the complexities
-/// of instruction insertion and in-edge calculation.
+/// This file implements the CfgNode class, including the complexities of
+/// instruction insertion and in-edge calculation.
 ///
 //===----------------------------------------------------------------------===//
 
@@ -29,17 +29,16 @@
 CfgNode::CfgNode(Cfg *Func, SizeT LabelNumber)
     : Func(Func), Number(LabelNumber), LabelNumber(LabelNumber) {}
 
-// Returns the name the node was created with.  If no name was given,
-// it synthesizes a (hopefully) unique name.
+// Returns the name the node was created with. If no name was given, it
+// synthesizes a (hopefully) unique name.
 IceString CfgNode::getName() const {
   if (NameIndex >= 0)
     return Func->getIdentifierName(NameIndex);
   return "__" + std::to_string(LabelNumber);
 }
 
-// Adds an instruction to either the Phi list or the regular
-// instruction list.  Validates that all Phis are added before all
-// regular instructions.
+// Adds an instruction to either the Phi list or the regular instruction list.
+// Validates that all Phis are added before all regular instructions.
 void CfgNode::appendInst(Inst *Inst) {
   ++InstCountEstimate;
   if (InstPhi *Phi = llvm::dyn_cast<InstPhi>(Inst)) {
@@ -53,11 +52,10 @@
   }
 }
 
-// Renumbers the non-deleted instructions in the node.  This needs to
-// be done in preparation for live range analysis.  The instruction
-// numbers in a block must be monotonically increasing.  The range of
-// instruction numbers in a block, from lowest to highest, must not
-// overlap with the range of any other block.
+// Renumbers the non-deleted instructions in the node. This needs to be done in
+// preparation for live range analysis. The instruction numbers in a block must
+// be monotonically increasing. The range of instruction numbers in a block,
+// from lowest to highest, must not overlap with the range of any other block.
 void CfgNode::renumberInstructions() {
   InstNumberT FirstNumber = Func->getNextInstNumber();
   for (Inst &I : Phis)
@@ -67,10 +65,9 @@
   InstCountEstimate = Func->getNextInstNumber() - FirstNumber;
 }
 
-// When a node is created, the OutEdges are immediately known, but the
-// InEdges have to be built up incrementally.  After the CFG has been
-// constructed, the computePredecessors() pass finalizes it by
-// creating the InEdges list.
+// When a node is created, the OutEdges are immediately known, but the InEdges
+// have to be built up incrementally. After the CFG has been constructed, the
+// computePredecessors() pass finalizes it by creating the InEdges list.
 void CfgNode::computePredecessors() {
   for (CfgNode *Succ : OutEdges)
     Succ->InEdges.push_back(this);
@@ -80,19 +77,19 @@
   OutEdges = Insts.rbegin()->getTerminatorEdges();
 }
 
-// Validate each Phi instruction in the node with respect to control flow.  For
-// every phi argument, its label must appear in the predecessor list.  For each
-// predecessor, there must be a phi argument with that label.  We don't check
+// Validate each Phi instruction in the node with respect to control flow. For
+// every phi argument, its label must appear in the predecessor list. For each
+// predecessor, there must be a phi argument with that label. We don't check
 // that phi arguments with the same label have the same value.
 void CfgNode::validatePhis() {
   for (Inst &Instr : Phis) {
     auto *Phi = llvm::cast<InstPhi>(&Instr);
-    // We do a simple O(N^2) algorithm to check for consistency.  Even so, it
-    // shows up as only about 0.2% of the total translation time.  But if
-    // necessary, we could improve the complexity by using a hash table to count
-    // how many times each node is referenced in the Phi instruction, and how
-    // many times each node is referenced in the incoming edge list, and compare
-    // the two for equality.
+    // We do a simple O(N^2) algorithm to check for consistency. Even so, it
+    // shows up as only about 0.2% of the total translation time. But if
+    // necessary, we could improve the complexity by using a hash table to
+    // count how many times each node is referenced in the Phi instruction, and
+    // how many times each node is referenced in the incoming edge list, and
+    // compare the two for equality.
     for (SizeT i = 0; i < Phi->getSrcSize(); ++i) {
       CfgNode *Label = Phi->getLabel(i);
       bool Found = false;
@@ -120,17 +117,17 @@
   }
 }
 
-// This does part 1 of Phi lowering, by creating a new dest variable
-// for each Phi instruction, replacing the Phi instruction's dest with
-// that variable, and adding an explicit assignment of the old dest to
-// the new dest.  For example,
+// This does part 1 of Phi lowering, by creating a new dest variable for each
+// Phi instruction, replacing the Phi instruction's dest with that variable,
+// and adding an explicit assignment of the old dest to the new dest. For
+// example,
 //   a=phi(...)
 // changes to
 //   "a_phi=phi(...); a=a_phi".
 //
-// This is in preparation for part 2 which deletes the Phi
-// instructions and appends assignment instructions to predecessor
-// blocks.  Note that this transformation preserves SSA form.
+// This is in preparation for part 2 which deletes the Phi instructions and
+// appends assignment instructions to predecessor blocks. Note that this
+// transformation preserves SSA form.
 void CfgNode::placePhiLoads() {
   for (Inst &I : Phis) {
     auto Phi = llvm::dyn_cast<InstPhi>(&I);
@@ -138,38 +135,35 @@
   }
 }
 
-// This does part 2 of Phi lowering.  For each Phi instruction at each
-// out-edge, create a corresponding assignment instruction, and add
-// all the assignments near the end of this block.  They need to be
-// added before any branch instruction, and also if the block ends
-// with a compare instruction followed by a branch instruction that we
-// may want to fuse, it's better to insert the new assignments before
-// the compare instruction. The tryOptimizedCmpxchgCmpBr() method
-// assumes this ordering of instructions.
+// This does part 2 of Phi lowering. For each Phi instruction at each out-edge,
+// create a corresponding assignment instruction, and add all the assignments
+// near the end of this block. They need to be added before any branch
+// instruction, and also if the block ends with a compare instruction followed
+// by a branch instruction that we may want to fuse, it's better to insert the
+// new assignments before the compare instruction. The
+// tryOptimizedCmpxchgCmpBr() method assumes this ordering of instructions.
 //
-// Note that this transformation takes the Phi dest variables out of
-// SSA form, as there may be assignments to the dest variable in
-// multiple blocks.
+// Note that this transformation takes the Phi dest variables out of SSA form,
+// as there may be assignments to the dest variable in multiple blocks.
 void CfgNode::placePhiStores() {
   // Find the insertion point.
   InstList::iterator InsertionPoint = Insts.end();
-  // Every block must end in a terminator instruction, and therefore
-  // must have at least one instruction, so it's valid to decrement
-  // InsertionPoint (but assert just in case).
+  // Every block must end in a terminator instruction, and therefore must have
+  // at least one instruction, so it's valid to decrement InsertionPoint (but
+  // assert just in case).
   assert(InsertionPoint != Insts.begin());
   --InsertionPoint;
-  // Confirm that InsertionPoint is a terminator instruction.  Calling
-  // getTerminatorEdges() on a non-terminator instruction will cause
-  // an llvm_unreachable().
+  // Confirm that InsertionPoint is a terminator instruction. Calling
+  // getTerminatorEdges() on a non-terminator instruction will cause an
+  // llvm_unreachable().
   (void)InsertionPoint->getTerminatorEdges();
   // SafeInsertionPoint is always immediately before the terminator
-  // instruction.  If the block ends in a compare and conditional
-  // branch, it's better to place the Phi store before the compare so
-  // as not to interfere with compare/branch fusing.  However, if the
-  // compare instruction's dest operand is the same as the new
-  // assignment statement's source operand, this can't be done due to
-  // data dependences, so we need to fall back to the
-  // SafeInsertionPoint.  To illustrate:
+  // instruction. If the block ends in a compare and conditional branch, it's
+  // better to place the Phi store before the compare so as not to interfere
+  // with compare/branch fusing. However, if the compare instruction's dest
+  // operand is the same as the new assignment statement's source operand, this
+  // can't be done due to data dependences, so we need to fall back to the
+  // SafeInsertionPoint. To illustrate:
   //   ; <label>:95
   //   %97 = load i8* %96, align 1
   //   %98 = icmp ne i8 %97, 0
@@ -188,9 +182,8 @@
   //   %100 = %100_phi
   //   %101 = %101_phi
   //
-  // TODO(stichnot): It may be possible to bypass this whole
-  // SafeInsertionPoint mechanism.  If a source basic block ends in a
-  // conditional branch:
+  // TODO(stichnot): It may be possible to bypass this whole SafeInsertionPoint
+  // mechanism. If a source basic block ends in a conditional branch:
   //   labelSource:
   //   ...
   //   br i1 %foo, label %labelTrue, label %labelFalse
@@ -200,17 +193,17 @@
   // then we actually know the constant i1 value of the Phi operand:
   //   labelTrue:
   //   %bar = phi i1 [ true, %labelSource ], ...
-  // It seems that this optimization should be done by clang or opt,
-  // but we could also do it here.
+  // It seems that this optimization should be done by clang or opt, but we
+  // could also do it here.
   InstList::iterator SafeInsertionPoint = InsertionPoint;
-  // Keep track of the dest variable of a compare instruction, so that
-  // we insert the new instruction at the SafeInsertionPoint if the
-  // compare's dest matches the Phi-lowered assignment's source.
+  // Keep track of the dest variable of a compare instruction, so that we
+  // insert the new instruction at the SafeInsertionPoint if the compare's dest
+  // matches the Phi-lowered assignment's source.
   Variable *CmpInstDest = nullptr;
-  // If the current insertion point is at a conditional branch
-  // instruction, and the previous instruction is a compare
-  // instruction, then we move the insertion point before the compare
-  // instruction so as not to interfere with compare/branch fusing.
+  // If the current insertion point is at a conditional branch instruction, and
+  // the previous instruction is a compare instruction, then we move the
+  // insertion point before the compare instruction so as not to interfere with
+  // compare/branch fusing.
   if (InstBr *Branch = llvm::dyn_cast<InstBr>(InsertionPoint)) {
     if (!Branch->isUnconditional()) {
       if (InsertionPoint != Insts.begin()) {
@@ -249,13 +242,12 @@
     I.setDeleted();
 }
 
-// Splits the edge from Pred to this node by creating a new node and
-// hooking up the in and out edges appropriately.  (The EdgeIndex
-// parameter is only used to make the new node's name unique when
-// there are multiple edges between the same pair of nodes.)  The new
-// node's instruction list is initialized to the empty list, with no
-// terminator instruction. There must not be multiple edges from Pred
-// to this node so all Inst::getTerminatorEdges implementations must
+// Splits the edge from Pred to this node by creating a new node and hooking up
+// the in and out edges appropriately. (The EdgeIndex parameter is only used to
+// make the new node's name unique when there are multiple edges between the
+// same pair of nodes.) The new node's instruction list is initialized to the
+// empty list, with no terminator instruction. There must not be multiple edges
+// from Pred to this node so all Inst::getTerminatorEdges implementations must
 // not contain duplicates.
 CfgNode *CfgNode::splitIncomingEdge(CfgNode *Pred, SizeT EdgeIndex) {
   CfgNode *NewNode = Func->makeNode();
@@ -267,8 +259,8 @@
   if (BuildDefs::dump())
     NewNode->setName("split_" + Pred->getName() + "_" + getName() + "_" +
                      std::to_string(EdgeIndex));
-  // The new node is added to the end of the node list, and will later
-  // need to be sorted into a reasonable topological order.
+  // The new node is added to the end of the node list, and will later need to
+  // be sorted into a reasonable topological order.
   NewNode->setNeedsPlacement(true);
   // Repoint Pred's out-edge.
   bool Found = false;
@@ -319,31 +311,31 @@
 
 } // end of anonymous namespace
 
-// This the "advanced" version of Phi lowering for a basic block, in contrast to
-// the simple version that lowers through assignments involving temporaries.
+// This is the "advanced" version of Phi lowering for a basic block, in contrast
+// to the simple version that lowers through assignments involving temporaries.
 //
 // All Phi instructions in a basic block are conceptually executed in parallel.
 // However, if we lower Phis early and commit to a sequential ordering, we may
 // end up creating unnecessary interferences which lead to worse register
-// allocation.  Delaying Phi scheduling until after register allocation can help
-// unless there are no free registers for shuffling registers or stack slots and
-// spilling becomes necessary.
+// allocation. Delaying Phi scheduling until after register allocation can help
+// unless there are no free registers for shuffling registers or stack slots
+// and spilling becomes necessary.
 //
 // The advanced Phi lowering starts by finding a topological sort of the Phi
-// instructions, where "A=B" comes before "B=C" due to the anti-dependence on B.
-// Preexisting register assignments are considered in the topological sort.  If
-// a topological sort is not possible due to a cycle, the cycle is broken by
-// introducing a non-parallel temporary.  For example, a cycle arising from a
-// permutation like "A=B;B=C;C=A" can become "T=A;A=B;B=C;C=T".  All else being
+// instructions, where "A=B" comes before "B=C" due to the anti-dependence on
+// B. Preexisting register assignments are considered in the topological sort.
+// If a topological sort is not possible due to a cycle, the cycle is broken by
+// introducing a non-parallel temporary. For example, a cycle arising from a
+// permutation like "A=B;B=C;C=A" can become "T=A;A=B;B=C;C=T". All else being
 // equal, prefer to schedule assignments with register-allocated Src operands
 // earlier, in case that register becomes free afterwards, and prefer to
 // schedule assignments with register-allocated Dest variables later, to keep
 // that register free for longer.
 //
 // Once the ordering is determined, the Cfg edge is split and the assignment
-// list is lowered by the target lowering layer.  Since the assignment lowering
+// list is lowered by the target lowering layer. Since the assignment lowering
 // may create new infinite-weight temporaries, a follow-on register allocation
-// pass will be needed.  To prepare for this, liveness (including live range
+// pass will be needed. To prepare for this, liveness (including live range
 // calculation) of the split nodes needs to be calculated, and liveness of the
 // original node need to be updated to "undo" the effects of the phi
 // assignments.
@@ -355,7 +347,7 @@
 // allocation pass is run, focusing only on pre-colored and infinite-weight
 // variables, similar to Om1 register allocation (except without the need to
 // specially compute these variables' live ranges, since they have already been
-// precisely calculated).  The register allocator in this mode needs the ability
+// precisely calculated). The register allocator in this mode needs the ability
 // to forcibly spill and reload registers in case none are naturally available.
 void CfgNode::advancedPhiLowering() {
   if (getPhis().empty())
@@ -403,17 +395,16 @@
       Desc[I].Src = Src;
       Desc[I].Processed = false;
       Desc[I].NumPred = 0;
-      // Cherry-pick any trivial assignments, so that they don't
-      // contribute to the running complexity of the topological sort.
+      // Cherry-pick any trivial assignments, so that they don't contribute to
+      // the running complexity of the topological sort.
       if (sameVarOrReg(Dest, Src)) {
         Desc[I].Processed = true;
         --Remaining;
         if (Dest != Src)
-          // If Dest and Src are syntactically the same, don't bother
-          // adding the assignment, because in all respects it would
-          // be redundant, and if Dest/Src are on the stack, the
-          // target lowering may naively decide to lower it using a
-          // temporary register.
+          // If Dest and Src are syntactically the same, don't bother adding
+          // the assignment, because in all respects it would be redundant, and
+          // if Dest/Src are on the stack, the target lowering may naively
+          // decide to lower it using a temporary register.
           Split->appendInst(InstAssign::create(Func, Dest, Src));
       }
     }
@@ -427,8 +418,8 @@
         if (Desc[J].Processed)
           continue;
         if (I != J) {
-          // There shouldn't be two Phis with the same Dest variable
-          // or register.
+          // There shouldn't be two Phis with the same Dest variable or
+          // register.
           assert(!sameVarOrReg(Dest, Desc[J].Dest));
         }
         const Operand *Src = Desc[J].Src;
@@ -443,8 +434,7 @@
     constexpr int32_t WeightNoPreds = 4;
     // Prefer Src as a register because the register might free up.
     constexpr int32_t WeightSrcIsReg = 2;
-    // Prefer Dest not as a register because the register stays free
-    // longer.
+    // Prefer Dest not as a register because the register stays free longer.
     constexpr int32_t WeightDestNotReg = 1;
 
     for (size_t I = 0; I < NumPhis; ++I) {
@@ -461,11 +451,10 @@
       Desc[I].Weight = Weight;
     }
 
-    // Repeatedly choose and process the best candidate in the
-    // topological sort, until no candidates remain.  This
-    // implementation is O(N^2) where N is the number of Phi
-    // instructions, but with a small constant factor compared to a
-    // likely implementation of O(N) topological sort.
+    // Repeatedly choose and process the best candidate in the topological
+    // sort, until no candidates remain. This implementation is O(N^2) where N
+    // is the number of Phi instructions, but with a small constant factor
+    // compared to a likely implementation of O(N) topological sort.
     for (; Remaining; --Remaining) {
       size_t BestIndex = 0;
       int32_t BestWeight = -1;
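
The weighting and candidate-selection loop described in the hunk above can be illustrated with a small standalone sketch. The PhiDesc fields, the sample values, and the printf reporting below are hypothetical stand-ins, not Subzero's actual data structures, and the NumPred bookkeeping that the real pass performs after each step is omitted.

#include <cstdint>
#include <cstdio>
#include <vector>

// Hypothetical, simplified descriptor for one Phi assignment; the real pass
// stores instruction and variable pointers instead of plain flags.
struct PhiDesc {
  bool Processed;
  int NumPred; // Unprocessed assignments that still read our Dest.
  bool SrcIsReg;
  bool DestIsReg;
  int32_t Weight;
};

int main() {
  // Same weighting idea as the comment above: strongly prefer assignments with
  // no pending predecessors, then prefer a register Src (the register might
  // free up), then prefer a non-register Dest (the register stays free longer).
  constexpr int32_t WeightNoPreds = 4;
  constexpr int32_t WeightSrcIsReg = 2;
  constexpr int32_t WeightDestNotReg = 1;

  std::vector<PhiDesc> Desc = {{false, 0, true, false, 0},
                               {false, 1, false, true, 0},
                               {false, 0, false, false, 0}};
  for (PhiDesc &D : Desc)
    D.Weight = (D.NumPred == 0 ? WeightNoPreds : 0) +
               (D.SrcIsReg ? WeightSrcIsReg : 0) +
               (D.DestIsReg ? 0 : WeightDestNotReg);

  // O(N^2) selection: each of the N iterations scans all N descriptors for the
  // best remaining candidate. A real implementation would also update NumPred
  // (and therefore Weight) of dependent assignments after each step.
  for (size_t Remaining = Desc.size(); Remaining; --Remaining) {
    size_t BestIndex = 0;
    int32_t BestWeight = -1;
    for (size_t I = 0; I < Desc.size(); ++I) {
      if (!Desc[I].Processed && Desc[I].Weight > BestWeight) {
        BestIndex = I;
        BestWeight = Desc[I].Weight;
      }
    }
    std::printf("processing phi %zu (weight %d)\n", BestIndex, BestWeight);
    Desc[BestIndex].Processed = true;
  }
  return 0;
}
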
@@ -488,9 +477,9 @@
       // Break a cycle by introducing a temporary.
       if (Desc[BestIndex].NumPred) {
         bool Found = false;
-        // If the target instruction "A=B" is part of a cycle, find
-        // the "X=A" assignment in the cycle because it will have to
-        // be rewritten as "X=tmp".
+        // If the target instruction "A=B" is part of a cycle, find the "X=A"
+        // assignment in the cycle because it will have to be rewritten as
+        // "X=tmp".
         for (size_t J = 0; !Found && J < NumPhis; ++J) {
           if (Desc[J].Processed)
             continue;
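
As a concrete, purely illustrative instance of the cycle-breaking rule above: suppose the two remaining phi assignments form the cycle a = b and b = a. Emitting either one first would clobber the value the other still needs, so the "X=A" member of the cycle is rewritten to read a temporary. The snippet below only demonstrates the value flow with plain ints; the real pass appends InstAssign instructions to the split node instead.

#include <cassert>

int main() {
  // Hypothetical two-element phi cycle: a receives b's old value and b receives
  // a's old value. The cycle is broken with a temporary, so "b = a" becomes
  // "b = tmp" after "tmp = a" is emitted first.
  int a = 1, b = 2;
  int tmp = a; // tmp captures a before it is overwritten.
  a = b;
  b = tmp;
  assert(a == 2 && b == 1);
  return 0;
}
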
@@ -510,9 +499,8 @@
       // Now that a cycle (if any) has been broken, create the actual
       // assignment.
       Split->appendInst(InstAssign::create(Func, Dest, Src));
-      // Update NumPred for all Phi assignments using this Phi's Src
-      // as their Dest variable.  Also update Weight if NumPred
-      // dropped from 1 to 0.
+      // Update NumPred for all Phi assignments using this Phi's Src as their
+      // Dest variable. Also update Weight if NumPred dropped from 1 to 0.
       if (auto Var = llvm::dyn_cast<Variable>(Src)) {
         for (size_t I = 0; I < NumPhis; ++I) {
           if (Desc[I].Processed)
@@ -532,10 +520,9 @@
   }
 }
 
-// Does address mode optimization.  Pass each instruction to the
-// TargetLowering object.  If it returns a new instruction
-// (representing the optimized address mode), then insert the new
-// instruction and delete the old.
+// Does address mode optimization. Pass each instruction to the TargetLowering
+// object. If it returns a new instruction (representing the optimized address
+// mode), then insert the new instruction and delete the old.
 void CfgNode::doAddressOpt() {
   TargetLowering *Target = Func->getTarget();
   LoweringContext &Context = Target->getContext();
@@ -567,8 +554,8 @@
   }
 }
 
-// Drives the target lowering.  Passes the current instruction and the
-// next non-deleted instruction for target lowering.
+// Drives the target lowering. Passes the current instruction and the next
+// non-deleted instruction for target lowering.
 void CfgNode::genCode() {
   TargetLowering *Target = Func->getTarget();
   LoweringContext &Context = Target->getContext();
@@ -603,24 +590,23 @@
   }
 }
 
-// Performs liveness analysis on the block.  Returns true if the
-// incoming liveness changed from before, false if it stayed the same.
-// (If it changes, the node's predecessors need to be processed
-// again.)
+// Performs liveness analysis on the block. Returns true if the incoming
+// liveness changed from before, false if it stayed the same. (If it changes,
+// the node's predecessors need to be processed again.)
 bool CfgNode::liveness(Liveness *Liveness) {
   SizeT NumVars = Liveness->getNumVarsInNode(this);
   LivenessBV Live(NumVars);
   LiveBeginEndMap *LiveBegin = nullptr;
   LiveBeginEndMap *LiveEnd = nullptr;
-  // Mark the beginning and ending of each variable's live range
-  // with the sentinel instruction number 0.
+  // Mark the beginning and ending of each variable's live range with the
+  // sentinel instruction number 0.
   if (Liveness->getMode() == Liveness_Intervals) {
     LiveBegin = Liveness->getLiveBegin(this);
     LiveEnd = Liveness->getLiveEnd(this);
     LiveBegin->clear();
     LiveEnd->clear();
-    // Guess that the number of live ranges beginning is roughly the
-    // number of instructions, and same for live ranges ending.
+    // Guess that the number of live ranges beginning is roughly the number of
+    // instructions, and same for live ranges ending.
     LiveBegin->reserve(getInstCountEstimate());
     LiveEnd->reserve(getInstCountEstimate());
   }
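
For readers unfamiliar with the dataflow step the hunk above documents, here is a minimal, self-contained sketch of a per-block backward liveness scan. The SimpleInst representation, the fixed variable count, and the example operands are made up for illustration; they are unrelated to Subzero's Inst, Liveness, and LivenessBV types, and the LiveBegin/LiveEnd bookkeeping is omitted.

#include <bitset>
#include <cstdio>
#include <vector>

constexpr size_t NumVars = 8;

// Toy instruction: one optional defined variable and a list of read variables.
struct SimpleInst {
  int Dest;              // Variable defined, or -1 if none.
  std::vector<int> Srcs; // Variables read.
};

// Scan the block backwards: a def ends a live range (clear the bit) and a use
// begins one when walking backwards (set the bit). The result is the block's
// live-in set; if it differs from the previously computed value, predecessors
// need to be processed again.
std::bitset<NumVars> blockLiveIn(const std::vector<SimpleInst> &Insts,
                                 std::bitset<NumVars> LiveOut) {
  std::bitset<NumVars> Live = LiveOut;
  for (auto It = Insts.rbegin(); It != Insts.rend(); ++It) {
    if (It->Dest >= 0)
      Live.reset(static_cast<size_t>(It->Dest));
    for (int Src : It->Srcs)
      Live.set(static_cast<size_t>(Src));
  }
  return Live;
}

int main() {
  std::vector<SimpleInst> Insts = {{2, {0, 1}}, {3, {2}}, {-1, {3, 4}}};
  std::bitset<NumVars> LiveOut; // Nothing live out of this toy block.
  std::printf("live-in = %s\n", blockLiveIn(Insts, LiveOut).to_string().c_str());
  return 0;
}
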
@@ -643,9 +629,8 @@
       continue;
     I.liveness(I.getNumber(), Live, Liveness, LiveBegin, LiveEnd);
   }
-  // Process phis in forward order so that we can override the
-  // instruction number to be that of the earliest phi instruction in
-  // the block.
+  // Process phis in forward order so that we can override the instruction
+  // number to be that of the earliest phi instruction in the block.
   SizeT NumNonDeadPhis = 0;
   InstNumberT FirstPhiNumber = Inst::NumberSentinel;
   for (Inst &I : Phis) {
@@ -657,18 +642,17 @@
       ++NumNonDeadPhis;
   }
 
-  // When using the sparse representation, after traversing the
-  // instructions in the block, the Live bitvector should only contain
-  // set bits for global variables upon block entry.  We validate this
-  // by shrinking the Live vector and then testing it against the
-  // pre-shrunk version.  (The shrinking is required, but the
-  // validation is not.)
+  // When using the sparse representation, after traversing the instructions in
+  // the block, the Live bitvector should only contain set bits for global
+  // variables upon block entry. We validate this by shrinking the Live vector
+  // and then testing it against the pre-shrunk version. (The shrinking is
+  // required, but the validation is not.)
   LivenessBV LiveOrig = Live;
   Live.resize(Liveness->getNumGlobalVars());
   if (Live != LiveOrig) {
     if (BuildDefs::dump()) {
-      // This is a fatal liveness consistency error.  Print some
-      // diagnostics and abort.
+      // This is a fatal liveness consistency error. Print some diagnostics and
+      // abort.
       Ostream &Str = Func->getContext()->getStrDump();
       Func->resetCurrentNode();
       Str << "LiveOrig-Live =";
@@ -697,13 +681,12 @@
   return Changed;
 }
 
-// Once basic liveness is complete, compute actual live ranges.  It is
-// assumed that within a single basic block, a live range begins at
-// most once and ends at most once.  This is certainly true for pure
-// SSA form.  It is also true once phis are lowered, since each
-// assignment to the phi-based temporary is in a different basic
-// block, and there is a single read that ends the live in the basic
-// block that contained the actual phi instruction.
+// Once basic liveness is complete, compute actual live ranges. It is assumed
+// that within a single basic block, a live range begins at most once and ends
+// at most once. This is certainly true for pure SSA form. It is also true once
+// phis are lowered, since each assignment to the phi-based temporary is in a
+// different basic block, and there is a single read that ends the live range
+// in the basic block that contained the actual phi instruction.
 void CfgNode::livenessAddIntervals(Liveness *Liveness, InstNumberT FirstInstNum,
                                    InstNumberT LastInstNum) {
   TimerMarker T1(TimerStack::TT_liveRange, Func);
@@ -736,14 +719,13 @@
     SizeT i1 = IBB == IBE ? NumVars : IBB->first;
     SizeT i2 = IEB == IEE ? NumVars : IEB->first;
     SizeT i = std::min(i1, i2);
-    // i1 is the Variable number of the next MapBegin entry, and i2 is
-    // the Variable number of the next MapEnd entry.  If i1==i2, then
-    // the Variable's live range begins and ends in this block.  If
-    // i1<i2, then i1's live range begins at instruction IBB->second
-    // and extends through the end of the block.  If i1>i2, then i2's
-    // live range begins at the first instruction of the block and
-    // ends at IEB->second.  In any case, we choose the lesser of i1
-    // and i2 and proceed accordingly.
+    // i1 is the Variable number of the next MapBegin entry, and i2 is the
+    // Variable number of the next MapEnd entry. If i1==i2, then the Variable's
+    // live range begins and ends in this block. If i1<i2, then i1's live range
+    // begins at instruction IBB->second and extends through the end of the
+    // block. If i1>i2, then i2's live range begins at the first instruction of
+    // the block and ends at IEB->second. In any case, we choose the lesser of
+    // i1 and i2 and proceed accordingly.
     InstNumberT LB = i == i1 ? IBB->second : FirstInstNum;
     InstNumberT LE = i == i2 ? IEB->second : LastInstNum + 1;
 
@@ -751,9 +733,9 @@
     if (LB > LE) {
       Var->addLiveRange(FirstInstNum, LE);
       Var->addLiveRange(LB, LastInstNum + 1);
-      // Assert that Var is a global variable by checking that its
-      // liveness index is less than the number of globals.  This
-      // ensures that the LiveInAndOut[] access is valid.
+      // Assert that Var is a global variable by checking that its liveness
+      // index is less than the number of globals. This ensures that the
+      // LiveInAndOut[] access is valid.
       assert(i < Liveness->getNumGlobalVars());
       LiveInAndOut[i] = false;
     } else {
@@ -774,8 +756,8 @@
 }
 
 // If this node contains only deleted instructions, and ends in an
-// unconditional branch, contract the node by repointing all its
-// in-edges to its successor.
+// unconditional branch, contract the node by repointing all its in-edges to
+// its successor.
 void CfgNode::contractIfEmpty() {
   if (InEdges.empty())
     return;
@@ -795,10 +777,10 @@
 
   Branch->setDeleted();
   CfgNode *Successor = OutEdges.front();
-  // Repoint all this node's in-edges to this node's successor, unless
-  // this node's successor is actually itself (in which case the
-  // statement "OutEdges.front()->InEdges.push_back(Pred)" could
-  // invalidate the iterator over this->InEdges).
+  // Repoint all this node's in-edges to this node's successor, unless this
+  // node's successor is actually itself (in which case the statement
+  // "OutEdges.front()->InEdges.push_back(Pred)" could invalidate the iterator
+  // over this->InEdges).
   if (Successor != this) {
     for (CfgNode *Pred : InEdges) {
       for (CfgNode *&I : Pred->OutEdges) {
@@ -814,8 +796,8 @@
     }
 
     // Remove the in-edge to the successor to allow node reordering to make
-    // better decisions. For example it's more helpful to place a node after
-    // a reachable predecessor than an unreachable one (like the one we just
+    // better decisions. For example it's more helpful to place a node after a
+    // reachable predecessor than an unreachable one (like the one we just
     // contracted).
     Successor->InEdges.erase(
         std::find(Successor->InEdges.begin(), Successor->InEdges.end(), this));
@@ -826,10 +808,10 @@
 void CfgNode::doBranchOpt(const CfgNode *NextNode) {
   TargetLowering *Target = Func->getTarget();
   // Find the first opportunity for branch optimization (which will be the last
-  // instruction in the block) and stop. This is sufficient unless there is some
-  // target lowering where we have the possibility of multiple optimizations per
-  // block. Take care with switch lowering as there are multiple unconditional
-  // branches and only the last can be deleted.
+  // instruction in the block) and stop. This is sufficient unless there is
+  // some target lowering where we have the possibility of multiple
+  // optimizations per block. Take care with switch lowering as there are
+  // multiple unconditional branches and only the last can be deleted.
   for (Inst &I : reverse_range(Insts)) {
     if (!I.isDeleted()) {
       Target->doBranchOpt(&I, NextNode);
@@ -869,8 +851,8 @@
         }
       }
     }
-    // Sort the variables by regnum so they are always printed in a
-    // familiar order.
+    // Sort the variables by regnum so they are always printed in a familiar
+    // order.
     std::sort(LiveRegs.begin(), LiveRegs.end(),
               [](const Variable *V1, const Variable *V2) {
                 return V1->getRegNum() < V2->getRegNum();
@@ -892,11 +874,11 @@
     return;
   bool First = true;
   Variable *Dest = Instr->getDest();
-  // Normally we increment the live count for the dest register.  But
-  // we shouldn't if the instruction's IsDestNonKillable flag is set,
-  // because this means that the target lowering created this
-  // instruction as a non-SSA assignment; i.e., a different, previous
-  // instruction started the dest variable's live range.
+  // Normally we increment the live count for the dest register. But we
+  // shouldn't if the instruction's IsDestNonKillable flag is set, because this
+  // means that the target lowering created this instruction as a non-SSA
+  // assignment; i.e., a different, previous instruction started the dest
+  // variable's live range.
   if (!Instr->isDestNonKillable() && Dest && Dest->hasReg())
     ++LiveRegCount[Dest->getRegNum()];
   FOREACH_VAR_IN_INST(Var, *Instr) {
@@ -921,8 +903,8 @@
 void updateStats(Cfg *Func, const Inst *I) {
   if (!BuildDefs::dump())
     return;
-  // Update emitted instruction count, plus fill/spill count for
-  // Variable operands without a physical register.
+  // Update emitted instruction count, plus fill/spill count for Variable
+  // operands without a physical register.
   if (uint32_t Count = I->getEmitInstCount()) {
     Func->getContext()->statsUpdateEmitted(Count);
     if (Variable *Dest = I->getDest()) {
@@ -949,10 +931,10 @@
   bool DecorateAsm =
       Liveness && Func->getContext()->getFlags().getDecorateAsm();
   Str << getAsmName() << ":\n";
-  // LiveRegCount keeps track of the number of currently live
-  // variables that each register is assigned to.  Normally that would
-  // be only 0 or 1, but the register allocator's AllowOverlap
-  // inference allows it to be greater than 1 for short periods.
+  // LiveRegCount keeps track of the number of currently live variables that
+  // each register is assigned to. Normally that would be only 0 or 1, but the
+  // register allocator's AllowOverlap inference allows it to be greater than 1
+  // for short periods.
   std::vector<SizeT> LiveRegCount(Func->getTarget()->getNumRegisters());
   if (DecorateAsm) {
     constexpr bool IsLiveIn = true;
@@ -969,15 +951,14 @@
     if (I.isDeleted())
       continue;
     if (I.isRedundantAssign()) {
-      // Usually, redundant assignments end the live range of the src
-      // variable and begin the live range of the dest variable, with
-      // no net effect on the liveness of their register.  However, if
-      // the register allocator infers the AllowOverlap condition,
-      // then this may be a redundant assignment that does not end the
-      // src variable's live range, in which case the active variable
-      // count for that register needs to be bumped.  That normally
-      // would have happened as part of emitLiveRangesEnded(), but
-      // that isn't called for redundant assignments.
+      // Usually, redundant assignments end the live range of the src variable
+      // and begin the live range of the dest variable, with no net effect on
+      // the liveness of their register. However, if the register allocator
+      // infers the AllowOverlap condition, then this may be a redundant
+      // assignment that does not end the src variable's live range, in which
+      // case the active variable count for that register needs to be bumped.
+      // That normally would have happened as part of emitLiveRangesEnded(),
+      // but that isn't called for redundant assignments.
       Variable *Dest = I.getDest();
       if (DecorateAsm && Dest->hasReg() && !I.isLastUse(I.getSrc(0)))
         ++LiveRegCount[Dest->getRegNum()];
@@ -1010,41 +991,38 @@
         BundleMaskLo(BundleSize - 1), BundleMaskHi(~BundleMaskLo) {}
   // Check whether we're currently within a bundle_lock region.
   bool isInBundleLockRegion() const { return BundleLockStart != End; }
-  // Check whether the current bundle_lock region has the align_to_end
-  // option.
+  // Check whether the current bundle_lock region has the align_to_end option.
   bool isAlignToEnd() const {
     assert(isInBundleLockRegion());
     return llvm::cast<InstBundleLock>(getBundleLockStart())->getOption() ==
            InstBundleLock::Opt_AlignToEnd;
   }
-  // Check whether the entire bundle_lock region falls within the same
-  // bundle.
+  // Check whether the entire bundle_lock region falls within the same bundle.
   bool isSameBundle() const {
     assert(isInBundleLockRegion());
     return SizeSnapshotPre == SizeSnapshotPost ||
            (SizeSnapshotPre & BundleMaskHi) ==
                ((SizeSnapshotPost - 1) & BundleMaskHi);
   }
-  // Get the bundle alignment of the first instruction of the
-  // bundle_lock region.
+  // Get the bundle alignment of the first instruction of the bundle_lock
+  // region.
   intptr_t getPreAlignment() const {
     assert(isInBundleLockRegion());
     return SizeSnapshotPre & BundleMaskLo;
   }
-  // Get the bundle alignment of the first instruction past the
-  // bundle_lock region.
+  // Get the bundle alignment of the first instruction past the bundle_lock
+  // region.
   intptr_t getPostAlignment() const {
     assert(isInBundleLockRegion());
     return SizeSnapshotPost & BundleMaskLo;
   }
-  // Get the iterator pointing to the bundle_lock instruction, e.g. to
-  // roll back the instruction iteration to that point.
+  // Get the iterator pointing to the bundle_lock instruction, e.g. to roll
+  // back the instruction iteration to that point.
   InstList::const_iterator getBundleLockStart() const {
     assert(isInBundleLockRegion());
     return BundleLockStart;
   }
-  // Set up bookkeeping when the bundle_lock instruction is first
-  // processed.
+  // Set up bookkeeping when the bundle_lock instruction is first processed.
   void enterBundleLock(InstList::const_iterator I) {
     assert(!isInBundleLockRegion());
     BundleLockStart = I;
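
A standalone illustration of the bundle arithmetic the helper above relies on, assuming the typical 32-byte bundle size; the snapshot offsets are invented for the example and nothing here is taken from the actual BundleEmitHelper.

#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  const intptr_t BundleSize = 32;               // Assumed bundle size.
  const intptr_t BundleMaskLo = BundleSize - 1; // Offset within a bundle.
  const intptr_t BundleMaskHi = ~BundleMaskLo;  // Which bundle an offset is in.

  const intptr_t SizeSnapshotPre = 60;  // Buffer size at the bundle_lock.
  const intptr_t SizeSnapshotPost = 70; // Buffer size at the bundle_unlock.

  // The locked sequence starts at offset 60 & 31 == 28 within its bundle...
  assert((SizeSnapshotPre & BundleMaskLo) == 28);
  // ...but its last byte (offset 69) lands in the next bundle, so the region
  // does not fit in one bundle and would need nop padding up to offset 64.
  const bool SameBundle = (SizeSnapshotPre & BundleMaskHi) ==
                          ((SizeSnapshotPost - 1) & BundleMaskHi);
  std::printf("same bundle: %s\n", SameBundle ? "yes" : "no");
  assert(!SameBundle);
  return 0;
}
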
@@ -1053,18 +1031,16 @@
     Target->snapshotEmitState();
     assert(isInBundleLockRegion());
   }
-  // Update bookkeeping when the bundle_unlock instruction is
-  // processed.
+  // Update bookkeeping when the bundle_unlock instruction is processed.
   void enterBundleUnlock() {
     assert(isInBundleLockRegion());
     SizeSnapshotPost = Asm->getBufferSize();
   }
-  // Update bookkeeping when we are completely finished with the
-  // bundle_lock region.
+  // Update bookkeeping when we are completely finished with the bundle_lock
+  // region.
   void leaveBundleLockRegion() { BundleLockStart = End; }
-  // Check whether the instruction sequence fits within the current
-  // bundle, and if not, add nop padding to the end of the current
-  // bundle.
+  // Check whether the instruction sequence fits within the current bundle, and
+  // if not, add nop padding to the end of the current bundle.
   void padToNextBundle() {
     assert(isInBundleLockRegion());
     if (!isSameBundle()) {
@@ -1076,8 +1052,8 @@
       assert(Asm->getBufferSize() == SizeSnapshotPre);
     }
   }
-  // If align_to_end is specified, add padding such that the
-  // instruction sequences ends precisely at a bundle boundary.
+  // If align_to_end is specified, add padding such that the instruction
+  // sequence ends precisely at a bundle boundary.
   void padForAlignToEnd() {
     assert(isInBundleLockRegion());
     if (isAlignToEnd()) {
@@ -1098,8 +1074,8 @@
 private:
   Assembler *const Asm;
   TargetLowering *const Target;
-  // End is a sentinel value such that BundleLockStart==End implies
-  // that we are not in a bundle_lock region.
+  // End is a sentinel value such that BundleLockStart==End implies that we are
+  // not in a bundle_lock region.
   const InstList::const_iterator End;
   InstList::const_iterator BundleLockStart;
   const intptr_t BundleSize;
@@ -1116,9 +1092,9 @@
 void CfgNode::emitIAS(Cfg *Func) const {
   Func->setCurrentNode(this);
   Assembler *Asm = Func->getAssembler<>();
-  // TODO(stichnot): When sandboxing, defer binding the node label
-  // until just before the first instruction is emitted, to reduce the
-  // chance that a padding nop is a branch target.
+  // TODO(stichnot): When sandboxing, defer binding the node label until just
+  // before the first instruction is emitted, to reduce the chance that a
+  // padding nop is a branch target.
   Asm->bindCfgNodeLabel(getIndex());
   for (const Inst &I : Phis) {
     if (I.isDeleted())
@@ -1138,33 +1114,33 @@
     return;
   }
 
-  // The remainder of the function handles emission with sandboxing.
-  // There are explicit bundle_lock regions delimited by bundle_lock
-  // and bundle_unlock instructions.  All other instructions are
-  // treated as an implicit one-instruction bundle_lock region.
-  // Emission is done twice for each bundle_lock region.  The first
-  // pass is a preliminary pass, after which we can figure out what
-  // nop padding is needed, then roll back, and make the final pass.
+  // The remainder of the function handles emission with sandboxing. There are
+  // explicit bundle_lock regions delimited by bundle_lock and bundle_unlock
+  // instructions. All other instructions are treated as an implicit
+  // one-instruction bundle_lock region. Emission is done twice for each
+  // bundle_lock region. The first pass is a preliminary pass, after which we
+  // can figure out what nop padding is needed, then roll back, and make the
+  // final pass.
   //
-  // Ideally, the first pass would be speculative and the second pass
-  // would only be done if nop padding were needed, but the structure
-  // of the integrated assembler makes it hard to roll back the state
-  // of label bindings, label links, and relocation fixups.  Instead,
-  // the first pass just disables all mutation of that state.
+  // Ideally, the first pass would be speculative and the second pass would
+  // only be done if nop padding were needed, but the structure of the
+  // integrated assembler makes it hard to roll back the state of label
+  // bindings, label links, and relocation fixups. Instead, the first pass just
+  // disables all mutation of that state.
 
   BundleEmitHelper Helper(Asm, Func->getTarget(), Insts);
   InstList::const_iterator End = Insts.end();
-  // Retrying indicates that we had to roll back to the bundle_lock
-  // instruction to apply padding before the bundle_lock sequence.
+  // Retrying indicates that we had to roll back to the bundle_lock instruction
+  // to apply padding before the bundle_lock sequence.
   bool Retrying = false;
   for (InstList::const_iterator I = Insts.begin(); I != End; ++I) {
     if (I->isDeleted() || I->isRedundantAssign())
       continue;
 
     if (llvm::isa<InstBundleLock>(I)) {
-      // Set up the initial bundle_lock state.  This should not happen
-      // while retrying, because the retry rolls back to the
-      // instruction following the bundle_lock instruction.
+      // Set up the initial bundle_lock state. This should not happen while
+      // retrying, because the retry rolls back to the instruction following
+      // the bundle_lock instruction.
       assert(!Retrying);
       Helper.enterBundleLock(I);
       continue;
@@ -1175,16 +1151,16 @@
       if (Retrying) {
         // Make sure all instructions are in the same bundle.
         assert(Helper.isSameBundle());
-        // If align_to_end is specified, make sure the next
-        // instruction begins the bundle.
+        // If align_to_end is specified, make sure the next instruction begins
+        // the bundle.
         assert(!Helper.isAlignToEnd() || Helper.getPostAlignment() == 0);
         Helper.leaveBundleLockRegion();
         Retrying = false;
       } else {
         // This is the first pass, so roll back for the retry pass.
         Helper.rollback();
-        // Pad to the next bundle if the instruction sequence crossed
-        // a bundle boundary.
+        // Pad to the next bundle if the instruction sequence crossed a bundle
+        // boundary.
         Helper.padToNextBundle();
         // Insert additional padding to make AlignToEnd work.
         Helper.padForAlignToEnd();
@@ -1215,8 +1191,8 @@
     }
   }
 
-  // Don't allow bundle locking across basic blocks, to keep the
-  // backtracking mechanism simple.
+  // Don't allow bundle locking across basic blocks, to keep the backtracking
+  // mechanism simple.
   assert(!Helper.isInBundleLockRegion());
   assert(!Retrying);
 }
diff --git a/src/IceCfgNode.h b/src/IceCfgNode.h
index a4744db..c6aa729 100644
--- a/src/IceCfgNode.h
+++ b/src/IceCfgNode.h
@@ -8,9 +8,8 @@
 //===----------------------------------------------------------------------===//
 ///
 /// \file
-/// This file declares the CfgNode class, which represents a single
-/// basic block as its instruction list, in-edge list, and out-edge
-/// list.
+/// This file declares the CfgNode class, which represents a single basic block
+/// as its instruction list, in-edge list, and out-edge list.
 ///
 //===----------------------------------------------------------------------===//
 
@@ -50,8 +49,8 @@
   void setLoopNestDepth(SizeT NewDepth) { LoopNestDepth = NewDepth; }
   SizeT getLoopNestDepth() const { return LoopNestDepth; }
 
-  /// The HasReturn flag indicates that this node contains a return
-  /// instruction and therefore needs an epilog.
+  /// The HasReturn flag indicates that this node contains a return instruction
+  /// and therefore needs an epilog.
   void setHasReturn() { HasReturn = true; }
   bool getHasReturn() const { return HasReturn; }
 
@@ -73,18 +72,17 @@
   PhiList &getPhis() { return Phis; }
   void appendInst(Inst *Inst);
   void renumberInstructions();
-  /// Rough and generally conservative estimate of the number of
-  /// instructions in the block.  It is updated when an instruction is
-  /// added, but not when deleted.  It is recomputed during
-  /// renumberInstructions().
+  /// Rough and generally conservative estimate of the number of instructions in
+  /// the block. It is updated when an instruction is added, but not when
+  /// deleted. It is recomputed during renumberInstructions().
   InstNumberT getInstCountEstimate() const { return InstCountEstimate; }
   /// @}
 
   /// \name Manage predecessors and successors.
   /// @{
 
-  /// Add a predecessor edge to the InEdges list for each of this
-  /// node's successors.
+  /// Add a predecessor edge to the InEdges list for each of this node's
+  /// successors.
   void computePredecessors();
   void computeSuccessors();
   CfgNode *splitIncomingEdge(CfgNode *Pred, SizeT InEdgeIndex);
diff --git a/src/IceClFlags.cpp b/src/IceClFlags.cpp
index 6c2e984..a27fb3f 100644
--- a/src/IceClFlags.cpp
+++ b/src/IceClFlags.cpp
@@ -8,9 +8,9 @@
 //===----------------------------------------------------------------------===//
 ///
 /// \file
-/// This file defines commandline flags parsing.
-/// This currently relies on llvm::cl to parse.  In the future, the minimal
-/// build can have a simpler parser.
+/// This file defines commandline flags parsing. This currently relies on
+/// llvm::cl to parse. In the future, the minimal build can have a simpler
+/// parser.
 ///
 //===----------------------------------------------------------------------===//
 
@@ -99,16 +99,15 @@
 cl::opt<bool> MockBoundsCheck("mock-bounds-check",
                               cl::desc("Mock bounds checking on loads/stores"));
 
-// Number of translation threads (in addition to the parser thread and
-// the emitter thread).  The special case of 0 means purely
-// sequential, i.e. parser, translator, and emitter all within the
-// same single thread.  (This may need a slight rework if we expand to
-// multiple parser or emitter threads.)
+// Number of translation threads (in addition to the parser thread and the
+// emitter thread). The special case of 0 means purely sequential, i.e. parser,
+// translator, and emitter all within the same single thread. (This may need a
+// slight rework if we expand to multiple parser or emitter threads.)
 cl::opt<uint32_t> NumThreads(
     "threads",
     cl::desc("Number of translation threads (0 for purely sequential)"),
-    // TODO(stichnot): Settle on a good default.  Consider
-    // something related to std::thread::hardware_concurrency().
+    // TODO(stichnot): Settle on a good default. Consider something related to
+    // std::thread::hardware_concurrency().
     cl::init(2));
 
 cl::opt<Ice::OptLevel> OLevel(cl::desc("Optimization level"),
@@ -125,9 +124,9 @@
                        cl::desc("Enable edge splitting for Phi lowering"),
                        cl::init(true));
 
-// TODO(stichnot): See if we can easily use LLVM's -rng-seed option
-// and implementation.  I expect the implementation is different and
-// therefore the tests would need to be changed.
+// TODO(stichnot): See if we can easily use LLVM's -rng-seed option and
+// implementation. I expect the implementation is different and therefore the
+// tests would need to be changed.
 cl::opt<unsigned long long>
     RandomSeed("sz-seed", cl::desc("Seed the random number generator"),
                cl::init(1));
@@ -255,10 +254,10 @@
     "exit-success", cl::desc("Exit with success status, even if errors found"),
     cl::init(false));
 
-// Note: While this flag isn't used in the minimal build, we keep this
-// flag so that tests can set this command-line flag without concern
-// to the type of build. We double check that this flag at runtime
-// to make sure the consistency is maintained.
+// Note: While this flag isn't used in the minimal build, we keep this flag so
+// that tests can set this command-line flag regardless of the type of build.
+// We double-check this flag at runtime to make sure consistency is
+// maintained.
 cl::opt<bool>
     BuildOnRead("build-on-read",
                 cl::desc("Build ICE instructions when reading bitcode"),
@@ -413,8 +412,8 @@
     ::DisableTranslation = true;
 
   Ice::VerboseMask VMask = Ice::IceV_None;
-  // Don't generate verbose messages if routines
-  // to dump messages are not available.
+  // Don't generate verbose messages if routines to dump messages are not
+  // available.
   if (BuildDefs::dump()) {
     for (unsigned i = 0; i != VerboseList.size(); ++i)
       VMask |= VerboseList[i];
diff --git a/src/IceClFlags.h b/src/IceClFlags.h
index 89f5783..87e16cd 100644
--- a/src/IceClFlags.h
+++ b/src/IceClFlags.h
@@ -79,8 +79,8 @@
   void setFunctionSections(bool NewValue) { FunctionSections = NewValue; }
 
   bool getGenerateUnitTestMessages() const {
-    // Note: If dump routines have been turned off, the error messages
-    // will not be readable. Hence, turn off.
+    // Note: If dump routines have been turned off, the error messages will not
+    // be readable. Hence, unit test messages are forced on.
     return !BuildDefs::dump() || GenerateUnitTestMessages;
   }
   void setGenerateUnitTestMessages(bool NewValue) {
diff --git a/src/IceCompileServer.cpp b/src/IceCompileServer.cpp
index b7bc72b..db0694b 100644
--- a/src/IceCompileServer.cpp
+++ b/src/IceCompileServer.cpp
@@ -37,9 +37,8 @@
 
 namespace {
 
-// Define a SmallVector backed buffer as a data stream, so that it
-// can hold the generated binary version of the textual bitcode in the
-// input file.
+// Define a SmallVector-backed buffer as a data stream, so that it can hold the
+// generated binary version of the textual bitcode in the input file.
 class TextDataStreamer : public llvm::DataStreamer {
 public:
   TextDataStreamer() = default;
@@ -129,8 +128,8 @@
     }
     ELFStr.reset(new ELFStreamer(*FdOs.get()));
     Os.reset(FdOs.release());
-    // NaCl sets st_blksize to 0, and LLVM uses that to pick the
-    // default preferred buffer size. Set to something non-zero.
+    // NaCl sets st_blksize to 0, and LLVM uses that to pick the default
+    // preferred buffer size. Set to something non-zero.
     Os->SetBufferSize(1 << 14);
   } break;
   case FT_Asm:
diff --git a/src/IceCompileServer.h b/src/IceCompileServer.h
index e027cbb..8d99927 100644
--- a/src/IceCompileServer.h
+++ b/src/IceCompileServer.h
@@ -8,8 +8,8 @@
 //===----------------------------------------------------------------------===//
 ///
 /// \file
-/// This file declares the compile server. Given a compiler implementation,
-/// it dispatches compile requests to the implementation.
+/// This file declares the compile server. Given a compiler implementation, it
+/// dispatches compile requests to the implementation.
 ///
 //===----------------------------------------------------------------------===//
 
@@ -27,17 +27,17 @@
 
 namespace Ice {
 
-/// A CompileServer awaits compile requests, and dispatches the requests
-/// to a given Compiler. Each request is paired with an input stream,
-/// a context (which has the output stream), and a set of arguments.
-/// The CompileServer takes over the current thread to listen to requests,
-/// and compile requests are handled on separate threads.
+/// A CompileServer awaits compile requests, and dispatches the requests to a
+/// given Compiler. Each request is paired with an input stream, a context
+/// (which has the output stream), and a set of arguments. The CompileServer
+/// takes over the current thread to listen to requests, and compile requests
+/// are handled on separate threads.
 ///
 /// Currently, this only handles a single request.
 ///
-/// When run on the commandline, it receives and therefore dispatches
-/// the request immediately.  When run in the browser, it blocks waiting
-/// for a request.
+/// When run on the commandline, it receives and therefore dispatches the
+/// request immediately. When run in the browser, it blocks waiting for a
+/// request.
 class CompileServer {
   CompileServer() = delete;
   CompileServer(const CompileServer &) = delete;
diff --git a/src/IceCompiler.cpp b/src/IceCompiler.cpp
index 4d3bbd4..b4b6c89 100644
--- a/src/IceCompiler.cpp
+++ b/src/IceCompiler.cpp
@@ -52,8 +52,8 @@
     {"minimal_build", BuildDefs::minimal()},
     {"browser_mode", PNACL_BROWSER_TRANSLATOR}};
 
-// Validates values of build attributes. Prints them to Stream if
-// Stream is non-null.
+// Validates values of build attributes. Prints them to Stream if Stream is
+// non-null.
 void validateAndGenerateBuildAttributes(Ostream *Stream) {
   // List the supported targets.
   if (Stream) {
@@ -100,7 +100,7 @@
   }
 
   // The Minimal build (specifically, when dump()/emit() are not implemented)
-  // allows only --filetype=obj.  Check here to avoid cryptic error messages
+  // allows only --filetype=obj. Check here to avoid cryptic error messages
   // downstream.
   if (!BuildDefs::dump() && Ctx.getFlags().getOutFileType() != FT_Elf) {
     // TODO(stichnot): Access the actual command-line argument via
diff --git a/src/IceCompiler.h b/src/IceCompiler.h
index e121dbb..6239b9f 100644
--- a/src/IceCompiler.h
+++ b/src/IceCompiler.h
@@ -33,8 +33,8 @@
 public:
   Compiler() = default;
 
-  /// Run the compiler with the given GlobalContext for compilation
-  /// state.  Upon error, the Context's error status will be set.
+  /// Run the compiler with the given GlobalContext for compilation state. Upon
+  /// error, the Context's error status will be set.
   void run(const ClFlagsExtra &ExtraFlags, GlobalContext &Ctx,
            std::unique_ptr<llvm::DataStreamer> &&InputStream);
 };
diff --git a/src/IceConditionCodesARM32.h b/src/IceConditionCodesARM32.h
index d897a44..d739310 100644
--- a/src/IceConditionCodesARM32.h
+++ b/src/IceConditionCodesARM32.h
@@ -26,8 +26,8 @@
   CondARM32 &operator=(const CondARM32 &) = delete;
 
 public:
-  /// An enum of codes used for conditional instructions. The enum value
-  /// should match the value used to encode operands in binary instructions.
+  /// An enum of codes used for conditional instructions. The enum value should
+  /// match the value used to encode operands in binary instructions.
   enum Cond {
 #define X(tag, encode, opp, emit) tag = encode,
     ICEINSTARM32COND_TABLE
diff --git a/src/IceConverter.cpp b/src/IceConverter.cpp
index 4450a79..a4d4f53 100644
--- a/src/IceConverter.cpp
+++ b/src/IceConverter.cpp
@@ -52,9 +52,9 @@
 
 // Base class for converting LLVM to ICE.
 // TODO(stichnot): Redesign Converter, LLVM2ICEConverter,
-// LLVM2ICEFunctionConverter, and LLVM2ICEGlobalsConverter with
-// respect to Translator.  In particular, the unique_ptr ownership
-// rules in LLVM2ICEFunctionConverter.
+// LLVM2ICEFunctionConverter, and LLVM2ICEGlobalsConverter with respect to
+// Translator. In particular, the unique_ptr ownership rules in
+// LLVM2ICEFunctionConverter.
 class LLVM2ICEConverter {
   LLVM2ICEConverter() = delete;
   LLVM2ICEConverter(const LLVM2ICEConverter &) = delete;
@@ -73,11 +73,11 @@
   const Ice::TypeConverter TypeConverter;
 };
 
-// Converter from LLVM functions to ICE. The entry point is the
-// convertFunction method.
+// Converter from LLVM functions to ICE. The entry point is the convertFunction
+// method.
 //
-// Note: this currently assumes that the given IR was verified to be
-// valid PNaCl bitcode. Otherwise, the behavior is undefined.
+// Note: this currently assumes that the given IR was verified to be valid
+// PNaCl bitcode. Otherwise, the behavior is undefined.
 class LLVM2ICEFunctionConverter : LLVM2ICEConverter {
   LLVM2ICEFunctionConverter() = delete;
   LLVM2ICEFunctionConverter(const LLVM2ICEFunctionConverter &) = delete;
@@ -107,10 +107,9 @@
       Func->addArg(mapValueToIceVar(ArgI));
     }
 
-    // Make an initial pass through the block list just to resolve the
-    // blocks in the original linearized order.  Otherwise the ICE
-    // linearized order will be affected by branch targets in
-    // terminator instructions.
+    // Make an initial pass through the block list just to resolve the blocks
+    // in the original linearized order. Otherwise the ICE linearized order
+    // will be affected by branch targets in terminator instructions.
     for (const BasicBlock &BBI : *F)
       mapBasicBlockToNode(&BBI);
     for (const BasicBlock &BBI : *F)
@@ -122,9 +121,8 @@
     Converter.translateFcn(std::move(Func));
   }
 
-  // convertConstant() does not use Func or require it to be a valid
-  // Ice::Cfg pointer.  As such, it's suitable for e.g. constructing
-  // global initializers.
+  // convertConstant() does not use Func or require it to be a valid Ice::Cfg
+  // pointer. As such, it's suitable for e.g. constructing global initializers.
   Ice::Constant *convertConstant(const Constant *Const) {
     if (const auto GV = dyn_cast<GlobalValue>(Const)) {
       Ice::GlobalDeclaration *Decl = getConverter().getGlobalDeclaration(GV);
@@ -197,9 +195,8 @@
     return IceTy;
   }
 
-  // Given an LLVM instruction and an operand number, produce the
-  // Ice::Operand this refers to. If there's no such operand, return
-  // nullptr.
+  // Given an LLVM instruction and an operand number, produce the Ice::Operand
+  // this refers to. If there's no such operand, return nullptr.
   Ice::Operand *convertOperand(const Instruction *Inst, unsigned OpNum) {
     if (OpNum >= Inst->getNumOperands()) {
       return nullptr;
@@ -551,8 +548,8 @@
     Ice::Variable *Dest = mapValueToIceVar(Inst);
     Ice::Operand *CallTarget = convertValue(Inst->getCalledValue());
     unsigned NumArgs = Inst->getNumArgOperands();
-    // Note: Subzero doesn't (yet) do anything special with the Tail
-    // flag in the bitcode, i.e. CallInst::isTailCall().
+    // Note: Subzero doesn't (yet) do anything special with the Tail flag in
+    // the bitcode, i.e. CallInst::isTailCall().
     Ice::InstCall *NewInst = nullptr;
     const Ice::Intrinsics::FullIntrinsicInfo *Info = nullptr;
 
@@ -649,8 +646,8 @@
 // Converter from LLVM global variables to ICE. The entry point is the
 // convertGlobalsToIce method.
 //
-// Note: this currently assumes that the given IR was verified to be
-// valid PNaCl bitcode. Othewise, the behavior is undefined.
+// Note: this currently assumes that the given IR was verified to be valid
+// PNaCl bitcode. Otherwise, the behavior is undefined.
 class LLVM2ICEGlobalsConverter : public LLVM2ICEConverter {
   LLVM2ICEGlobalsConverter() = delete;
   LLVM2ICEGlobalsConverter(const LLVM2ICEGlobalsConverter &) = delete;
@@ -661,15 +658,14 @@
   explicit LLVM2ICEGlobalsConverter(Ice::Converter &Converter)
       : LLVM2ICEConverter(Converter) {}
 
-  /// Converts global variables, and their initializers into ICE
-  /// global variable declarations, for module Mod. Returns the set of
-  /// converted declarations.
+  /// Converts global variables and their initializers into ICE global variable
+  /// declarations for module Mod. Returns the set of converted declarations.
   std::unique_ptr<Ice::VariableDeclarationList>
   convertGlobalsToIce(Module *Mod);
 
 private:
-  // Adds the Initializer to the list of initializers for the Global
-  // variable declaraation.
+  // Adds the Initializer to the list of initializers for the Global variable
+  // declaration.
   void addGlobalInitializer(Ice::VariableDeclaration &Global,
                             const Constant *Initializer) {
     const bool HasOffset = false;
@@ -678,15 +674,14 @@
   }
 
   // Adds Initializer to the list of initializers for Global variable
-  // declaration.  HasOffset is true only if Initializer is a
-  // relocation initializer and Offset should be added to the
-  // relocation.
+  // declaration. HasOffset is true only if Initializer is a relocation
+  // initializer and Offset should be added to the relocation.
   void addGlobalInitializer(Ice::VariableDeclaration &Global,
                             const Constant *Initializer, bool HasOffset,
                             Ice::RelocOffsetT Offset);
 
-  // Converts the given constant C to the corresponding integer
-  // literal it contains.
+  // Converts the given constant C to the corresponding integer literal it
+  // contains.
   Ice::RelocOffsetT getIntegerLiteralConstant(const Value *C) {
     const auto CI = dyn_cast<ConstantInt>(C);
     if (CI && CI->getType()->isIntegerTy(32))
diff --git a/src/IceConverter.h b/src/IceConverter.h
index 399e6f8..accc554 100644
--- a/src/IceConverter.h
+++ b/src/IceConverter.h
@@ -42,8 +42,8 @@
 
   llvm::Module *getModule() const { return Mod; }
 
-  /// Returns the global declaration associated with the corresponding
-  /// global value V. If no such global address, generates fatal error.
+  /// Returns the global declaration associated with the corresponding global
+  /// value V. If no such global address exists, generates a fatal error.
   GlobalDeclaration *getGlobalDeclaration(const llvm::GlobalValue *V);
 
 private:
@@ -56,9 +56,8 @@
   /// getFlags().DefaultGlobalPrefix, if the prefix is non-empty.
   void nameUnnamedGlobalVariables(llvm::Module *Mod);
 
-  /// Walks module and generates names for unnamed functions using
-  /// prefix getFlags().DefaultFunctionPrefix, if the prefix is
-  /// non-empty.
+  /// Walks module and generates names for unnamed functions using prefix
+  /// getFlags().DefaultFunctionPrefix, if the prefix is non-empty.
   void nameUnnamedFunctions(llvm::Module *Mod);
 
   /// Converts functions to ICE, and then machine code.
diff --git a/src/IceDefs.h b/src/IceDefs.h
index d1ad81c..a38da03 100644
--- a/src/IceDefs.h
+++ b/src/IceDefs.h
@@ -9,7 +9,7 @@
 ///
 /// \file
 /// This file declares various useful types and classes that have widespread use
-/// across Subzero.  Every Subzero source file is expected to include IceDefs.h.
+/// across Subzero. Every Subzero source file is expected to include IceDefs.h.
 ///
 //===----------------------------------------------------------------------===//
 
@@ -99,8 +99,8 @@
 }
 
 // makeUnique should be used when memory is expected to be allocated from the
-// heap (as opposed to allocated from some Allocator.) It is intended to be used
-// instead of new.
+// heap (as opposed to being allocated from some Allocator). It is intended to
+// be used instead of new.
 //
 // The expected usage is as follows
 //
@@ -140,8 +140,8 @@
 
 using IceString = std::string;
 using InstList = llvm::ilist<Inst>;
-// Ideally PhiList would be llvm::ilist<InstPhi>, and similar for
-// AssignList, but this runs into issues with SFINAE.
+// Ideally PhiList would be llvm::ilist<InstPhi>, and similar for AssignList,
+// but this runs into issues with SFINAE.
 using PhiList = InstList;
 using AssignList = InstList;
 
@@ -155,18 +155,18 @@
 using FunctionDeclarationList = std::vector<FunctionDeclaration *>;
 using VariableDeclarationList = std::vector<VariableDeclaration *>;
 
-/// SizeT is for holding small-ish limits like number of source
-/// operands in an instruction.  It is used instead of size_t (which
-/// may be 64-bits wide) when we want to save space.
+/// SizeT is for holding small-ish limits like the number of source operands in
+/// an instruction. It is used instead of size_t (which may be 64 bits wide)
+/// when we want to save space.
 using SizeT = uint32_t;
 
-/// InstNumberT is for holding an instruction number.  Instruction
-/// numbers are used for representing Variable live ranges.
+/// InstNumberT is for holding an instruction number. Instruction numbers are
+/// used for representing Variable live ranges.
 using InstNumberT = int32_t;
 
-/// A LiveBeginEndMapEntry maps a Variable::Number value to an
-/// Inst::Number value, giving the instruction number that begins or
-/// ends a variable's live range.
+/// A LiveBeginEndMapEntry maps a Variable::Number value to an Inst::Number
+/// value, giving the instruction number that begins or ends a variable's live
+/// range.
 using LiveBeginEndMapEntry = std::pair<SizeT, InstNumberT>;
 using LiveBeginEndMap =
     std::vector<LiveBeginEndMapEntry, CfgLocalAllocator<LiveBeginEndMapEntry>>;
@@ -175,9 +175,8 @@
 using TimerStackIdT = uint32_t;
 using TimerIdT = uint32_t;
 
-/// Use alignas(MaxCacheLineSize) to isolate variables/fields that
-/// might be contended while multithreading.  Assumes the maximum cache
-/// line size is 64.
+/// Use alignas(MaxCacheLineSize) to isolate variables/fields that might be
+/// contended while multithreading. Assumes the maximum cache line size is 64.
 enum { MaxCacheLineSize = 64 };
 // Use ICE_CACHELINE_BOUNDARY to force the next field in a declaration
 // list to be aligned to the next cache line.
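
A minimal sketch of the alignas(MaxCacheLineSize) idea described above, using hypothetical counter names rather than any real Subzero fields: each frequently updated field is pushed onto its own 64-byte cache line so concurrent threads do not falsely share a line.

#include <atomic>
#include <cstdint>

enum { MaxCacheLineSize = 64 };

struct Stats {
  // Each counter gets its own cache line (assuming a 64-byte line size).
  alignas(MaxCacheLineSize) std::atomic<uint64_t> InstructionsEmitted{0};
  alignas(MaxCacheLineSize) std::atomic<uint64_t> FillsAndSpills{0};
};

static_assert(alignof(Stats) == MaxCacheLineSize,
              "expected cache-line alignment");

int main() {
  Stats S;
  S.InstructionsEmitted.fetch_add(1, std::memory_order_relaxed);
  return static_cast<int>(S.InstructionsEmitted.load());
}
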
@@ -191,15 +190,15 @@
 enum { RelocAddrSize = 4 };
 
 enum LivenessMode {
-  /// Basic version of live-range-end calculation.  Marks the last uses
-  /// of variables based on dataflow analysis.  Records the set of
-  /// live-in and live-out variables for each block.  Identifies and
-  /// deletes dead instructions (primarily stores).
+  /// Basic version of live-range-end calculation. Marks the last uses of
+  /// variables based on dataflow analysis. Records the set of live-in and
+  /// live-out variables for each block. Identifies and deletes dead
+  /// instructions (primarily stores).
   Liveness_Basic,
 
-  /// In addition to Liveness_Basic, also calculate the complete
-  /// live range for each variable in a form suitable for interference
-  /// calculation and register allocation.
+  /// In addition to Liveness_Basic, also calculate the complete live range for
+  /// each variable in a form suitable for interference calculation and register
+  /// allocation.
   Liveness_Intervals
 };
 
@@ -244,10 +243,10 @@
 
 enum ErrorCodes { EC_None = 0, EC_Args, EC_Bitcode, EC_Translation };
 
-/// Wrapper around std::error_code for allowing multiple errors to be
-/// folded into one.  The current implementation keeps track of the
-/// first error, which is likely to be the most useful one, and this
-/// could be extended to e.g. collect a vector of errors.
+/// Wrapper around std::error_code for allowing multiple errors to be folded
+/// into one. The current implementation keeps track of the first error, which
+/// is likely to be the most useful one, and this could be extended to e.g.
+/// collect a vector of errors.
 class ErrorCode : public std::error_code {
   ErrorCode(const ErrorCode &) = delete;
   ErrorCode &operator=(const ErrorCode &) = delete;
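
The "first error wins" folding behavior described above can be sketched with a tiny wrapper; this is illustrative only and is not the actual Ice::ErrorCode class.

#include <cstdio>
#include <system_error>

// Keeps only the first error assigned to it; later assignments are ignored.
class FirstErrorCode : public std::error_code {
public:
  void assign(int Code, const std::error_category &Cat) {
    if (HasError)
      return;
    HasError = true;
    std::error_code::assign(Code, Cat);
  }

private:
  bool HasError = false;
};

int main() {
  FirstErrorCode EC;
  EC.assign(static_cast<int>(std::errc::invalid_argument),
            std::generic_category());
  EC.assign(static_cast<int>(std::errc::io_error), std::generic_category());
  // Prints the message for the first error only.
  std::printf("%s\n", EC.message().c_str());
  return 0;
}
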
diff --git a/src/IceELFObjectWriter.cpp b/src/IceELFObjectWriter.cpp
index 9e1d44e..7456856 100644
--- a/src/IceELFObjectWriter.cpp
+++ b/src/IceELFObjectWriter.cpp
@@ -105,10 +105,9 @@
 
 ELFRelocationSection *
 ELFObjectWriter::createRelocationSection(const ELFSection *RelatedSection) {
-  // Choice of RELA vs REL is actually separate from elf64 vs elf32,
-  // but in practice we've only had .rela for elf64 (x86-64).
-  // In the future, the two properties may need to be decoupled
-  // and the ShEntSize can vary more.
+  // Choice of RELA vs REL is actually separate from elf64 vs elf32, but in
+  // practice we've only had .rela for elf64 (x86-64). In the future, the two
+  // properties may need to be decoupled and the ShEntSize can vary more.
   const Elf64_Word ShType = ELF64 ? SHT_RELA : SHT_REL;
   IceString RelPrefix = ELF64 ? ".rela" : ".rel";
   IceString RelSectionName = RelPrefix + RelatedSection->getName();
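
For context on why the REL vs RELA choice changes ShEntSize, here are the generic-ABI relocation entry layouts written out as plain structs for illustration; these mirror standard ELF definitions and are not Subzero types.

#include <cstdint>
#include <cstdio>

struct Elf32Rel { // .rel entry: the addend is stored implicitly in place.
  uint32_t r_offset;
  uint32_t r_info;
};

struct Elf64Rela { // .rela entry: explicit addend field.
  uint64_t r_offset;
  uint64_t r_info;
  int64_t r_addend;
};

int main() {
  std::printf(".rel (elf32) entry = %zu bytes, .rela (elf64) entry = %zu bytes\n",
              sizeof(Elf32Rel), sizeof(Elf64Rela));
  return 0;
}
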
@@ -158,8 +157,8 @@
 }
 
 void ELFObjectWriter::assignSectionNumbersInfo(SectionList &AllSections) {
-  // Go through each section, assigning them section numbers and
-  // and fill in the size for sections that aren't incrementally updated.
+  // Go through each section, assigning them section numbers and filling in the
+  // size for sections that aren't incrementally updated.
   assert(!SectionNumbersAssigned);
   SizeT CurSectionNumber = 0;
   NullSection->setNumber(CurSectionNumber++);
@@ -233,8 +232,8 @@
     RelSection = RelTextSections[0];
   }
   RelocOffsetT OffsetInSection = Section->getCurrentSize();
-  // Function symbols are set to 0 size in the symbol table,
-  // in contrast to data symbols which have a proper size.
+  // Function symbols are set to 0 size in the symbol table, in contrast to
+  // data symbols which have a proper size.
   SizeT SymbolSize = 0;
   Section->appendData(Str, Asm->getBufferView());
   uint8_t SymbolType;
@@ -268,9 +267,8 @@
   return ELFObjectWriter::BSS;
 }
 
-// Partition the Vars list by SectionType into VarsBySection.
-// If TranslateOnly is non-empty, then only the TranslateOnly variable
-// is kept for emission.
+// Partition the Vars list by SectionType into VarsBySection. If TranslateOnly
+// is non-empty, then only the TranslateOnly variable is kept for emission.
 void partitionGlobalsBySection(const VariableDeclarationList &Vars,
                                VariableDeclarationList VarsBySection[],
                                const IceString &TranslateOnly) {
@@ -440,8 +438,8 @@
 void ELFObjectWriter::writeELFHeaderInternal(Elf64_Off SectionHeaderOffset,
                                              SizeT SectHeaderStrIndex,
                                              SizeT NumSections) {
-  // Write the e_ident: magic number, class, etc.
-  // The e_ident is byte order and ELF class independent.
+  // Write the e_ident: magic number, class, etc. The e_ident is byte order and
+  // ELF class independent.
   Str.writeBytes(llvm::StringRef(ElfMagic, strlen(ElfMagic)));
   Str.write8(IsELF64 ? ELFCLASS64 : ELFCLASS32);
   Str.write8(ELFDATA2LSB);
@@ -451,21 +449,21 @@
   Str.write8(ELF_ABIVersion);
   Str.writeZeroPadding(EI_NIDENT - EI_PAD);
 
-  // TODO(jvoung): Handle and test > 64K sections.  See the generic ABI doc:
-  // https://refspecs.linuxbase.org/elf/gabi4+/ch4.eheader.html
-  // e_shnum should be 0 and then actual number of sections is
-  // stored in the sh_size member of the 0th section.
+  // TODO(jvoung): Handle and test > 64K sections. See the generic ABI doc:
+  // https://refspecs.linuxbase.org/elf/gabi4+/ch4.eheader.html
+  // e_shnum should be 0, and then the actual number of sections is stored in
+  // the sh_size member of the 0th section.
   assert(NumSections < SHN_LORESERVE);
   assert(SectHeaderStrIndex < SHN_LORESERVE);
 
   const TargetArch Arch = Ctx.getFlags().getTargetArch();
-  // Write the rest of the file header, which does depend on byte order
-  // and ELF class.
+  // Write the rest of the file header, which does depend on byte order and ELF
+  // class.
   Str.writeLE16(ET_REL);                                        // e_type
   Str.writeLE16(getELFMachine(Ctx.getFlags().getTargetArch())); // e_machine
   Str.writeELFWord<IsELF64>(1);                                 // e_version
-  // Since this is for a relocatable object, there is no entry point,
-  // and no program headers.
+  // Since this is for a relocatable object, there is no entry point, and no
+  // program headers.
   Str.writeAddrOrOffset<IsELF64>(0);                                // e_entry
   Str.writeAddrOrOffset<IsELF64>(0);                                // e_phoff
   Str.writeAddrOrOffset<IsELF64>(SectionHeaderOffset);              // e_shoff
@@ -505,8 +503,8 @@
       SecStrBuf.str(), SHT_PROGBITS, ShFlags, Align, WriteAmt);
   RODataSections.push_back(Section);
   SizeT OffsetInSection = 0;
-  // The symbol table entry doesn't need to know the defined symbol's
-  // size since this is in a section with a fixed Entry Size.
+  // The symbol table entry doesn't need to know the defined symbol's size
+  // since this is in a section with a fixed Entry Size.
   const SizeT SymbolSize = 0;
   Section->setFileOffset(alignFileOffset(Align));
 
@@ -541,11 +539,11 @@
   Section->setSize(OffsetInSection);
 }
 
-// Instantiate known needed versions of the template, since we are
-// defining the function in the .cpp file instead of the .h file.
-// We may need to instantiate constant pools for integers as well
-// if we do constant-pooling of large integers to remove them
-// from the instruction stream (fewer bytes controlled by an attacker).
+// Instantiate known needed versions of the template, since we are defining the
+// function in the .cpp file instead of the .h file. We may need to instantiate
+// constant pools for integers as well if we do constant-pooling of large
+// integers to remove them from the instruction stream (fewer bytes controlled
+// by an attacker).
 template void ELFObjectWriter::writeConstantPool<ConstantFloat>(Type Ty);
 
 template void ELFObjectWriter::writeConstantPool<ConstantDouble>(Type Ty);
diff --git a/src/IceELFObjectWriter.h b/src/IceELFObjectWriter.h
index c1bfb74..60ed60c 100644
--- a/src/IceELFObjectWriter.h
+++ b/src/IceELFObjectWriter.h
@@ -24,11 +24,11 @@
 
 namespace Ice {
 
-/// Higher level ELF object writer.  Manages section information and writes
-/// the final ELF object.  The object writer will write to file the code
-/// and data as it is being defined (rather than keep a copy).
-/// After all definitions are written out, it will finalize the bookkeeping
-/// sections and write them out.  Expected usage:
+/// Higher level ELF object writer. Manages section information and writes the
+/// final ELF object. The object writer will write to file the code and data as
+/// it is being defined (rather than keep a copy). After all definitions are
+/// written out, it will finalize the bookkeeping sections and write them out.
+/// Expected usage:
 ///
 /// (1) writeInitialELFHeader (invoke once)
 /// (2) writeDataSection      (may be invoked multiple times, as long as
@@ -38,9 +38,9 @@
 /// (5) setUndefinedSyms      (invoke once)
 /// (6) writeNonUserSections  (invoke once)
 ///
-/// The requirement for writeDataSection to be invoked only once can
-/// be relaxed if using -fdata-sections. The requirement to invoke only once
-/// without -fdata-sections is so that variables that belong to each possible
+/// The requirement for writeDataSection to be invoked only once can be relaxed
+/// if using -fdata-sections. The requirement to invoke only once without
+/// -fdata-sections is so that variables that belong to each possible
 /// SectionType are contiguous in the file. With -fdata-sections, each global
 /// variable is in a separate section and therefore the sections will be
 /// trivially contiguous.
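
The expected-usage list above can be modeled with a runnable stand-in that only encodes the calling protocol; the class below is a toy, not the real Ice::ELFObjectWriter, and it deliberately ignores all arguments and file I/O.

#include <cassert>

// Stages: 0 = nothing written, 1 = header reserved, 2 = symbols resolved,
// 3 = finalized. Section-writing calls may repeat while in stage 1.
class ToyObjectWriter {
public:
  void writeInitialELFHeader() { assert(Stage == 0); Stage = 1; }
  void writeDataSection()      { assert(Stage == 1); }
  void writeFunctionCode()     { assert(Stage == 1); }
  void writeConstantPool()     { assert(Stage == 1); }
  void setUndefinedSyms()      { assert(Stage == 1); Stage = 2; }
  void writeNonUserSections()  { assert(Stage == 2); Stage = 3; }

private:
  int Stage = 0;
};

int main() {
  ToyObjectWriter W;
  W.writeInitialELFHeader(); // (1) reserve space for the ELF header.
  W.writeDataSection();      // (2) data section(s).
  W.writeFunctionCode();     // text, possibly once per function.
  W.writeConstantPool();     // read-only constant pools.
  W.setUndefinedSyms();      // (5) external/undefined symbols.
  W.writeNonUserSections();  // (6) finalize bookkeeping sections and header.
  return 0;
}
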
@@ -53,27 +53,27 @@
   ELFObjectWriter(GlobalContext &Ctx, ELFStreamer &Out);
 
   /// Write the initial ELF header. This is just to reserve space in the ELF
-  /// file. Reserving space allows the other functions to write text
-  /// and data directly to the file and get the right file offsets.
+  /// file. Reserving space allows the other functions to write text and data
+  /// directly to the file and get the right file offsets.
   void writeInitialELFHeader();
 
-  /// Copy initializer data for globals to file and note the offset and size
-  /// of each global's definition in the symbol table.
-  /// Use the given target's RelocationKind for any relocations.
+  /// Copy initializer data for globals to file and note the offset and size of
+  /// each global's definition in the symbol table. Use the given target's
+  /// RelocationKind for any relocations.
   void writeDataSection(const VariableDeclarationList &Vars,
                         FixupKind RelocationKind,
                         const IceString &SectionSuffix);
 
   /// Copy data of a function's text section to file and note the offset of the
-  /// symbol's definition in the symbol table.
-  /// Copy the text fixups for use after all functions are written.
-  /// The text buffer and fixups are extracted from the Assembler object.
+  /// symbol's definition in the symbol table. Copy the text fixups for use
+  /// after all functions are written. The text buffer and fixups are extracted
+  /// from the Assembler object.
   void writeFunctionCode(const IceString &FuncName, bool IsInternal,
                          const Assembler *Asm);
 
-  /// Queries the GlobalContext for constant pools of the given type
-  /// and writes out read-only data sections for those constants. This also
-  /// fills the symbol table with labels for each constant pool entry.
+  /// Queries the GlobalContext for constant pools of the given type and writes
+  /// out read-only data sections for those constants. This also fills the
+  /// symbol table with labels for each constant pool entry.
   template <typename ConstType> void writeConstantPool(Type Ty);
 
   /// Write a jump table and register fixups for the target addresses.
@@ -82,12 +82,12 @@
   /// Populate the symbol table with a list of external/undefined symbols.
   void setUndefinedSyms(const ConstantList &UndefSyms);
 
-  /// Do final layout and write out the rest of the object file.
-  /// Finally, patch up the initial ELF header with the final info.
+  /// Do final layout and write out the rest of the object file. Finally, patch
+  /// up the initial ELF header with the final info.
   void writeNonUserSections();
 
-  /// Which type of ELF section a global variable initializer belongs to.
-  /// This is used as an array index so should start at 0 and be contiguous.
+  /// Which type of ELF section a global variable initializer belongs to. This
+  /// is used as an array index so should start at 0 and be contiguous.
   enum SectionType { ROData = 0, Data, BSS, NumSectionTypes };
 
 private:
@@ -120,25 +120,25 @@
                    Elf64_Xword ShFlags, Elf64_Xword ShAddralign,
                    Elf64_Xword ShEntsize);
 
-  /// Create a relocation section, given the related section
-  /// (e.g., .text, .data., .rodata).
+  /// Create a relocation section, given the related section (e.g., .text,
+  /// .data, .rodata).
   ELFRelocationSection *
   createRelocationSection(const ELFSection *RelatedSection);
 
-  /// Align the file position before writing out a section's data,
-  /// and return the position of the file.
+  /// Align the file position before writing out a section's data, and return
+  /// the position of the file.
   Elf64_Off alignFileOffset(Elf64_Xword Align);
 
-  /// Assign an ordering / section numbers to each section.
-  /// Fill in other information that is only known near the end
-  /// (such as the size, if it wasn't already incrementally updated).
-  /// This then collects all sections in the decided order, into one vector,
-  /// for conveniently writing out all of the section headers.
+  /// Assign an ordering (section numbers) to each section. Fill in other
+  /// information that is only known near the end (such as the size, if it
+  /// wasn't already incrementally updated). This then collects all sections in
+  /// the decided order, into one vector, for conveniently writing out all of
+  /// the section headers.
   void assignSectionNumbersInfo(SectionList &AllSections);
 
-  /// This function assigns .foo and .rel.foo consecutive section numbers.
-  /// It also sets the relocation section's sh_info field to the related
-  /// section's number.
+  /// This function assigns .foo and .rel.foo consecutive section numbers. It
+  /// also sets the relocation section's sh_info field to the related section's
+  /// number.
   template <typename UserSectionList>
   void assignRelSectionNumInPairs(SizeT &CurSectionNumber,
                                   UserSectionList &UserSections,
@@ -156,9 +156,9 @@
                        FixupKind RelocationKind,
                        const IceString &SectionSuffix);
 
-  /// Write the final relocation sections given the final symbol table.
-  /// May also be able to seek around the file and resolve function calls
-  /// that are for functions within the same section.
+  /// Write the final relocation sections given the final symbol table. May also
+  /// be able to seek around the file and resolve function calls that are for
+  /// functions within the same section.
   void writeAllRelocationSections();
   void writeRelocationSections(RelSectionList &RelSections);
 
diff --git a/src/IceELFSection.cpp b/src/IceELFSection.cpp
index 7893354..3e33c99 100644
--- a/src/IceELFSection.cpp
+++ b/src/IceELFSection.cpp
@@ -82,8 +82,8 @@
 // Symbol tables.
 
 void ELFSymbolTableSection::createNullSymbol(ELFSection *NullSection) {
-  // The first entry in the symbol table should be a NULL entry,
-  // so make sure the map is still empty.
+  // The first entry in the symbol table should be a NULL entry, so make sure
+  // the map is still empty.
   assert(LocalSymbols.empty());
   const IceString NullSymName("");
   createDefinedSym(NullSymName, STT_NOTYPE, STB_LOCAL, NullSection, 0, 0);
@@ -208,8 +208,8 @@
     assert(StringIndex.second == UnknownIndex);
     llvm::StringRef Cur = llvm::StringRef(StringIndex.first);
     if (Prev.endswith(Cur)) {
-      // Prev is already in the StringData, and Cur is shorter than Prev
-      // based on the sort.
+      // Prev is already in the StringData, and Cur is shorter than Prev based
+      // on the sort.
       StringIndex.second = StringData.size() - Cur.size() - 1;
       continue;
     }
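A concrete instance of the arithmetic above, assuming the table keeps its usual leading '\0' byte: after "lollipop" has been appended, StringData holds the ten bytes

    offset:      0  1  2  3  4  5  6  7  8  9
    StringData: \0  l  o  l  l  i  p  o  p \0

so for Cur = "pop" the reused index is StringData.size() - Cur.size() - 1 = 10 - 3 - 1 = 6, which points at the "pop" suffix of "lollipop" and shares its terminating '\0'.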
diff --git a/src/IceELFSection.h b/src/IceELFSection.h
index 92dc02b..636f3b4 100644
--- a/src/IceELFSection.h
+++ b/src/IceELFSection.h
@@ -36,15 +36,15 @@
 public:
   virtual ~ELFSection() = default;
 
-  /// Sentinel value for a section number/index for before the final
-  /// section index is actually known. The dummy NULL section will be assigned
-  /// number 0, and it is referenced by the dummy 0-th symbol in the symbol
-  /// table, so use max() instead of 0.
+  /// Sentinel value for a section number/index used before the final section
+  /// index is actually known. The dummy NULL section will be assigned number 0,
+  /// and it is referenced by the dummy 0-th symbol in the symbol table, so use
+  /// max() instead of 0.
   enum { NoSectionNumber = std::numeric_limits<SizeT>::max() };
 
-  /// Constructs an ELF section, filling in fields that will be known
-  /// once the *type* of section is decided.  Other fields may be updated
-  /// incrementally or only after the program is completely defined.
+  /// Constructs an ELF section, filling in fields that will be known once the
+  /// *type* of section is decided. Other fields may be updated incrementally or
+  /// only after the program is completely defined.
   ELFSection(const IceString &Name, Elf64_Word ShType, Elf64_Xword ShFlags,
              Elf64_Xword ShAddralign, Elf64_Xword ShEntsize)
       : Name(Name), Header() {
@@ -84,8 +84,8 @@
   template <bool IsELF64> void writeHeader(ELFStreamer &Str);
 
 protected:
-  /// Name of the section in convenient string form (instead of a index
-  /// into the Section Header String Table, which is not known till later).
+  /// Name of the section in convenient string form (instead of an index into
+  /// the Section Header String Table, which is not known till later).
   const IceString Name;
 
   // The fields of the header. May only be partially initialized, but should
@@ -96,8 +96,8 @@
   SizeT Number = NoSectionNumber;
 };
 
-/// Models text/code sections. Code is written out incrementally and the
-/// size of the section is then updated incrementally.
+/// Models text/code sections. Code is written out incrementally and the size of
+/// the section is then updated incrementally.
 class ELFTextSection : public ELFSection {
   ELFTextSection() = delete;
   ELFTextSection(const ELFTextSection &) = delete;
@@ -109,9 +109,9 @@
   void appendData(ELFStreamer &Str, const llvm::StringRef MoreData);
 };
 
-/// Models data/rodata sections. Data is written out incrementally and the
-/// size of the section is then updated incrementally.
-/// Some rodata sections may have fixed entsize and duplicates may be mergeable.
+/// Models data/rodata sections. Data is written out incrementally and the size
+/// of the section is then updated incrementally. Some rodata sections may have
+/// fixed entsize and duplicates may be mergeable.
 class ELFDataSection : public ELFSection {
   ELFDataSection() = delete;
   ELFDataSection(const ELFDataSection &) = delete;
@@ -128,8 +128,8 @@
                               RelocOffsetT RelocOffset);
 
   /// Pad the next section offset for writing data elements to the requested
-  /// alignment. If the section is NOBITS then do not actually write out
-  /// the padding and only update the section size.
+  /// alignment. If the section is NOBITS then do not actually write out the
+  /// padding and only update the section size.
   void padToAlignment(ELFStreamer &Str, Elf64_Xword Align);
 };
 
@@ -141,8 +141,8 @@
   ELFSection *Section;
   SizeT Number;
 
-  /// Sentinel value for symbols that haven't been assigned a number yet.
-  /// The dummy 0-th symbol will be assigned number 0, so don't use that.
+  /// Sentinel value for symbols that haven't been assigned a number yet. The
+  /// dummy 0-th symbol will be assigned number 0, so don't use that.
   enum { UnknownNumber = std::numeric_limits<SizeT>::max() };
 
   void setNumber(SizeT N) {
@@ -170,16 +170,15 @@
       : ELFSection(Name, ShType, ShFlags, ShAddralign, ShEntsize),
         NullSymbol(nullptr) {}
 
-  /// Create initial entry for a symbol when it is defined.
-  /// Each entry should only be defined once.
-  /// We might want to allow Name to be a dummy name initially, then
-  /// get updated to the real thing, since Data initializers are read
-  /// before the bitcode's symbol table is read.
+  /// Create initial entry for a symbol when it is defined. Each entry should
+  /// only be defined once. We might want to allow Name to be a dummy name
+  /// initially, then get updated to the real thing, since Data initializers are
+  /// read before the bitcode's symbol table is read.
   void createDefinedSym(const IceString &Name, uint8_t Type, uint8_t Binding,
                         ELFSection *Section, RelocOffsetT Offset, SizeT Size);
 
-  /// Note that a symbol table entry needs to be created for the given
-  /// symbol because it is undefined.
+  /// Note that a symbol table entry needs to be created for the given symbol
+  /// because it is undefined.
   void noteUndefinedSym(const IceString &Name, ELFSection *NullSection);
 
   const ELFSym *findSymbol(const IceString &Name) const;
@@ -198,8 +197,8 @@
   void writeData(ELFStreamer &Str, bool IsELF64);
 
 private:
-  // Map from symbol name to its symbol information.
-  // This assumes symbols are unique across all sections.
+  // Map from symbol name to its symbol information. This assumes symbols are
+  // unique across all sections.
   using SymtabKey = IceString;
   using SymMap = std::map<SymtabKey, ELFSym>;
 
@@ -207,8 +206,8 @@
   void writeSymbolMap(ELFStreamer &Str, const SymMap &Map);
 
   const ELFSym *NullSymbol;
-  // Keep Local and Global symbols separate, since the sh_info needs to
-  // know the index of the last LOCAL.
+  // Keep Local and Global symbols separate, since the sh_info needs to know
+  // the index of the last LOCAL.
   SymMap LocalSymbols;
   SymMap GlobalSymbols;
 };
@@ -231,8 +230,8 @@
     RelatedSection = Section;
   }
 
-  /// Track additional relocations which start out relative to offset 0,
-  /// but should be adjusted to be relative to BaseOff.
+  /// Track additional relocations which start out relative to offset 0, but
+  /// should be adjusted to be relative to BaseOff.
   void addRelocations(RelocOffsetT BaseOff, const FixupRefList &FixupRefs);
 
   /// Track a single additional relocation.
@@ -251,12 +250,11 @@
   FixupList Fixups;
 };
 
-/// Models a string table.  The user will build the string table by
-/// adding strings incrementally.  At some point, all strings should be
-/// known and doLayout() should be called. After that, no other
-/// strings may be added.  However, the final offsets of the strings
-/// can be discovered and used to fill out section headers and symbol
-/// table entries.
+/// Models a string table. The user will build the string table by adding
+/// strings incrementally. At some point, all strings should be known and
+/// doLayout() should be called. After that, no other strings may be added.
+/// However, the final offsets of the strings can be discovered and used to fill
+/// out section headers and symbol table entries.
 class ELFStringTableSection : public ELFSection {
   ELFStringTableSection() = delete;
   ELFStringTableSection(const ELFStringTableSection &) = delete;
@@ -271,12 +269,12 @@
   /// Finalizes the layout of the string table and fills in the section Data.
   void doLayout();
 
-  /// The first byte of the string table should be \0, so it is an
-  /// invalid index.  Indices start out as unknown until layout is complete.
+  /// The first byte of the string table should be \0, so it is an invalid
+  /// index. Indices start out as unknown until layout is complete.
   enum { UnknownIndex = 0 };
 
-  /// Grabs the final index of a string after layout. Returns UnknownIndex
-  /// if the string's index is not found.
+  /// Grabs the final index of a string after layout. Returns UnknownIndex if
+  /// the string's index is not found.
   size_t getIndex(const IceString &Str) const;
 
   llvm::StringRef getSectionData() const {
@@ -290,19 +288,19 @@
 private:
   bool isLaidOut() const { return !StringData.empty(); }
 
-  /// Strings can share a string table entry if they share the same
-  /// suffix.  E.g., "pop" and "lollipop" can both use the characters
-  /// in "lollipop", but "pops" cannot, and "unpop" cannot either.
-  /// Though, "pop", "lollipop", and "unpop" share "pop" as the suffix,
-  /// "pop" can only share the characters with one of them.
+  /// Strings can share a string table entry if they share the same suffix.
+  /// E.g., "pop" and "lollipop" can both use the characters in "lollipop", but
+  /// "pops" cannot, and "unpop" cannot either. Though, "pop", "lollipop", and
+  /// "unpop" share "pop" as the suffix, "pop" can only share the characters
+  /// with one of them.
   struct SuffixComparator {
     bool operator()(const IceString &StrA, const IceString &StrB) const;
   };
 
   using StringToIndexType = std::map<IceString, size_t, SuffixComparator>;
 
-  /// Track strings to their index.  Index will be UnknownIndex if not
-  /// yet laid out.
+  /// Track strings to their index. Index will be UnknownIndex if not yet laid
+  /// out.
   StringToIndexType StringToIndexMap;
 
   using RawDataType = std::vector<uint8_t>;
diff --git a/src/IceELFStreamer.h b/src/IceELFStreamer.h
index 93051b9..ab99891 100644
--- a/src/IceELFStreamer.h
+++ b/src/IceELFStreamer.h
@@ -20,8 +20,8 @@
 
 namespace Ice {
 
-/// Low level writer that can that can handle ELFCLASS32/64.
-/// Little endian only for now.
+/// Low-level writer that can handle ELFCLASS32/64. Little endian only for
+/// now.
 class ELFStreamer {
   ELFStreamer() = delete;
   ELFStreamer(const ELFStreamer &) = delete;
diff --git a/src/IceFixups.cpp b/src/IceFixups.cpp
index ff7916c..a86d985 100644
--- a/src/IceFixups.cpp
+++ b/src/IceFixups.cpp
@@ -40,8 +40,8 @@
     else
       Str << Ctx->mangleName(CR->getName());
   } else {
-    // NOTE: currently only float/doubles are put into constant pools.
-    // In the future we may put integers as well.
+    // NOTE: currently only float/doubles are put into constant pools. In the
+    // future we may put integers as well.
     assert(llvm::isa<ConstantFloat>(C) || llvm::isa<ConstantDouble>(C));
     C->emitPoolLabel(Str);
   }
diff --git a/src/IceGlobalContext.cpp b/src/IceGlobalContext.cpp
index b4da1b6..7b7183d 100644
--- a/src/IceGlobalContext.cpp
+++ b/src/IceGlobalContext.cpp
@@ -8,8 +8,8 @@
 //===----------------------------------------------------------------------===//
 ///
 /// \file
-/// This file defines aspects of the compilation that persist across
-/// multiple functions.
+/// This file defines aspects of the compilation that persist across multiple
+/// functions.
 ///
 //===----------------------------------------------------------------------===//
 
@@ -48,9 +48,9 @@
 
 namespace {
 
-// Define the key comparison function for the constant pool's
-// unordered_map, but only for key types of interest: integer types,
-// floating point types, and the special RelocatableTuple.
+// Define the key comparison function for the constant pool's unordered_map,
+// but only for key types of interest: integer types, floating point types, and
+// the special RelocatableTuple.
 template <typename KeyType, class Enable = void> struct KeyCompare {};
 
 template <typename KeyType>
@@ -70,9 +70,9 @@
   }
 };
 
-// Define a key comparison function for sorting the constant pool's
-// values after they are dumped to a vector.  This covers integer
-// types, floating point types, and ConstantRelocatable values.
+// Define a key comparison function for sorting the constant pool's values
+// after they are dumped to a vector. This covers integer types, floating point
+// types, and ConstantRelocatable values.
 template <typename ValueType, class Enable = void> struct KeyCompareLess {};
 
 template <typename ValueType>
@@ -601,8 +601,8 @@
         assert(OldName[OldPos - 1] == 'S');
         assert(OldName[OldPos + Length] == '_');
         if (AllZs) {
-          // Replace N 'Z' characters with a '0' (if N=0) or '1' (if
-          // N>0) followed by N '0' characters.
+          // Replace N 'Z' characters with a '0' (if N=0) or '1' (if N>0)
+          // followed by N '0' characters.
           NewName[NewPos++] = (Length ? '1' : '0');
           for (size_t i = 0; i < Length; ++i) {
             NewName[NewPos++] = '0';
@@ -642,16 +642,15 @@
   OldName = NewName;
 }
 
-// In this context, name mangling means to rewrite a symbol using a
-// given prefix.  For a C++ symbol, nest the original symbol inside
-// the "prefix" namespace.  For other symbols, just prepend the
-// prefix.
+// In this context, name mangling means to rewrite a symbol using a given
+// prefix. For a C++ symbol, nest the original symbol inside the "prefix"
+// namespace. For other symbols, just prepend the prefix.
 IceString GlobalContext::mangleName(const IceString &Name) const {
-  // An already-nested name like foo::bar() gets pushed down one
-  // level, making it equivalent to Prefix::foo::bar().
+  // An already-nested name like foo::bar() gets pushed down one level, making
+  // it equivalent to Prefix::foo::bar().
   //   _ZN3foo3barExyz ==> _ZN6Prefix3foo3barExyz
-  // A non-nested but mangled name like bar() gets nested, making it
-  // equivalent to Prefix::bar().
+  // A non-nested but mangled name like bar() gets nested, making it equivalent
+  // to Prefix::bar().
   //   _Z3barxyz ==> _ZN6Prefix3barExyz
   // An unmangled, extern "C" style name, gets a simple prefix:
   //   bar ==> Prefixbar
@@ -671,28 +670,27 @@
     //   (splice in "6Prefix")          ^^^^^^^
     snprintf(NewName.data(), BufLen, "_ZN%u%s%s", PrefixLength,
              TestPrefix.c_str(), NameBase.data());
-    // We ignore the snprintf return value (here and below).  If we
-    // somehow miscalculated the output buffer length, the output will
-    // be truncated, but it will be truncated consistently for all
-    // mangleName() calls on the same input string.
+    // We ignore the snprintf return value (here and below). If we somehow
+    // miscalculated the output buffer length, the output will be truncated,
+    // but it will be truncated consistently for all mangleName() calls on the
+    // same input string.
     incrementSubstitutions(NewName);
     return NewName.data();
   }
 
-  // Artificially limit BaseLength to 9 digits (less than 1 billion)
-  // because sscanf behavior is undefined on integer overflow.  If
-  // there are more than 9 digits (which we test by looking at the
-  // beginning of NameBase), then we consider this a failure to parse
-  // a namespace mangling, and fall back to the simple prefixing.
+  // Artificially limit BaseLength to 9 digits (less than 1 billion) because
+  // sscanf behavior is undefined on integer overflow. If there are more than 9
+  // digits (which we test by looking at the beginning of NameBase), then we
+  // consider this a failure to parse a namespace mangling, and fall back to
+  // the simple prefixing.
   ItemsParsed = sscanf(Name.c_str(), "_Z%9u%s", &BaseLength, NameBase.data());
   if (ItemsParsed == 2 && BaseLength <= strlen(NameBase.data()) &&
       !isdigit(NameBase[0])) {
     // Transform _Z3barxyz ==> _ZN6Prefix3barExyz
     //                           ^^^^^^^^    ^
-    // (splice in "N6Prefix", and insert "E" after "3bar")
-    // But an "I" after the identifier indicates a template argument
-    // list terminated with "E"; insert the new "E" before/after the
-    // old "E".  E.g.:
+    // (splice in "N6Prefix", and insert "E" after "3bar")
+    // But an "I" after the identifier indicates a template argument list
+    // terminated with "E"; insert the new "E" before/after the old "E". E.g.:
     // Transform _Z3barIabcExyz ==> _ZN6Prefix3barIabcEExyz
     //                                ^^^^^^^^         ^
     // (splice in "N6Prefix", and insert "E" after "3barIabcE")
@@ -730,8 +728,8 @@
   }
 }
 
-// TODO(stichnot): Consider adding thread-local caches of constant
-// pool entries to reduce contention.
+// TODO(stichnot): Consider adding thread-local caches of constant pool entries
+// to reduce contention.
 
 // All locking is done by the getConstantInt[0-9]+() target function.
 Constant *GlobalContext::getConstantInt(Type Ty, int64_t Value) {
@@ -875,8 +873,8 @@
 
 JumpTableDataList GlobalContext::getJumpTables() {
   JumpTableDataList JumpTables(*getJumpTableList());
-  // Make order deterministic by sorting into functions and then ID of the
-  // jump table within that function.
+  // Make order deterministic by sorting into functions and then ID of the jump
+  // table within that function.
   std::sort(JumpTables.begin(), JumpTables.end(),
             [](const JumpTableData &A, const JumpTableData &B) {
               if (A.getFunctionName() != B.getFunctionName())
@@ -946,11 +944,10 @@
   Timers->at(StackID).setName(NewName);
 }
 
-// Note: optQueueBlockingPush and optQueueBlockingPop use unique_ptr
-// at the interface to take and transfer ownership, but they
-// internally store the raw Cfg pointer in the work queue.  This
-// allows e.g. future queue optimizations such as the use of atomics
-// to modify queue elements.
+// Note: optQueueBlockingPush and optQueueBlockingPop use unique_ptr at the
+// interface to take and transfer ownership, but they internally store the raw
+// Cfg pointer in the work queue. This allows e.g. future queue optimizations
+// such as the use of atomics to modify queue elements.
 void GlobalContext::optQueueBlockingPush(std::unique_ptr<Cfg> Func) {
   assert(Func);
   OptQ.blockingPush(Func.release());
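In other words, this is the usual release-on-push / re-wrap-on-pop idiom; a minimal sketch of both directions, with Queue standing in for the actual work-queue member and blockingPush/blockingPop assumed to take and return raw pointers:

    void push(std::unique_ptr<Cfg> Func) {
      Queue.blockingPush(Func.release()); // the queue now holds the raw pointer
    }
    std::unique_ptr<Cfg> pop() {
      return std::unique_ptr<Cfg>(Queue.blockingPop()); // ownership re-wrapped
    }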
diff --git a/src/IceGlobalContext.h b/src/IceGlobalContext.h
index 8a747f8..baab9ca 100644
--- a/src/IceGlobalContext.h
+++ b/src/IceGlobalContext.h
@@ -8,8 +8,8 @@
 //===----------------------------------------------------------------------===//
 ///
 /// \file
-/// This file declares aspects of the compilation that persist across
-/// multiple functions.
+/// This file declares aspects of the compilation that persist across multiple
+/// functions.
 ///
 //===----------------------------------------------------------------------===//
 
@@ -186,9 +186,10 @@
   /// translators using the same bitcode as input.
   IceString mangleName(const IceString &Name) const;
 
-  // Manage Constants.
-  // getConstant*() functions are not const because they might add
-  // something to the constant pool.
+  /// \name Manage Constants.
+  /// @{
+  // getConstant*() functions are not const because they might add something to
+  // the constant pool.
   Constant *getConstantInt(Type Ty, int64_t Value);
   Constant *getConstantInt1(int8_t ConstantInt1);
   Constant *getConstantInt8(int8_t ConstantInt8);
@@ -205,11 +206,12 @@
   Constant *getConstantUndef(Type Ty);
   /// Returns a zero value.
   Constant *getConstantZero(Type Ty);
-  /// getConstantPool() returns a copy of the constant pool for
-  /// constants of a given type.
+  /// getConstantPool() returns a copy of the constant pool for constants of a
+  /// given type.
   ConstantList getConstantPool(Type Ty);
   /// Returns a copy of the list of external symbols.
   ConstantList getConstantExternSyms();
+  /// @}
 
   /// Return a locked pointer to the registered jump tables.
   JumpTableDataList getJumpTables();
@@ -299,36 +301,35 @@
   /// These are predefined TimerStackIdT values.
   enum TimerStackKind { TSK_Default = 0, TSK_Funcs, TSK_Num };
 
-  /// newTimerStackID() creates a new TimerStack in the global space.
-  /// It does not affect any TimerStack objects in TLS.
+  /// newTimerStackID() creates a new TimerStack in the global space. It does
+  /// not affect any TimerStack objects in TLS.
   TimerStackIdT newTimerStackID(const IceString &Name);
-  /// dumpTimers() dumps the global timer data.  As such, one probably
-  /// wants to call mergeTimerStacks() as a prerequisite.
+  /// dumpTimers() dumps the global timer data. As such, one probably wants to
+  /// call mergeTimerStacks() as a prerequisite.
   void dumpTimers(TimerStackIdT StackID = TSK_Default,
                   bool DumpCumulative = true);
-  /// The following methods affect only the calling thread's TLS timer
-  /// data.
+  /// The following methods affect only the calling thread's TLS timer data.
   TimerIdT getTimerID(TimerStackIdT StackID, const IceString &Name);
   void pushTimer(TimerIdT ID, TimerStackIdT StackID);
   void popTimer(TimerIdT ID, TimerStackIdT StackID);
   void resetTimer(TimerStackIdT StackID);
   void setTimerName(TimerStackIdT StackID, const IceString &NewName);
 
-  /// This is the first work item sequence number that the parser
-  /// produces, and correspondingly the first sequence number that the
-  /// emitter thread will wait for.  Start numbering at 1 to leave room
-  /// for a sentinel, in case e.g. we wish to inject items with a
-  /// special sequence number that may be executed out of order.
+  /// This is the first work item sequence number that the parser produces, and
+  /// correspondingly the first sequence number that the emitter thread will
+  /// wait for. Start numbering at 1 to leave room for a sentinel, in case e.g.
+  /// we wish to inject items with a special sequence number that may be
+  /// executed out of order.
   static uint32_t getFirstSequenceNumber() { return 1; }
-  /// Adds a newly parsed and constructed function to the Cfg work
-  /// queue.  Notifies any idle workers that a new function is
-  /// available for translating.  May block if the work queue is too
-  /// large, in order to control memory footprint.
+  /// Adds a newly parsed and constructed function to the Cfg work queue.
+  /// Notifies any idle workers that a new function is available for
+  /// translating. May block if the work queue is too large, in order to control
+  /// memory footprint.
   void optQueueBlockingPush(std::unique_ptr<Cfg> Func);
-  /// Takes a Cfg from the work queue for translating.  May block if
-  /// the work queue is currently empty.  Returns nullptr if there is
-  /// no more work - the queue is empty and either end() has been
-  /// called or the Sequential flag was set.
+  /// Takes a Cfg from the work queue for translating. May block if the work
+  /// queue is currently empty. Returns nullptr if there is no more work - the
+  /// queue is empty and either end() has been called or the Sequential flag was
+  /// set.
   std::unique_ptr<Cfg> optQueueBlockingPop();
   /// Notifies that no more work will be added to the work queue.
   void optQueueNotifyEnd() { OptQ.notifyEnd(); }
@@ -378,8 +379,8 @@
     }
     TranslationThreads.clear();
 
-    // Only notify the emit queue to end after all the translation
-    // threads have ended.
+    // Only notify the emit queue to end after all the translation threads have
+    // ended.
     emitQueueNotifyEnd();
     for (std::thread &Worker : EmitterThreads) {
       Worker.join();
@@ -392,8 +393,8 @@
         Timers->mergeFrom(TLS->Timers);
     }
     if (BuildDefs::dump()) {
-      // Do a separate loop over AllThreadContexts to avoid holding
-      // two locks at once.
+      // Do a separate loop over AllThreadContexts to avoid holding two locks
+      // at once.
       auto Stats = getStatsCumulative();
       for (ThreadContext *TLS : AllThreadContexts)
         Stats->add(TLS->StatsCumulative);
@@ -413,8 +414,8 @@
     ICE_TLS_SET_FIELD(TLS, MyTLS);
     emitItems();
   }
-  /// Emit functions and global initializers from the emitter queue
-  /// until the queue is empty.
+  /// Emit functions and global initializers from the emitter queue until the
+  /// queue is empty.
   void emitItems();
 
   /// Uses DataLowering to lower Globals. Side effects:
@@ -425,12 +426,11 @@
   /// Lowers the profile information.
   void lowerProfileData();
 
-  /// Utility function to match a symbol name against a match string.
-  /// This is used in a few cases where we want to take some action on
-  /// a particular function or symbol based on a command-line argument,
-  /// such as changing the verbose level for a particular function.  An
-  /// empty Match argument means match everything.  Returns true if
-  /// there is a match.
+  /// Utility function to match a symbol name against a match string. This is
+  /// used in a few cases where we want to take some action on a particular
+  /// function or symbol based on a command-line argument, such as changing the
+  /// verbose level for a particular function. An empty Match argument means
+  /// match everything. Returns true if there is a match.
   static bool matchSymbolName(const IceString &SymbolName,
                               const IceString &Match) {
     return Match.empty() || Match == SymbolName;
@@ -552,9 +552,9 @@
   static void TlsInit() { ICE_TLS_INIT_FIELD(TLS); }
 };
 
-/// Helper class to push and pop a timer marker.  The constructor
-/// pushes a marker, and the destructor pops it.  This is for
-/// convenient timing of regions of code.
+/// Helper class to push and pop a timer marker. The constructor pushes a
+/// marker, and the destructor pops it. This is for convenient timing of regions
+/// of code.
 class TimerMarker {
   TimerMarker() = delete;
   TimerMarker(const TimerMarker &) = delete;
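A minimal sketch of the RAII shape described above, using the pushTimer/popTimer entry points declared earlier; the class name and constructor arguments here are hypothetical, not TimerMarker's actual interface:

    class ScopedTimer {
    public:
      ScopedTimer(GlobalContext *Ctx, TimerIdT ID, TimerStackIdT StackID)
          : Ctx(Ctx), ID(ID), StackID(StackID) {
        Ctx->pushTimer(ID, StackID); // start timing this region
      }
      ~ScopedTimer() { Ctx->popTimer(ID, StackID); } // stop when leaving scope

    private:
      GlobalContext *Ctx;
      TimerIdT ID;
      TimerStackIdT StackID;
    };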
@@ -589,8 +589,7 @@
   bool Active = false;
 };
 
-/// Helper class for locking the streams and then automatically
-/// unlocking them.
+/// Helper class for locking the streams and then automatically unlocking them.
 class OstreamLocker {
 private:
   OstreamLocker() = delete;
diff --git a/src/IceGlobalInits.cpp b/src/IceGlobalInits.cpp
index c95456c..2c1460a 100644
--- a/src/IceGlobalInits.cpp
+++ b/src/IceGlobalInits.cpp
@@ -8,9 +8,8 @@
 //===----------------------------------------------------------------------===//
 ///
 /// \file
-/// This file implements the notion of function declarations, global
-/// variable declarations, and the corresponding variable initializers
-/// in Subzero.
+/// This file implements the notion of function declarations, global variable
+/// declarations, and the corresponding variable initializers in Subzero.
 ///
 //===----------------------------------------------------------------------===//
 
@@ -152,9 +151,8 @@
     return;
   dumpType(Stream);
   Stream << " c\"";
-  // Code taken from PrintEscapedString() in AsmWriter.cpp.  Keep
-  // the strings in the same format as the .ll file for practical
-  // diffing.
+  // Code taken from PrintEscapedString() in AsmWriter.cpp. Keep the strings in
+  // the same format as the .ll file for practical diffing.
   for (uint8_t C : Contents) {
     if (isprint(C) && C != '\\' && C != '"')
       Stream << C;
diff --git a/src/IceGlobalInits.h b/src/IceGlobalInits.h
index c15aed0..8f51db2 100644
--- a/src/IceGlobalInits.h
+++ b/src/IceGlobalInits.h
@@ -8,10 +8,10 @@
 //===----------------------------------------------------------------------===//
 ///
 /// \file
-/// This file declares the representation of function declarations,
-/// global variable declarations, and the corresponding variable
-/// initializers in Subzero. Global variable initializers are
-/// represented as a sequence of simple initializers.
+/// This file declares the representation of function declarations, global
+/// variable declarations, and the corresponding variable initializers in
+/// Subzero. Global variable initializers are represented as a sequence of
+/// simple initializers.
 ///
 //===----------------------------------------------------------------------===//
 
@@ -81,8 +81,8 @@
   /// Returns true if when emitting names, we should suppress mangling.
   virtual bool getSuppressMangling() const = 0;
 
-  /// Mangles name for cross tests, unless external and not defined locally
-  /// (so that relocations accross pnacl-sz and pnacl-llc will work).
+  /// Mangles name for cross tests, unless external and not defined locally (so
+  /// that relocations across pnacl-sz and pnacl-llc will work).
   virtual IceString mangleName(GlobalContext *Ctx) const {
     return getSuppressMangling() ? Name : Ctx->mangleName(Name);
   }
@@ -97,8 +97,8 @@
   llvm::GlobalValue::LinkageTypes Linkage;
 };
 
-/// Models a function declaration. This includes the type signature of
-/// the function, its calling conventions, and its linkage.
+/// Models a function declaration. This includes the type signature of the
+/// function, its calling conventions, and its linkage.
 class FunctionDeclaration : public GlobalDeclaration {
   FunctionDeclaration() = delete;
   FunctionDeclaration(const FunctionDeclaration &) = delete;
@@ -286,8 +286,7 @@
              llvm::isa<ZeroInitializer>((*Initializers)[0].get()));
   }
 
-  /// Returns the number of bytes for the initializer of the global
-  /// address.
+  /// Returns the number of bytes for the initializer of the global address.
   SizeT getNumBytes() const {
     SizeT Count = 0;
     for (const std::unique_ptr<Initializer> &Init : *Initializers) {
@@ -296,19 +295,18 @@
     return Count;
   }
 
-  /// Adds Initializer to the list of initializers. Takes ownership of
-  /// the initializer.
+  /// Adds Initializer to the list of initializers. Takes ownership of the
+  /// initializer.
   void addInitializer(std::unique_ptr<Initializer> Initializer) {
     Initializers->emplace_back(std::move(Initializer));
     HasInitializer = true;
   }
 
-  /// Prints out type for initializer associated with the declaration
-  /// to Stream.
+  /// Prints out type for initializer associated with the declaration to Stream.
   void dumpType(Ostream &Stream) const final;
 
-  /// Prints out the definition of the global variable declaration
-  /// (including initialization).
+  /// Prints out the definition of the global variable declaration (including
+  /// initialization).
   void dump(GlobalContext *Ctx, Ostream &Stream) const final;
 
   static bool classof(const GlobalDeclaration *Addr) {
diff --git a/src/IceInst.cpp b/src/IceInst.cpp
index b92e954..d9b91f0 100644
--- a/src/IceInst.cpp
+++ b/src/IceInst.cpp
@@ -8,8 +8,8 @@
 //===----------------------------------------------------------------------===//
 ///
 /// \file
-/// This file implements the Inst class, primarily the various
-/// subclass constructors and dump routines.
+/// This file implements the Inst class, primarily the various subclass
+/// constructors and dump routines.
 ///
 //===----------------------------------------------------------------------===//
 
@@ -82,15 +82,15 @@
   Number = isDeleted() ? NumberDeleted : Func->newInstNumber();
 }
 
-// Delete the instruction if its tentative Dead flag is still set
-// after liveness analysis.
+// Delete the instruction if its tentative Dead flag is still set after
+// liveness analysis.
 void Inst::deleteIfDead() {
   if (Dead)
     setDeleted();
 }
 
-// If Src is a Variable, it returns true if this instruction ends
-// Src's live range.  Otherwise, returns false.
+// If Src is a Variable, it returns true if this instruction ends Src's live
+// range. Otherwise, returns false.
 bool Inst::isLastUse(const Operand *TestSrc) const {
   if (LiveRangesEnded == 0)
     return false; // early-exit optimization
@@ -116,17 +116,16 @@
 // with SpliceAssn spliced in:
 //   d = [x,y]
 //
-// Reconstruct the LiveRangesEnded bitmask in this instruction by
-// combining the LiveRangesEnded values of OrigInst and SpliceAssn.
-// If operands d and [x,y] contain a different number of variables,
-// then the bitmask position for e may be different in OrigInst and
-// the current instruction, requiring extra shifts and masks in the
-// computation.  In the example above, OrigInst has variable e in bit
-// position 3, whereas the current instruction has e in bit position 4
+// Reconstruct the LiveRangesEnded bitmask in this instruction by combining the
+// LiveRangesEnded values of OrigInst and SpliceAssn. If operands d and [x,y]
+// contain a different number of variables, then the bitmask position for e may
+// be different in OrigInst and the current instruction, requiring extra shifts
+// and masks in the computation. In the example above, OrigInst has variable e
+// in bit position 3, whereas the current instruction has e in bit position 4
 // because [x,y] consumes 2 bitmask slots while d only consumed 1.
 //
-// Additionally, set HasSideEffects if either OrigInst or SpliceAssn
-// have HasSideEffects set.
+// Additionally, set HasSideEffects if either OrigInst or SpliceAssn have
+// HasSideEffects set.
 void Inst::spliceLivenessInfo(Inst *OrigInst, Inst *SpliceAssn) {
   HasSideEffects |= OrigInst->HasSideEffects;
   HasSideEffects |= SpliceAssn->HasSideEffects;
@@ -184,8 +183,8 @@
   }
   if (Dead)
     return false;
-  // Phi arguments only get added to Live in the predecessor node, but
-  // we still need to update LiveRangesEnded.
+  // Phi arguments only get added to Live in the predecessor node, but we still
+  // need to update LiveRangesEnded.
   bool IsPhi = llvm::isa<InstPhi>(this);
   resetLastUses();
   FOREACH_VAR_IN_INST(Var, *this) {
@@ -195,20 +194,21 @@
       if (!IsPhi) {
         Live[VarNum] = true;
         // For a variable in SSA form, its live range can end at most once in a
-        // basic block.  However, after lowering to two-address instructions, we
-        // end up with sequences like "t=b;t+=c;a=t" where t's live range begins
-        // and ends twice.  ICE only allows a variable to have a single liveness
-        // interval in a basic block (except for blocks where a variable is
-        // live-in and live-out but there is a gap in the middle).  Therefore,
-        // this lowered sequence needs to represent a single conservative live
-        // range for t.  Since the instructions are being traversed backwards,
-        // we make sure LiveEnd is only set once by setting it only when
-        // LiveEnd[VarNum]==0 (sentinel value).  Note that it's OK to set
-        // LiveBegin multiple times because of the backwards traversal.
+        // basic block. However, after lowering to two-address instructions, we
+        // end up with sequences like "t=b;t+=c;a=t" where t's live range
+        // begins and ends twice. ICE only allows a variable to have a single
+        // liveness interval in a basic block (except for blocks where a
+        // variable is live-in and live-out but there is a gap in the middle).
+        // Therefore, this lowered sequence needs to represent a single
+        // conservative live range for t. Since the instructions are being
+        // traversed backwards, we make sure LiveEnd is only set once by
+        // setting it only when LiveEnd[VarNum]==0 (sentinel value). Note that
+        // it's OK to set LiveBegin multiple times because of the backwards
+        // traversal.
         if (LiveEnd && Liveness->getRangeMask(Var->getIndex())) {
           // Ideally, we would verify that VarNum wasn't already added in this
           // block, but this can't be done very efficiently with LiveEnd as a
-          // vector.  Instead, livenessPostprocess() verifies this after the
+          // vector. Instead, livenessPostprocess() verifies this after the
           // vector has been sorted.
           LiveEnd->push_back(std::make_pair(VarNum, InstNumber));
         }
@@ -249,9 +249,9 @@
   addSource(Source);
 }
 
-// If TargetTrue==TargetFalse, we turn it into an unconditional
-// branch.  This ensures that, along with the 'switch' instruction
-// semantics, there is at most one edge from one node to another.
+// If TargetTrue==TargetFalse, we turn it into an unconditional branch. This
+// ensures that, along with the 'switch' instruction semantics, there is at
+// most one edge from one node to another.
 InstBr::InstBr(Cfg *Func, Operand *Source, CfgNode *TargetTrue_,
                CfgNode *TargetFalse_)
     : InstHighLevel(Func, Inst::Br, 1, nullptr), TargetFalse(TargetFalse_),
@@ -334,18 +334,18 @@
   Labels = Func->allocateArrayOf<CfgNode *>(MaxSrcs);
 }
 
-// TODO: A Switch instruction (and maybe others) can add duplicate
-// edges.  We may want to de-dup Phis and validate consistency (i.e.,
-// the source operands are the same for duplicate edges), though it
-// seems the current lowering code is OK with this situation.
+// TODO: A Switch instruction (and maybe others) can add duplicate edges. We
+// may want to de-dup Phis and validate consistency (i.e., the source operands
+// are the same for duplicate edges), though it seems the current lowering code
+// is OK with this situation.
 void InstPhi::addArgument(Operand *Source, CfgNode *Label) {
   Labels[getSrcSize()] = Label;
   addSource(Source);
 }
 
-// Find the source operand corresponding to the incoming edge for the
-// given node.  TODO: This uses a linear-time search, which could be
-// improved if it becomes a problem.
+// Find the source operand corresponding to the incoming edge for the given
+// node. TODO: This uses a linear-time search, which could be improved if it
+// becomes a problem.
 Operand *InstPhi::getOperandForTarget(CfgNode *Target) const {
   for (SizeT I = 0; I < getSrcSize(); ++I) {
     if (Labels[I] == Target)
@@ -355,9 +355,9 @@
   return nullptr;
 }
 
-// Updates liveness for a particular operand based on the given
-// predecessor edge.  Doesn't mark the operand as live if the Phi
-// instruction is dead or deleted.
+// Updates liveness for a particular operand based on the given predecessor
+// edge. Doesn't mark the operand as live if the Phi instruction is dead or
+// deleted.
 void InstPhi::livenessPhiOperand(LivenessBV &Live, CfgNode *Target,
                                  Liveness *Liveness) {
   if (isDeleted() || Dead)
@@ -377,8 +377,8 @@
   llvm_unreachable("Phi operand not found for specified target node");
 }
 
-// Change "a=phi(...)" to "a_phi=phi(...)" and return a new
-// instruction "a=a_phi".
+// Change "a=phi(...)" to "a_phi=phi(...)" and return a new instruction
+// "a=a_phi".
 Inst *InstPhi::lower(Cfg *Func) {
   Variable *Dest = getDest();
   assert(Dest);
@@ -562,8 +562,8 @@
     return;
   Ostream &Str = Func->getContext()->getStrDump();
   bool First = true;
-  // Print "LIVEEND={a,b,c}" for all source operands whose live ranges
-  // are known to end at this instruction.
+  // Print "LIVEEND={a,b,c}" for all source operands whose live ranges are
+  // known to end at this instruction.
   if (Func->isVerbose(IceV_Liveness)) {
     FOREACH_VAR_IN_INST(Var, *this) {
       if (isLastUse(Var)) {
@@ -886,8 +886,7 @@
 void InstFakeDef::emit(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
-  // Go ahead and "emit" these for now, since they are relatively
-  // rare.
+  // Go ahead and "emit" these for now, since they are relatively rare.
   Ostream &Str = Func->getContext()->getStrEmit();
   Str << "\t# ";
   getDest()->emit(Func);
@@ -948,9 +947,8 @@
   if (!SrcVar)
     return false;
   if (Dest->hasReg() && Dest->getRegNum() == SrcVar->getRegNum()) {
-    // TODO: On x86-64, instructions like "mov eax, eax" are used to
-    // clear the upper 32 bits of rax.  We need to recognize and
-    // preserve these.
+    // TODO: On x86-64, instructions like "mov eax, eax" are used to clear the
+    // upper 32 bits of rax. We need to recognize and preserve these.
     return true;
   }
   if (!Dest->hasReg() && !SrcVar->hasReg() &&
diff --git a/src/IceInst.def b/src/IceInst.def
index a9cadb2..d265213 100644
--- a/src/IceInst.def
+++ b/src/IceInst.def
@@ -7,8 +7,7 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file defines properties of ICE instructions in the form of
-// x-macros.
+// This file defines properties of ICE instructions in the form of x-macros.
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/src/IceInst.h b/src/IceInst.h
index 80b2bd2..a727683 100644
--- a/src/IceInst.h
+++ b/src/IceInst.h
@@ -25,9 +25,9 @@
 
 // TODO: The Cfg structure, and instructions in particular, need to be
 // validated for things like valid operand types, valid branch targets, proper
-// ordering of Phi and non-Phi instructions, etc. Most of the validity
-// checking will be done in the bitcode reader. We need a list of everything
-// that should be validated, and tests for each.
+// ordering of Phi and non-Phi instructions, etc. Most of the validity checking
+// will be done in the bitcode reader. We need a list of everything that should
+// be validated, and tests for each.
 
 namespace Ice {
 
@@ -118,9 +118,9 @@
     return NodeList();
   }
   virtual bool isUnconditionalBranch() const { return false; }
-  /// If the instruction is a branch-type instruction with OldNode as a
-  /// target, repoint it to NewNode and return true, otherwise return
-  /// false. Repoint all instances of OldNode as a target.
+  /// If the instruction is a branch-type instruction with OldNode as a target,
+  /// repoint it to NewNode and return true; otherwise return false. Repoint all
+  /// instances of OldNode as a target.
   virtual bool repointEdges(CfgNode *OldNode, CfgNode *NewNode) {
     (void)OldNode;
     (void)NewNode;
@@ -130,11 +130,11 @@
   virtual bool isSimpleAssign() const { return false; }
 
   void livenessLightweight(Cfg *Func, LivenessBV &Live);
-  // Calculates liveness for this instruction.  Returns true if this
-  /// instruction is (tentatively) still live and should be retained, and false
-  /// if this instruction is (tentatively) dead and should be deleted. The
-  /// decision is tentative until the liveness dataflow algorithm has converged,
-  /// and then a separate pass permanently deletes dead instructions.
+  /// Calculates liveness for this instruction. Returns true if this instruction
+  /// is (tentatively) still live and should be retained, and false if this
+  /// instruction is (tentatively) dead and should be deleted. The decision is
+  /// tentative until the liveness dataflow algorithm has converged, and then a
+  /// separate pass permanently deletes dead instructions.
   bool liveness(InstNumberT InstNumber, LivenessBV &Live, Liveness *Liveness,
                 LiveBeginEndMap *LiveBegin, LiveBeginEndMap *LiveEnd);
 
@@ -143,13 +143,13 @@
   /// instructions, and a target-specific instruction results in a single native
   /// instruction.
   virtual uint32_t getEmitInstCount() const { return 0; }
-  // TODO(stichnot): Change Inst back to abstract once the g++ build
-  // issue is fixed.  llvm::ilist<Ice::Inst> doesn't work under g++
-  // because the resize(size_t, Ice::Inst) method is incorrectly
-  // declared and thus doesn't allow the abstract class Ice::Inst.
-  // The method should be declared resize(size_t, const Ice::Inst &).
-  // virtual void emit(const Cfg *Func) const = 0;
-  // virtual void emitIAS(const Cfg *Func) const = 0;
+  // TODO(stichnot): Change Inst back to abstract once the g++ build issue is
+  // fixed. llvm::ilist<Ice::Inst> doesn't work under g++ because the
+  // resize(size_t, Ice::Inst) method is incorrectly declared and thus doesn't
+  // allow the abstract class Ice::Inst. The method should be declared
+  // resize(size_t, const Ice::Inst &).
+  // virtual void emit(const Cfg *Func) const = 0;
+  // virtual void emitIAS(const Cfg *Func) const = 0;
   virtual void emit(const Cfg *) const {
     llvm_unreachable("emit on abstract class");
   }
@@ -179,8 +178,8 @@
       LiveRangesEnded |= (((LREndedBits)1u) << VarIndex);
   }
   void resetLastUses() { LiveRangesEnded = 0; }
-  /// The destroy() method lets the instruction cleanly release any
-  /// memory that was allocated via the Cfg's allocator.
+  /// The destroy() method lets the instruction cleanly release any memory that
+  /// was allocated via the Cfg's allocator.
   virtual void destroy(Cfg *Func) { Func->deallocateArrayOf<Operand *>(Srcs); }
 
   const InstKind Kind;
@@ -188,17 +187,17 @@
   InstNumberT Number;
   /// Deleted means irrevocably deleted.
   bool Deleted = false;
-  /// Dead means one of two things depending on context: (1) pending
-  /// deletion after liveness analysis converges, or (2) marked for
-  /// deletion during lowering due to a folded bool operation.
+  /// Dead means one of two things depending on context: (1) pending deletion
+  /// after liveness analysis converges, or (2) marked for deletion during
+  /// lowering due to a folded bool operation.
   bool Dead = false;
-  /// HasSideEffects means the instruction is something like a function
-  /// call or a volatile load that can't be removed even if its Dest
-  /// variable is not live.
+  /// HasSideEffects means the instruction is something like a function call or
+  /// a volatile load that can't be removed even if its Dest variable is not
+  /// live.
   bool HasSideEffects = false;
-  /// IsDestNonKillable means that liveness analysis shouldn't consider
-  /// this instruction to kill the Dest variable.  This is used when
-  /// lowering produces two assignments to the same variable.
+  /// IsDestNonKillable means that liveness analysis shouldn't consider this
+  /// instruction to kill the Dest variable. This is used when lowering produces
+  /// two assignments to the same variable.
   bool IsDestNonKillable = false;
 
   Variable *Dest;
@@ -207,13 +206,12 @@
   Operand **Srcs;
 
   /// LiveRangesEnded marks which Variables' live ranges end in this
-  /// instruction.  An instruction can have an arbitrary number of
-  /// source operands (e.g. a call instruction), and each source
-  /// operand can contain 0 or 1 Variable (and target-specific operands
-  /// could contain more than 1 Variable).  All the variables in an
-  /// instruction are conceptually flattened and each variable is
-  /// mapped to one bit position of the LiveRangesEnded bit vector.
-  /// Only the first CHAR_BIT * sizeof(LREndedBits) variables are
+  /// instruction. An instruction can have an arbitrary number of source
+  /// operands (e.g. a call instruction), and each source operand can contain 0
+  /// or 1 Variable (and target-specific operands could contain more than 1
+  /// Variable). All the variables in an instruction are conceptually flattened
+  /// and each variable is mapped to one bit position of the LiveRangesEnded bit
+  /// vector. Only the first CHAR_BIT * sizeof(LREndedBits) variables are
   /// tracked this way.
   using LREndedBits = uint32_t; // only first 32 src operands tracked, sorry
   LREndedBits LiveRangesEnded;
@@ -235,9 +233,9 @@
   }
 };
 
-/// Alloca instruction.  This captures the size in bytes as getSrc(0),
-/// and the required alignment in bytes.  The alignment must be either
-/// 0 (no alignment required) or a power of 2.
+/// Alloca instruction. This captures the size in bytes as getSrc(0), and the
+/// required alignment in bytes. The alignment must be either 0 (no alignment
+/// required) or a power of 2.
 class InstAlloca : public InstHighLevel {
   InstAlloca() = delete;
   InstAlloca(const InstAlloca &) = delete;
@@ -261,8 +259,8 @@
   const uint32_t AlignInBytes;
 };
 
-/// Binary arithmetic instruction.  The source operands are captured in
-/// getSrc(0) and getSrc(1).
+/// Binary arithmetic instruction. The source operands are captured in getSrc(0)
+/// and getSrc(1).
 class InstArithmetic : public InstHighLevel {
   InstArithmetic() = delete;
   InstArithmetic(const InstArithmetic &) = delete;
@@ -296,12 +294,11 @@
   const OpKind Op;
 };
 
-/// Assignment instruction.  The source operand is captured in
-/// getSrc(0).  This is not part of the LLVM bitcode, but is a useful
-/// abstraction for some of the lowering.  E.g., if Phi instruction
-/// lowering happens before target lowering, or for representing an
-/// Inttoptr instruction, or as an intermediate step for lowering a
-/// Load instruction.
+/// Assignment instruction. The source operand is captured in getSrc(0). This is
+/// not part of the LLVM bitcode, but is a useful abstraction for some of the
+/// lowering. E.g., if Phi instruction lowering happens before target lowering,
+/// or for representing an Inttoptr instruction, or as an intermediate step for
+/// lowering a Load instruction.
 class InstAssign : public InstHighLevel {
   InstAssign() = delete;
   InstAssign(const InstAssign &) = delete;
@@ -319,16 +316,16 @@
   InstAssign(Cfg *Func, Variable *Dest, Operand *Source);
 };
 
-/// Branch instruction.  This represents both conditional and
-/// unconditional branches.
+/// Branch instruction. This represents both conditional and unconditional
+/// branches.
 class InstBr : public InstHighLevel {
   InstBr() = delete;
   InstBr(const InstBr &) = delete;
   InstBr &operator=(const InstBr &) = delete;
 
 public:
-  /// Create a conditional branch.  If TargetTrue==TargetFalse, it is
-  /// optimized to an unconditional branch.
+  /// Create a conditional branch. If TargetTrue==TargetFalse, it is optimized
+  /// to an unconditional branch.
   static InstBr *create(Cfg *Func, Operand *Source, CfgNode *TargetTrue,
                         CfgNode *TargetFalse) {
     return new (Func->allocate<InstBr>())
@@ -365,8 +362,8 @@
   CfgNode *TargetTrue;  /// nullptr if unconditional branch
 };
 
-/// Call instruction.  The call target is captured as getSrc(0), and
-/// arg I is captured as getSrc(I+1).
+/// Call instruction. The call target is captured as getSrc(0), and arg I is
+/// captured as getSrc(I+1).
 class InstCall : public InstHighLevel {
   InstCall() = delete;
   InstCall(const InstCall &) = delete;
@@ -376,8 +373,8 @@
   static InstCall *create(Cfg *Func, SizeT NumArgs, Variable *Dest,
                           Operand *CallTarget, bool HasTailCall) {
     /// Set HasSideEffects to true so that the call instruction can't be
-    /// dead-code eliminated. IntrinsicCalls can override this if the
-    /// particular intrinsic is deletable and has no side-effects.
+    /// dead-code eliminated. IntrinsicCalls can override this if the particular
+    /// intrinsic is deletable and has no side-effects.
     const bool HasSideEffects = true;
     const InstKind Kind = Inst::Call;
     return new (Func->allocate<InstCall>()) InstCall(
@@ -458,8 +455,8 @@
                      Operand *Source2);
 };
 
-/// Floating-point comparison instruction.  The source operands are
-/// captured in getSrc(0) and getSrc(1).
+/// Floating-point comparison instruction. The source operands are captured in
+/// getSrc(0) and getSrc(1).
 class InstFcmp : public InstHighLevel {
   InstFcmp() = delete;
   InstFcmp(const InstFcmp &) = delete;
@@ -489,8 +486,8 @@
   const FCond Condition;
 };
 
-/// Integer comparison instruction.  The source operands are captured
-/// in getSrc(0) and getSrc(1).
+/// Integer comparison instruction. The source operands are captured in
+/// getSrc(0) and getSrc(1).
 class InstIcmp : public InstHighLevel {
   InstIcmp() = delete;
   InstIcmp(const InstIcmp &) = delete;
@@ -543,8 +540,8 @@
                     Operand *Source2, Operand *Source3);
 };
 
-/// Call to an intrinsic function.  The call target is captured as getSrc(0),
-/// and arg I is captured as getSrc(I+1).
+/// Call to an intrinsic function. The call target is captured as getSrc(0), and
+/// arg I is captured as getSrc(I+1).
 class InstIntrinsicCall : public InstCall {
   InstIntrinsicCall() = delete;
   InstIntrinsicCall(const InstIntrinsicCall &) = delete;
@@ -573,7 +570,7 @@
   const Intrinsics::IntrinsicInfo Info;
 };
 
-/// Load instruction.  The source address is captured in getSrc(0).
+/// Load instruction. The source address is captured in getSrc(0).
 class InstLoad : public InstHighLevel {
   InstLoad() = delete;
   InstLoad(const InstLoad &) = delete;
@@ -594,8 +591,8 @@
   InstLoad(Cfg *Func, Variable *Dest, Operand *SourceAddr);
 };
 
-/// Phi instruction.  For incoming edge I, the node is Labels[I] and
-/// the Phi source operand is getSrc(I).
+/// Phi instruction. For incoming edge I, the node is Labels[I] and the Phi
+/// source operand is getSrc(I).
 class InstPhi : public InstHighLevel {
   InstPhi() = delete;
   InstPhi(const InstPhi &) = delete;
@@ -621,15 +618,15 @@
     Inst::destroy(Func);
   }
 
-  /// Labels[] duplicates the InEdges[] information in the enclosing
-  /// CfgNode, but the Phi instruction is created before InEdges[]
-  /// is available, so it's more complicated to share the list.
+  /// Labels[] duplicates the InEdges[] information in the enclosing CfgNode,
+  /// but the Phi instruction is created before InEdges[] is available, so it's
+  /// more complicated to share the list.
   CfgNode **Labels;
 };
 
-/// Ret instruction.  The return value is captured in getSrc(0), but if
-/// there is no return value (void-type function), then
-/// getSrcSize()==0 and hasRetValue()==false.
+/// Ret instruction. The return value is captured in getSrc(0), but if there is
+/// no return value (void-type function), then getSrcSize()==0 and
+/// hasRetValue()==false.
 class InstRet : public InstHighLevel {
   InstRet() = delete;
   InstRet(const InstRet &) = delete;
@@ -675,8 +672,8 @@
              Operand *Source2);
 };
 
-/// Store instruction.  The address operand is captured, along with the
-/// data operand to be stored into the address.
+/// Store instruction. The address operand is captured, along with the data
+/// operand to be stored into the address.
 class InstStore : public InstHighLevel {
   InstStore() = delete;
   InstStore(const InstStore &) = delete;
@@ -700,8 +697,7 @@
   InstStore(Cfg *Func, Operand *Data, Operand *Addr);
 };
 
-/// Switch instruction.  The single source operand is captured as
-/// getSrc(0).
+/// Switch instruction. The single source operand is captured as getSrc(0).
 class InstSwitch : public InstHighLevel {
   InstSwitch() = delete;
   InstSwitch(const InstSwitch &) = delete;
@@ -744,8 +740,7 @@
   CfgNode **Labels; /// size is NumCases
 };
 
-/// Unreachable instruction.  This is a terminator instruction with no
-/// operands.
+/// Unreachable instruction. This is a terminator instruction with no operands.
 class InstUnreachable : public InstHighLevel {
   InstUnreachable() = delete;
   InstUnreachable(const InstUnreachable &) = delete;
@@ -765,7 +760,7 @@
   explicit InstUnreachable(Cfg *Func);
 };
 
-/// BundleLock instruction.  There are no operands.  Contains an option
+/// BundleLock instruction. There are no operands. Contains an option
 /// indicating whether align_to_end is specified.
 class InstBundleLock : public InstHighLevel {
   InstBundleLock() = delete;
@@ -791,7 +786,7 @@
   InstBundleLock(Cfg *Func, Option BundleOption);
 };
 
-/// BundleUnlock instruction.  There are no operands.
+/// BundleUnlock instruction. There are no operands.
 class InstBundleUnlock : public InstHighLevel {
   InstBundleUnlock() = delete;
   InstBundleUnlock(const InstBundleUnlock &) = delete;
@@ -812,18 +807,17 @@
   explicit InstBundleUnlock(Cfg *Func);
 };
 
-/// FakeDef instruction.  This creates a fake definition of a variable,
-/// which is how we represent the case when an instruction produces
-/// multiple results.  This doesn't happen with high-level ICE
-/// instructions, but might with lowered instructions.  For example,
-/// this would be a way to represent condition flags being modified by
-/// an instruction.
+/// FakeDef instruction. This creates a fake definition of a variable, which is
+/// how we represent the case when an instruction produces multiple results.
+/// This doesn't happen with high-level ICE instructions, but might with lowered
+/// instructions. For example, this would be a way to represent condition flags
+/// being modified by an instruction.
 ///
-/// It's generally useful to set the optional source operand to be the
-/// dest variable of the instruction that actually produces the FakeDef
-/// dest.  Otherwise, the original instruction could be dead-code
-/// eliminated if its dest operand is unused, and therefore the FakeDef
-/// dest wouldn't be properly initialized.
+/// It's generally useful to set the optional source operand to be the dest
+/// variable of the instruction that actually produces the FakeDef dest.
+/// Otherwise, the original instruction could be dead-code eliminated if its
+/// dest operand is unused, and therefore the FakeDef dest wouldn't be properly
+/// initialized.
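+///
+/// For example (a hypothetical lowered sequence), a multiply that also
+/// produces a high half might be modeled as "t_lo = mul a, b" followed by
+/// "FakeDef(t_hi, t_lo)", with t_lo as the optional source described above.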
 class InstFakeDef : public InstHighLevel {
   InstFakeDef() = delete;
   InstFakeDef(const InstFakeDef &) = delete;
@@ -843,11 +837,10 @@
   InstFakeDef(Cfg *Func, Variable *Dest, Variable *Src);
 };
 
-/// FakeUse instruction.  This creates a fake use of a variable, to
-/// keep the instruction that produces that variable from being
-/// dead-code eliminated.  This is useful in a variety of lowering
-/// situations.  The FakeUse instruction has no dest, so it can itself
-/// never be dead-code eliminated.
+/// FakeUse instruction. This creates a fake use of a variable, to keep the
+/// instruction that produces that variable from being dead-code eliminated.
+/// This is useful in a variety of lowering situations. The FakeUse instruction
+/// has no dest, so it can itself never be dead-code eliminated.
 class InstFakeUse : public InstHighLevel {
   InstFakeUse() = delete;
   InstFakeUse(const InstFakeUse &) = delete;
@@ -866,16 +859,15 @@
   InstFakeUse(Cfg *Func, Variable *Src);
 };
 
-/// FakeKill instruction.  This "kills" a set of variables by modeling
-/// a trivial live range at this instruction for each (implicit)
-/// variable.  The primary use is to indicate that scratch registers
-/// are killed after a call, so that the register allocator won't
-/// assign a scratch register to a variable whose live range spans a
-/// call.
+/// FakeKill instruction. This "kills" a set of variables by modeling a trivial
+/// live range at this instruction for each (implicit) variable. The primary use
+/// is to indicate that scratch registers are killed after a call, so that the
+/// register allocator won't assign a scratch register to a variable whose live
+/// range spans a call.
 ///
-/// The FakeKill instruction also holds a pointer to the instruction
-/// that kills the set of variables, so that if that linked instruction
-/// gets dead-code eliminated, the FakeKill instruction will as well.
+/// The FakeKill instruction also holds a pointer to the instruction that kills
+/// the set of variables, so that if that linked instruction gets dead-code
+/// eliminated, the FakeKill instruction will as well.
 class InstFakeKill : public InstHighLevel {
   InstFakeKill() = delete;
   InstFakeKill(const InstFakeKill &) = delete;
@@ -898,10 +890,9 @@
   const Inst *Linked;
 };
 
-/// JumpTable instruction. This represents a jump table that will be
-/// stored in the .rodata section. This is used to track and repoint
-/// the target CfgNodes which may change, for example due to
-/// splitting for phi lowering.
+/// JumpTable instruction. This represents a jump table that will be stored in
+/// the .rodata section. This is used to track and repoint the target CfgNodes
+/// which may change, for example due to splitting for phi lowering.
 class InstJumpTable : public InstHighLevel {
   InstJumpTable() = delete;
   InstJumpTable(const InstJumpTable &) = delete;
@@ -968,8 +959,8 @@
 
 namespace llvm {
 
-/// Override the default ilist traits so that Inst's private ctor and
-/// deleted dtor aren't invoked.
+/// Override the default ilist traits so that Inst's private ctor and deleted
+/// dtor aren't invoked.
 template <>
 struct ilist_traits<Ice::Inst> : public ilist_default_traits<Ice::Inst> {
   Ice::Inst *createSentinel() const {
diff --git a/src/IceInstARM32.cpp b/src/IceInstARM32.cpp
index 9a68115..d449641 100644
--- a/src/IceInstARM32.cpp
+++ b/src/IceInstARM32.cpp
@@ -8,8 +8,8 @@
 //===----------------------------------------------------------------------===//
 ///
 /// \file
-/// This file implements the InstARM32 and OperandARM32 classes,
-/// primarily the constructors and the dump()/emit() methods.
+/// This file implements the InstARM32 and OperandARM32 classes, primarily the
+/// constructors and the dump()/emit() methods.
 ///
 //===----------------------------------------------------------------------===//
 
@@ -271,16 +271,14 @@
       TargetTrue(TargetTrue), TargetFalse(TargetFalse), Label(Label) {}
 
 bool InstARM32Br::optimizeBranch(const CfgNode *NextNode) {
-  // If there is no next block, then there can be no fallthrough to
-  // optimize.
+  // If there is no next block, then there can be no fallthrough to optimize.
   if (NextNode == nullptr)
     return false;
   // Intra-block conditional branches can't be optimized.
   if (Label)
     return false;
-  // If there is no fallthrough node, such as a non-default case label
-  // for a switch instruction, then there is no opportunity to
-  // optimize.
+  // If there is no fallthrough node, such as a non-default case label for a
+  // switch instruction, then there is no opportunity to optimize.
   if (getTargetFalse() == nullptr)
     return false;
 
@@ -290,15 +288,15 @@
     setDeleted();
     return true;
   }
-  // If the fallthrough is to the next node, set fallthrough to nullptr
-  // to indicate.
+  // If the fallthrough is to the next node, set fallthrough to nullptr to
+  // indicate this.
   if (getTargetFalse() == NextNode) {
     TargetFalse = nullptr;
     return true;
   }
-  // If TargetTrue is the next node, and TargetFalse is not nullptr
-  // (which was already tested above), then invert the branch
-  // condition, swap the targets, and set new fallthrough to nullptr.
+  // If TargetTrue is the next node, and TargetFalse is not nullptr (which was
+  // already tested above), then invert the branch condition, swap the targets,
+  // and set new fallthrough to nullptr.
   if (getTargetTrue() == NextNode) {
     assert(Predicate != CondARM32::AL);
     setPredicate(getOppositeCondition(getPredicate()));
@@ -338,10 +336,10 @@
 
 InstARM32Pop::InstARM32Pop(Cfg *Func, const VarList &Dests)
     : InstARM32(Func, InstARM32::Pop, 0, nullptr), Dests(Dests) {
-  // Track modifications to Dests separately via FakeDefs.
-  // Also, a pop instruction affects the stack pointer and so it should not
-  // be allowed to be automatically dead-code eliminated. This is automatic
-  // since we leave the Dest as nullptr.
+  // Track modifications to Dests separately via FakeDefs. Also, a pop
+  // instruction affects the stack pointer and so it should not be allowed to
+  // be automatically dead-code eliminated. This is automatic since we leave
+  // the Dest as nullptr.
 }
 
 InstARM32Push::InstARM32Push(Cfg *Func, const VarList &Srcs)
@@ -450,8 +448,8 @@
     Operand *Src0 = getSrc(0);
     if (const auto *Src0V = llvm::dyn_cast<Variable>(Src0)) {
       if (!Src0V->hasReg()) {
-        // Always use the whole stack slot. A 32-bit load has a larger range
-        // of offsets than 16-bit, etc.
+        // Always use the whole stack slot. A 32-bit load has a larger range of
+        // offsets than 16-bit, etc.
         ActualOpcode = IceString("ldr");
       }
     } else {
@@ -662,13 +660,13 @@
   Ostream &Str = Func->getContext()->getStrEmit();
   assert(getSrcSize() == 1);
   if (llvm::isa<ConstantInteger32>(getCallTarget())) {
-    // This shouldn't happen (typically have to copy the full 32-bits
-    // to a register and do an indirect jump).
+    // This shouldn't happen (we typically have to copy the full 32 bits to a
+    // register and do an indirect jump).
     llvm::report_fatal_error("ARM32Call to ConstantInteger32");
   } else if (const auto CallTarget =
                  llvm::dyn_cast<ConstantRelocatable>(getCallTarget())) {
-    // Calls only have 24-bits, but the linker should insert veneers to
-    // extend the range if needed.
+    // Calls only have 24 bits, but the linker should insert veneers to extend
+    // the range if needed.
     Str << "\t"
         << "bl"
         << "\t";
diff --git a/src/IceInstARM32.def b/src/IceInstARM32.def
index 1836667..4e34cbf 100644
--- a/src/IceInstARM32.def
+++ b/src/IceInstARM32.def
@@ -17,20 +17,20 @@
 // NOTE: PC and SP are not considered isInt, to avoid register allocating.
 //
 // For the NaCl sandbox we also need to r9 for TLS, so just reserve always.
-// TODO(jvoung): Allow r9 to be isInt when sandboxing is turned off
-// (native mode).
+// TODO(jvoung): Allow r9 to be isInt when sandboxing is turned off (native
+// mode).
 //
 // IP is not considered isInt to reserve it as a scratch register. A scratch
 // register is useful for expanding instructions post-register allocation.
 //
-// LR is not considered isInt to avoid being allocated as a register.
-// It is technically preserved, but save/restore is handled separately,
-// based on whether or not the function MaybeLeafFunc.
+// LR is not considered isInt to avoid being allocated as a register. It is
+// technically preserved, but save/restore is handled separately, based on
+// whether or not the function is a leaf function (MaybeLeafFunc).
 
 // ALIASESn is a family of macros that we use to define register aliasing in
 // ARM32. n indicates how many aliases are being provided to the macro. It
-// assumes the parameters are register names declared in a namespace/class named
-// RegARM32.
+// assumes the parameters are register names declared in a namespace/class
+// named RegARM32.
 #define ALIASES1(r0)                                                           \
     {RegARM32::r0}
 #define ALIASES2(r0, r1)                                                       \
@@ -152,12 +152,12 @@
 //#define X(val, encode, name, scratch, preserved, stackptr, frameptr,
 //          isInt, isFP32,isFP64, isVec128, aliases_init)
 
-// D registers 0-7 are scratch, 8-15 are preserved, and 16-31
-// are also scratch (if supported by the D32 feature vs D16).
-// D registers are defined in reverse order so that, during register allocation,
-// Subzero will prefer higher D registers. In processors supporting the D32
-// feature this will effectively cause double allocation to bias towards
-// allocating "high" D registers, which do not alias any S registers.
+// D registers 0-7 are scratch, 8-15 are preserved, and 16-31 are also scratch
+// (if supported by the D32 feature vs D16). D registers are defined in reverse
+// order so that, during register allocation, Subzero will prefer higher D
+// registers. In processors supporting the D32 feature this will effectively
+// cause double allocation to bias towards allocating "high" D registers, which
+// do not alias any S registers.
 //
 // Regenerate this with the following python script:
 // def print_dregs():
@@ -251,9 +251,9 @@
 //#define X(val, encode, name, scratch, preserved, stackptr, frameptr,
 //          isInt, isFP32, isFP64, isVec128, aliases_init)
 
-// Q registers 0-3 are scratch, 4-7 are preserved, and 8-15
-// are also scratch (if supported by the D32 feature).
-// Q registers are defined in reverse order for the same reason as D registers.
+// Q registers 0-3 are scratch, 4-7 are preserved, and 8-15 are also scratch
+// (if supported by the D32 feature). Q registers are defined in reverse order
+// for the same reason as D registers.
 //
 // Regenerate this with the following python script:
 // def print_qregs():
@@ -320,10 +320,10 @@
 //          isInt, isFP32, isFP64, isVec128, alias_init)
 #undef ALIASES
 
-// We also provide a combined table, so that there is a namespace where
-// all of the registers are considered and have distinct numberings.
-// This is in contrast to the above, where the "encode" is based on how
-// the register numbers will be encoded in binaries and values can overlap.
+// We also provide a combined table, so that there is a namespace where all of
+// the registers are considered and have distinct numberings. This is in
+// contrast to the above, where the "encode" is based on how the register
+// numbers will be encoded in binaries and values can overlap.
 #define REGARM32_TABLE                                                         \
   /* val, encode, name, scratch, preserved, stackptr, frameptr, isInt,         \
      isFP32, isFP64, isVec128, alias_init */                                   \
@@ -347,8 +347,8 @@
 // define X(val, init)
 
 // Load/Store instruction width suffixes and FP/Vector element size suffixes
-// the # of offset bits allowed as part of an addressing mode (for sign or
-// zero extending load/stores).
+// plus the # of offset bits allowed as part of an addressing mode (for sign or
+// zero extending load/stores).
 #define ICETYPEARM32_TABLE                                                     \
   /* tag,          element type, int_width, vec_width, addr bits sext, zext */ \
   X(IceType_void,  IceType_void, "" , ""    , 0 , 0)                           \
@@ -378,9 +378,9 @@
   X(RRX, "rrx")
 //#define X(tag, emit)
 
-// Attributes for the condition code 4-bit encoding (that is independent
-// of the APSR's NZCV fields). For example, EQ is 0, but corresponds to
-// Z = 1, and NE is 1, but corresponds to Z = 0.
+// Attributes for the condition code 4-bit encoding (that is independent of the
+// APSR's NZCV fields). For example, EQ is 0, but corresponds to Z = 1, and NE
+// is 1, but corresponds to Z = 0.
 #define ICEINSTARM32COND_TABLE                                                 \
   /* enum value, encoding, opposite, emit */                                   \
   X(EQ, 0 , NE, "eq")   /* equal */                                            \
diff --git a/src/IceInstARM32.h b/src/IceInstARM32.h
index 8c2ea6f..89e5655 100644
--- a/src/IceInstARM32.h
+++ b/src/IceInstARM32.h
@@ -8,9 +8,9 @@
 //===----------------------------------------------------------------------===//
 ///
 /// \file
-/// This file declares the InstARM32 and OperandARM32 classes and
-/// their subclasses.  This represents the machine instructions and
-/// operands used for ARM32 code selection.
+/// This file declares the InstARM32 and OperandARM32 classes and their
+/// subclasses. This represents the machine instructions and operands used for
+/// ARM32 code selection.
 ///
 //===----------------------------------------------------------------------===//
 
@@ -27,7 +27,7 @@
 
 class TargetARM32;
 
-/// OperandARM32 extends the Operand hierarchy.  Its subclasses are
+/// OperandARM32 extends the Operand hierarchy. Its subclasses are
 /// OperandARM32Mem and OperandARM32Flex.
 class OperandARM32 : public Operand {
   OperandARM32() = delete;
@@ -87,17 +87,17 @@
   /// NOTE: The Variable-typed operands have to be registers.
   ///
   /// (1) Reg + Imm. The Immediate actually has a limited number of bits
-  /// for encoding, so check canHoldOffset first. It cannot handle
-  /// general Constant operands like ConstantRelocatable, since a relocatable
-  /// can potentially take up too many bits.
+  /// for encoding, so check canHoldOffset first. It cannot handle general
+  /// Constant operands like ConstantRelocatable, since a relocatable can
+  /// potentially take up too many bits.
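+  /// (Roughly an ARM operand like [r0, #8].)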
   static OperandARM32Mem *create(Cfg *Func, Type Ty, Variable *Base,
                                  ConstantInteger32 *ImmOffset,
                                  AddrMode Mode = Offset) {
     return new (Func->allocate<OperandARM32Mem>())
         OperandARM32Mem(Func, Ty, Base, ImmOffset, Mode);
   }
-  /// (2) Reg +/- Reg with an optional shift of some kind and amount.
-  /// Note that this mode is disallowed in the NaCl sandbox.
+  /// (2) Reg +/- Reg with an optional shift of some kind and amount. Note that
+  /// this mode is disallowed in the NaCl sandbox.
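+  /// (Roughly an ARM operand like [r0, r1, lsl #2].)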
   static OperandARM32Mem *create(Cfg *Func, Type Ty, Variable *Base,
                                  Variable *Index, ShiftKind ShiftOp = kNoShift,
                                  uint16_t ShiftAmt = 0,
@@ -130,10 +130,10 @@
     return Operand->getKind() == static_cast<OperandKind>(kMem);
   }
 
-  /// Return true if a load/store instruction for an element of type Ty
-  /// can encode the Offset directly in the immediate field of the 32-bit
-  /// ARM instruction. For some types, if the load is Sign extending, then
-  /// the range is reduced.
+  /// Return true if a load/store instruction for an element of type Ty can
+  /// encode the Offset directly in the immediate field of the 32-bit ARM
+  /// instruction. For some types, if the load is Sign extending, then the range
+  /// is reduced.
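+  /// (Roughly: word-sized ldr/str takes a 12-bit immediate offset, while the
+  /// halfword and sign-extending byte forms only take 8 bits.)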
   static bool canHoldOffset(Type Ty, bool SignExt, int32_t Offset);
 
 private:
@@ -150,10 +150,9 @@
   AddrMode Mode;
 };
 
-/// OperandARM32Flex represent the "flexible second operand" for
-/// data-processing instructions. It can be a rotatable 8-bit constant, or
-/// a register with an optional shift operand. The shift amount can even be
-/// a third register.
+/// OperandARM32Flex represents the "flexible second operand" for data-processing
+/// instructions. It can be a rotatable 8-bit constant, or a register with an
+/// optional shift operand. The shift amount can even be a third register.
 class OperandARM32Flex : public OperandARM32 {
   OperandARM32Flex() = delete;
   OperandARM32Flex(const OperandARM32Flex &) = delete;
@@ -191,8 +190,8 @@
     return Operand->getKind() == static_cast<OperandKind>(kFlexImm);
   }
 
-  /// Return true if the Immediate can fit in the ARM flexible operand.
-  /// Fills in the out-params RotateAmt and Immed_8 if Immediate fits.
+  /// Return true if the Immediate can fit in the ARM flexible operand. Fills in
+  /// the out-params RotateAmt and Immed_8 if Immediate fits.
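+  /// (A flexible immediate is an 8-bit value rotated right by an even amount,
+  /// so e.g. 0xFF00 is representable while 0x101 is not.)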
   static bool canHoldImm(uint32_t Immediate, uint32_t *RotateAmt,
                          uint32_t *Immed_8);
 
@@ -244,9 +243,9 @@
 
 /// StackVariable represents a Var that isn't assigned a register (stack-only).
 /// It is assigned a stack slot, but the slot's offset may be too large to
-/// represent in the native addressing mode, and so it has a separate
-/// base register from SP/FP, where the offset from that base register is
-/// then in range.
+/// represent in the native addressing mode, and so it has a separate base
+/// register from SP/FP, where the offset from that base register is then in
+/// range.
 class StackVariable final : public Variable {
   StackVariable() = delete;
   StackVariable(const StackVariable &) = delete;
@@ -272,8 +271,8 @@
 };
 
 /// Base class for ARM instructions. While most ARM instructions can be
-/// conditionally executed, a few of them are not predicable (halt,
-/// memory barriers, etc.).
+/// conditionally executed, a few of them are not predicable (halt, memory
+/// barriers, etc.).
 class InstARM32 : public InstTarget {
   InstARM32() = delete;
   InstARM32(const InstARM32 &) = delete;
@@ -525,8 +524,8 @@
   static const char *Opcode;
 };
 
-/// Base class for assignment instructions.
-/// These can be tested for redundancy (and elided if redundant).
+/// Base class for assignment instructions. These can be tested for redundancy
+/// (and elided if redundant).
 template <InstARM32::InstKindARM32 K>
 class InstARM32Movlike : public InstARM32Pred {
   InstARM32Movlike() = delete;
@@ -576,8 +575,8 @@
   InstARM32ThreeAddrGPR &operator=(const InstARM32ThreeAddrGPR &) = delete;
 
 public:
-  /// Create an ordinary binary-op instruction like add, and sub.
-  /// Dest and Src1 must be registers.
+  /// Create an ordinary binary-op instruction like add and sub. Dest and Src1
+  /// must be registers.
   static InstARM32ThreeAddrGPR *create(Cfg *Func, Variable *Dest,
                                        Variable *Src0, Operand *Src1,
                                        CondARM32::Cond Predicate,
@@ -618,10 +617,10 @@
   bool SetFlags;
 };
 
-/// Instructions of the form x := y op z, for vector/FP.  We leave these as
+/// Instructions of the form x := y op z, for vector/FP. We leave these as
 /// unconditional: "ARM deprecates the conditional execution of any instruction
 /// encoding provided by the Advanced SIMD Extension that is not also provided
-/// by the Floating-point (VFP) extension".  They do not set flags.
+/// by the Floating-point (VFP) extension". They do not set flags.
 template <InstARM32::InstKindARM32 K>
 class InstARM32ThreeAddrFP : public InstARM32 {
   InstARM32ThreeAddrFP() = delete;
@@ -629,8 +628,8 @@
   InstARM32ThreeAddrFP &operator=(const InstARM32ThreeAddrFP &) = delete;
 
 public:
-  /// Create a vector/FP binary-op instruction like vadd, and vsub.
-  /// Everything must be a register.
+  /// Create a vector/FP binary-op instruction like vadd and vsub. Everything
+  /// must be a register.
   static InstARM32ThreeAddrFP *create(Cfg *Func, Variable *Dest, Variable *Src0,
                                       Variable *Src1) {
     return new (Func->allocate<InstARM32ThreeAddrFP>())
@@ -779,24 +778,24 @@
 using InstARM32Vmul = InstARM32ThreeAddrFP<InstARM32::Vmul>;
 using InstARM32Vsub = InstARM32ThreeAddrFP<InstARM32::Vsub>;
 using InstARM32Ldr = InstARM32Movlike<InstARM32::Ldr>;
-/// Move instruction (variable <- flex). This is more of a pseudo-inst.
-/// If var is a register, then we use "mov". If var is stack, then we use
-/// "str" to store to the stack.
+/// Move instruction (variable <- flex). This is more of a pseudo-inst. If var
+/// is a register, then we use "mov". If var is stack, then we use "str" to
+/// store to the stack.
 using InstARM32Mov = InstARM32Movlike<InstARM32::Mov>;
 /// Represents various vector mov instruction forms (simple single source,
 /// single dest forms only, not the 2 GPR <-> 1 D reg forms, etc.).
 using InstARM32Vldr = InstARM32Movlike<InstARM32::Vldr>;
-/// MovT leaves the bottom bits alone so dest is also a source.
-/// This helps indicate that a previous MovW setting dest is not dead code.
+/// MovT leaves the bottom bits alone so dest is also a source. This helps
+/// indicate that a previous MovW setting dest is not dead code.
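+/// E.g., "movw r0, #0x5678" followed by "movt r0, #0x1234" leaves r0 holding
+/// 0x12345678.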
 using InstARM32Movt = InstARM32TwoAddrGPR<InstARM32::Movt>;
 using InstARM32Movw = InstARM32UnaryopGPR<InstARM32::Movw, false>;
 using InstARM32Clz = InstARM32UnaryopGPR<InstARM32::Clz, false>;
 using InstARM32Mvn = InstARM32UnaryopGPR<InstARM32::Mvn, false>;
 using InstARM32Rbit = InstARM32UnaryopGPR<InstARM32::Rbit, false>;
 using InstARM32Rev = InstARM32UnaryopGPR<InstARM32::Rev, false>;
-// Technically, the uxt{b,h} and sxt{b,h} instructions have a rotation
-// operand as well (rotate source by 8, 16, 24 bits prior to extending),
-// but we aren't using that for now, so just model as a Unaryop.
+// Technically, the uxt{b,h} and sxt{b,h} instructions have a rotation operand
+// as well (rotate source by 8, 16, 24 bits prior to extending), but we aren't
+// using that for now, so just model as a Unaryop.
 using InstARM32Sxt = InstARM32UnaryopGPR<InstARM32::Sxt, true>;
 using InstARM32Uxt = InstARM32UnaryopGPR<InstARM32::Uxt, true>;
 using InstARM32Vsqrt = InstARM32UnaryopFP<InstARM32::Vsqrt>;
@@ -805,9 +804,9 @@
 using InstARM32Cmp = InstARM32CmpLike<InstARM32::Cmp>;
 using InstARM32Tst = InstARM32CmpLike<InstARM32::Tst>;
 
-// InstARM32Label represents an intra-block label that is the target
-// of an intra-block branch.  The offset between the label and the
-// branch must be fit in the instruction immediate (considered "near").
+// InstARM32Label represents an intra-block label that is the target of an
+// intra-block branch. The offset between the label and the branch must fit
+// in the instruction immediate (considered "near").
 class InstARM32Label : public InstARM32 {
   InstARM32Label() = delete;
   InstARM32Label(const InstARM32Label &) = delete;
@@ -852,9 +851,9 @@
     return new (Func->allocate<InstARM32Br>())
         InstARM32Br(Func, NoCondTarget, Target, NoLabel, CondARM32::AL);
   }
-  /// Create a non-terminator conditional branch to a node, with a
-  /// fallthrough to the next instruction in the current node.  This is
-  /// used for switch lowering.
+  /// Create a non-terminator conditional branch to a node, with a fallthrough
+  /// to the next instruction in the current node. This is used for switch
+  /// lowering.
   static InstARM32Br *create(Cfg *Func, CfgNode *Target,
                              CondARM32::Cond Predicate) {
     assert(Predicate != CondARM32::AL);
@@ -903,18 +902,18 @@
   const InstARM32Label *Label; // Intra-block branch target
 };
 
-/// AdjustStack instruction - subtracts SP by the given amount and
-/// updates the stack offset during code emission.
+/// AdjustStack instruction - subtracts SP by the given amount and updates the
+/// stack offset during code emission.
 class InstARM32AdjustStack : public InstARM32 {
   InstARM32AdjustStack() = delete;
   InstARM32AdjustStack(const InstARM32AdjustStack &) = delete;
   InstARM32AdjustStack &operator=(const InstARM32AdjustStack &) = delete;
 
 public:
-  /// Note: We need both Amount and SrcAmount. If Amount is too large then
-  /// it needs to be copied to a register (so SrcAmount could be a register).
-  /// However, we also need the numeric Amount for bookkeeping, and it's
-  /// hard to pull that from the generic SrcAmount operand.
+  /// Note: We need both Amount and SrcAmount. If Amount is too large then it
+  /// needs to be copied to a register (so SrcAmount could be a register).
+  /// However, we also need the numeric Amount for bookkeeping, and it's hard to
+  /// pull that from the generic SrcAmount operand.
   static InstARM32AdjustStack *create(Cfg *Func, Variable *SP, SizeT Amount,
                                       Operand *SrcAmount) {
     return new (Func->allocate<InstARM32AdjustStack>())
@@ -932,7 +931,7 @@
   const SizeT Amount;
 };
 
-/// Call instruction (bl/blx).  Arguments should have already been pushed.
+/// Call instruction (bl/blx). Arguments should have already been pushed.
 /// Technically bl and the register form of blx can be predicated, but we'll
 /// leave that out until needed.
 class InstARM32Call : public InstARM32 {
@@ -977,8 +976,8 @@
   VarList Dests;
 };
 
-/// Push a list of GPRs. Technically this can be predicated, but we don't
-/// need that functionality.
+/// Push a list of GPRs. Technically this can be predicated, but we don't need
+/// that functionality.
 class InstARM32Push : public InstARM32 {
   InstARM32Push() = delete;
   InstARM32Push(const InstARM32Push &) = delete;
@@ -997,11 +996,11 @@
   InstARM32Push(Cfg *Func, const VarList &Srcs);
 };
 
-/// Ret pseudo-instruction.  This is actually a "bx" instruction with
-/// an "lr" register operand, but epilogue lowering will search for a Ret
-/// instead of a generic "bx". This instruction also takes a Source
-/// operand (for non-void returning functions) for liveness analysis, though
-/// a FakeUse before the ret would do just as well.
+/// Ret pseudo-instruction. This is actually a "bx" instruction with an "lr"
+/// register operand, but epilogue lowering will search for a Ret instead of a
+/// generic "bx". This instruction also takes a Source operand (for non-void
+/// returning functions) for liveness analysis, though a FakeUse before the ret
+/// would do just as well.
 ///
 /// NOTE: Even though "bx" can be predicated, for now leave out the predication
 /// since it's not yet known to be useful for Ret. That may complicate finding
@@ -1025,8 +1024,8 @@
   InstARM32Ret(Cfg *Func, Variable *LR, Variable *Source);
 };
 
-/// Store instruction. It's important for liveness that there is no Dest
-/// operand (OperandARM32Mem instead of Dest Variable).
+/// Store instruction. It's important for liveness that there is no Dest operand
+/// (OperandARM32Mem instead of Dest Variable).
 class InstARM32Str : public InstARM32Pred {
   InstARM32Str() = delete;
   InstARM32Str(const InstARM32Str &) = delete;
@@ -1205,9 +1204,9 @@
   Variable *Dest1 = nullptr;
 };
 
-// Declare partial template specializations of emit() methods that
-// already have default implementations.  Without this, there is the
-// possibility of ODR violations and link errors.
+// Declare partial template specializations of emit() methods that already have
+// default implementations. Without this, there is the possibility of ODR
+// violations and link errors.
 
 template <> void InstARM32Ldr::emit(const Cfg *Func) const;
 template <> void InstARM32Mov::emit(const Cfg *Func) const;
diff --git a/src/IceInstMIPS32.cpp b/src/IceInstMIPS32.cpp
index e386806..7773272 100644
--- a/src/IceInstMIPS32.cpp
+++ b/src/IceInstMIPS32.cpp
@@ -8,8 +8,8 @@
 //===----------------------------------------------------------------------===//
 //
 /// \file
-/// This file implements the InstMips32 and OperandMips32 classes,
-/// primarily the constructors and the dump()/emit() methods.
+/// This file implements the InstMips32 and OperandMips32 classes, primarily the
+/// constructors and the dump()/emit() methods.
 ///
 //===----------------------------------------------------------------------===//
 
diff --git a/src/IceInstMIPS32.h b/src/IceInstMIPS32.h
index e426598..1c4863f 100644
--- a/src/IceInstMIPS32.h
+++ b/src/IceInstMIPS32.h
@@ -8,9 +8,9 @@
 //===----------------------------------------------------------------------===//
 ///
 /// \file
-/// This file declares the InstMIPS32 and OperandMIPS32 classes and
-/// their subclasses.  This represents the machine instructions and
-/// operands used for MIPS32 code selection.
+/// This file declares the InstMIPS32 and OperandMIPS32 classes and their
+/// subclasses. This represents the machine instructions and operands used for
+/// MIPS32 code selection.
 ///
 //===----------------------------------------------------------------------===//
 
@@ -47,13 +47,13 @@
   }
 };
 
-/// Ret pseudo-instruction.  This is actually a "jr" instruction with
-/// an "ra" register operand, but epilogue lowering will search for a Ret
-/// instead of a generic "jr". This instruction also takes a Source
-/// operand (for non-void returning functions) for liveness analysis, though
-/// a FakeUse before the ret would do just as well.
-/// TODO(reed kotler): This needs was take from the ARM port and needs to be
-/// scrubbed in the future.
+/// Ret pseudo-instruction. This is actually a "jr" instruction with an "ra"
+/// register operand, but epilogue lowering will search for a Ret instead of a
+/// generic "jr". This instruction also takes a Source operand (for non-void
+/// returning functions) for liveness analysis, though a FakeUse before the ret
+/// would do just as well.
+// TODO(reed kotler): This was taken from the ARM port and needs to be
+// scrubbed in the future.
 class InstMIPS32Ret : public InstMIPS32 {
 
   InstMIPS32Ret() = delete;
diff --git a/src/IceInstX8632.cpp b/src/IceInstX8632.cpp
index cfd7fb3..3a8c57c 100644
--- a/src/IceInstX8632.cpp
+++ b/src/IceInstX8632.cpp
@@ -106,9 +106,8 @@
     assert(SegmentReg >= 0 && SegmentReg < SegReg_NUM);
     Str << "%" << X8632::Traits::InstSegmentRegNames[SegmentReg] << ":";
   }
-  // Emit as Offset(Base,Index,1<<Shift).
-  // Offset is emitted without the leading '$'.
-  // Omit the (Base,Index,1<<Shift) part if Base==nullptr.
+  // Emit as Offset(Base,Index,1<<Shift). Offset is emitted without the leading
+  // '$'. Omit the (Base,Index,1<<Shift) part if Base==nullptr.
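+  // E.g., Offset=12, Base=eax, Index=ecx, Shift=2 emits as 12(%eax,%ecx,4).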
   if (!Offset) {
     // No offset, emit nothing.
   } else if (const auto CI = llvm::dyn_cast<ConstantInteger32>(Offset)) {
diff --git a/src/IceInstX8664.cpp b/src/IceInstX8664.cpp
index 6d9ccd6..cb765ae 100644
--- a/src/IceInstX8664.cpp
+++ b/src/IceInstX8664.cpp
@@ -92,9 +92,8 @@
   if (!BuildDefs::dump())
     return;
   Ostream &Str = Func->getContext()->getStrEmit();
-  // Emit as Offset(Base,Index,1<<Shift).
-  // Offset is emitted without the leading '$'.
-  // Omit the (Base,Index,1<<Shift) part if Base==nullptr.
+  // Emit as Offset(Base,Index,1<<Shift). Offset is emitted without the leading
+  // '$'. Omit the (Base,Index,1<<Shift) part if Base==nullptr.
   if (!Offset) {
     // No offset, emit nothing.
   } else if (const auto CI = llvm::dyn_cast<ConstantInteger32>(Offset)) {
diff --git a/src/IceInstX86Base.h b/src/IceInstX86Base.h
index 8883902..5ca9422 100644
--- a/src/IceInstX86Base.h
+++ b/src/IceInstX86Base.h
@@ -146,9 +146,8 @@
   getOppositeCondition(typename Traits::Cond::BrCond Cond);
   void dump(const Cfg *Func) const override;
 
-  // Shared emit routines for common forms of instructions.
-  // See the definition of emitTwoAddress() for a description of
-  // ShiftHack.
+  // Shared emit routines for common forms of instructions. See the definition
+  // of emitTwoAddress() for a description of ShiftHack.
   static void emitTwoAddress(const char *Opcode, const Inst *Inst,
                              const Cfg *Func, bool ShiftHack = false);
 
@@ -165,16 +164,15 @@
   static bool isClassof(const Inst *Inst, InstKindX86 MyKind) {
     return Inst->getKind() == static_cast<InstKind>(MyKind);
   }
-  // Most instructions that operate on vector arguments require vector
-  // memory operands to be fully aligned (16-byte alignment for PNaCl
-  // vector types).  The stack frame layout and call ABI ensure proper
-  // alignment for stack operands, but memory operands (originating
-  // from load/store bitcode instructions) only have element-size
-  // alignment guarantees.  This function validates that none of the
-  // operands is a memory operand of vector type, calling
-  // report_fatal_error() if one is found.  This function should be
-  // called during emission, and maybe also in the ctor (as long as
-  // that fits the lowering style).
+  // Most instructions that operate on vector arguments require vector memory
+  // operands to be fully aligned (16-byte alignment for PNaCl vector types).
+  // The stack frame layout and call ABI ensure proper alignment for stack
+  // operands, but memory operands (originating from load/store bitcode
+  // instructions) only have element-size alignment guarantees. This function
+  // validates that none of the operands is a memory operand of vector type,
+  // calling report_fatal_error() if one is found. This function should be
+  // called during emission, and maybe also in the ctor (as long as that fits
+  // the lowering style).
   void validateVectorAddrMode() const {
     if (this->getDest())
       this->validateVectorAddrModeOpnd(this->getDest());
@@ -193,8 +191,8 @@
 };
 
 /// InstX86FakeRMW represents a non-atomic read-modify-write operation on a
-/// memory location.  An InstX86FakeRMW is a "fake" instruction in that it
-/// still needs to be lowered to some actual RMW instruction.
+/// memory location. An InstX86FakeRMW is a "fake" instruction in that it still
+/// needs to be lowered to some actual RMW instruction.
 ///
 /// If A is some memory address, D is some data value to apply, and OP is an
 /// arithmetic operator, the instruction operates as: (*A) = (*A) OP D
@@ -228,17 +226,16 @@
                  InstArithmetic::OpKind Op, Variable *Beacon);
 };
 
-/// InstX86Label represents an intra-block label that is the target
-/// of an intra-block branch.  The offset between the label and the
-/// branch must be fit into one byte (considered "near").  These are
-/// used for lowering i1 calculations, Select instructions, and 64-bit
-/// compares on a 32-bit architecture, without basic block splitting.
-/// Basic block splitting is not so desirable for several reasons, one
-/// of which is the impact on decisions based on whether a variable's
-/// live range spans multiple basic blocks.
+/// InstX86Label represents an intra-block label that is the target of an
+/// intra-block branch. The offset between the label and the branch must fit
+/// into one byte (considered "near"). These are used for lowering i1
+/// calculations, Select instructions, and 64-bit compares on a 32-bit
+/// architecture, without basic block splitting. Basic block splitting is not so
+/// desirable for several reasons, one of which is the impact on decisions based
+/// on whether a variable's live range spans multiple basic blocks.
 ///
-/// Intra-block control flow must be used with caution.  Consider the
-/// sequence for "c = (a >= b ? x : y)".
+/// Intra-block control flow must be used with caution. Consider the sequence
+/// for "c = (a >= b ? x : y)".
 ///     cmp a, b
 ///     br lt, L1
 ///     mov c, x
@@ -247,11 +244,10 @@
 ///     mov c, y
 ///   L2:
 ///
-/// Labels L1 and L2 are intra-block labels.  Without knowledge of the
-/// intra-block control flow, liveness analysis will determine the "mov
-/// c, x" instruction to be dead.  One way to prevent this is to insert
-/// a "FakeUse(c)" instruction anywhere between the two "mov c, ..."
-/// instructions, e.g.:
+/// Labels L1 and L2 are intra-block labels. Without knowledge of the
+/// intra-block control flow, liveness analysis will determine the "mov c, x"
+/// instruction to be dead. One way to prevent this is to insert a "FakeUse(c)"
+/// instruction anywhere between the two "mov c, ..." instructions, e.g.:
 ///
 ///     cmp a, b
 ///     br lt, L1
@@ -262,10 +258,9 @@
 ///     mov c, y
 ///   L2:
 ///
-/// The down-side is that "mov c, x" can never be dead-code eliminated
-/// even if there are no uses of c.  As unlikely as this situation is,
-/// it may be prevented by running dead code elimination before
-/// lowering.
+/// The down-side is that "mov c, x" can never be dead-code eliminated even if
+/// there are no uses of c. As unlikely as this situation is, it may be
+/// prevented by running dead code elimination before lowering.
 template <class Machine>
 class InstX86Label final : public InstX86Base<Machine> {
   InstX86Label() = delete;
@@ -319,9 +314,9 @@
         InstX86Br(Func, NoCondTarget, Target, NoLabel,
                   InstX86Base<Machine>::Traits::Cond::Br_None, Kind);
   }
-  /// Create a non-terminator conditional branch to a node, with a
-  /// fallthrough to the next instruction in the current node.  This is
-  /// used for switch lowering.
+  /// Create a non-terminator conditional branch to a node, with a fallthrough
+  /// to the next instruction in the current node. This is used for switch
+  /// lowering.
   static InstX86Br *
   create(Cfg *Func, CfgNode *Target,
          typename InstX86Base<Machine>::Traits::Cond::BrCond Condition,
@@ -381,9 +376,9 @@
   const Mode Kind;
 };
 
-/// Jump to a target outside this function, such as tailcall, nacljump,
-/// naclret, unreachable.  This is different from a Branch instruction
-/// in that there is no intra-function control flow to represent.
+/// Jump to a target outside this function, such as tailcall, nacljump, naclret,
+/// unreachable. This is different from a Branch instruction in that there is no
+/// intra-function control flow to represent.
 template <class Machine> class InstX86Jmp final : public InstX86Base<Machine> {
   InstX86Jmp() = delete;
   InstX86Jmp(const InstX86Jmp &) = delete;
@@ -405,8 +400,8 @@
   InstX86Jmp(Cfg *Func, Operand *Target);
 };
 
-/// AdjustStack instruction - subtracts esp by the given amount and
-/// updates the stack offset during code emission.
+/// AdjustStack instruction - subtracts esp by the given amount and updates the
+/// stack offset during code emission.
 template <class Machine>
 class InstX86AdjustStack final : public InstX86Base<Machine> {
   InstX86AdjustStack() = delete;
@@ -431,7 +426,7 @@
   SizeT Amount;
 };
 
-/// Call instruction.  Arguments should have already been pushed.
+/// Call instruction. Arguments should have already been pushed.
 template <class Machine> class InstX86Call final : public InstX86Base<Machine> {
   InstX86Call() = delete;
   InstX86Call(const InstX86Call &) = delete;
@@ -514,8 +509,8 @@
       Emitter;
 };
 
-/// Emit a two-operand (GPR) instruction, where the dest operand is a
-/// Variable that's guaranteed to be a register.
+/// Emit a two-operand (GPR) instruction, where the dest operand is a Variable
+/// that's guaranteed to be a register.
 template <class Machine, bool VarCanBeByte = true, bool SrcCanBeByte = true>
 void emitIASRegOpTyGPR(
     const Cfg *Func, Type Ty, const Variable *Dst, const Operand *Src,
@@ -540,9 +535,9 @@
     Type SrcTy = this->getSrc(0)->getType();
     Type DestTy = this->getDest()->getType();
     Str << "\t" << Opcode << this->getWidthString(SrcTy);
-    // Movsx and movzx need both the source and dest type width letter
-    // to define the operation.  The other unary operations have the
-    // same source and dest type and as a result need only one letter.
+    // Movsx and movzx need both the source and dest type width letter to
+    // define the operation. The other unary operations have the same source
+    // and dest type and as a result need only one letter.
     if (SrcTy != DestTy)
       Str << this->getWidthString(DestTy);
     Str << "\t";
@@ -1181,8 +1176,8 @@
                                                                Source) {}
 };
 
-/// Move packed - copy 128 bit values between XMM registers, or mem128
-/// and XMM registers.
+/// Move packed - copy 128 bit values between XMM registers, or mem128 and XMM
+/// registers.
 template <class Machine>
 class InstX86Movp
     : public InstX86BaseMovlike<Machine, InstX86Base<Machine>::Movp> {
@@ -1865,13 +1860,12 @@
             Func, Dest, Source) {}
 };
 
-/// movss is only a binary operation when the source and dest
-/// operands are both registers (the high bits of dest are left untouched).
-/// In other cases, it behaves like a copy (mov-like) operation (and the
-/// high bits of dest are cleared).
-/// InstX86Movss will assert that both its source and dest operands are
-/// registers, so the lowering code should use _mov instead of _movss
-/// in cases where a copy operation is intended.
+/// movss is only a binary operation when the source and dest operands are both
+/// registers (the high bits of dest are left untouched). In other cases, it
+/// behaves like a copy (mov-like) operation (and the high bits of dest are
+/// cleared). InstX86Movss will assert that both its source and dest operands
+/// are registers, so the lowering code should use _mov instead of _movss in
+/// cases where a copy operation is intended.
 template <class Machine>
 class InstX86MovssRegs
     : public InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::MovssRegs,
@@ -2072,8 +2066,8 @@
                       typename InstX86Base<Machine>::InstKindX86 Kind,
                       SizeT Maxsrcs, Variable *Dest, bool Locked)
       : InstX86Base<Machine>(Func, Kind, Maxsrcs, Dest), Locked(Locked) {
-    // Assume that such instructions are used for Atomics and be careful
-    // with optimizations.
+    // Assume that such instructions are used for Atomics and be careful with
+    // optimizations.
     this->HasSideEffects = Locked;
   }
 };
@@ -2174,8 +2168,7 @@
   typename InstX86Base<Machine>::Traits::Cond::BrCond Condition;
 };
 
-/// Cmpps instruction - compare packed singled-precision floating point
-/// values
+/// Cmpps instruction - compare packed single-precision floating point values
 template <class Machine>
 class InstX86Cmpps final : public InstX86Base<Machine> {
   InstX86Cmpps() = delete;
@@ -2204,10 +2197,10 @@
 };
 
 /// Cmpxchg instruction - cmpxchg <dest>, <desired> will compare if <dest>
-/// equals eax. If so, the ZF is set and <desired> is stored in <dest>.
-/// If not, ZF is cleared and <dest> is copied to eax (or subregister).
-/// <dest> can be a register or memory, while <desired> must be a register.
-/// It is the user's responsiblity to mark eax with a FakeDef.
+/// equals eax. If so, the ZF is set and <desired> is stored in <dest>. If not,
+/// ZF is cleared and <dest> is copied to eax (or subregister). <dest> can be a
+/// register or memory, while <desired> must be a register. It is the user's
+/// responsibility to mark eax with a FakeDef.
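+///
+/// Roughly:
+///   if (*dest == eax) { ZF = 1; *dest = desired; }
+///   else              { ZF = 0; eax = *dest; }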
 template <class Machine>
 class InstX86Cmpxchg final : public InstX86BaseLockable<Machine> {
   InstX86Cmpxchg() = delete;
@@ -2232,12 +2225,11 @@
                  Variable *Desired, bool Locked);
 };
 
-/// Cmpxchg8b instruction - cmpxchg8b <m64> will compare if <m64>
-/// equals edx:eax. If so, the ZF is set and ecx:ebx is stored in <m64>.
-/// If not, ZF is cleared and <m64> is copied to edx:eax.
-/// The caller is responsible for inserting FakeDefs to mark edx
-/// and eax as modified.
-/// <m64> must be a memory operand.
+/// Cmpxchg8b instruction - cmpxchg8b <m64> will compare if <m64> equals
+/// edx:eax. If so, the ZF is set and ecx:ebx is stored in <m64>. If not, ZF is
+/// cleared and <m64> is copied to edx:eax. The caller is responsible for
+/// inserting FakeDefs to mark edx and eax as modified. <m64> must be a memory
+/// operand.
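+///
+/// Roughly:
+///   if (*m64 == edx:eax) { ZF = 1; *m64 = ecx:ebx; }
+///   else                 { ZF = 0; edx:eax = *m64; }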
 template <class Machine>
 class InstX86Cmpxchg8b final : public InstX86BaseLockable<Machine> {
   InstX86Cmpxchg8b() = delete;
@@ -2267,10 +2259,10 @@
                    bool Locked);
 };
 
-/// Cvt instruction - wrapper for cvtsX2sY where X and Y are in {s,d,i}
-/// as appropriate.  s=float, d=double, i=int.  X and Y are determined
-/// from dest/src types.  Sign and zero extension on the integer
-/// operand needs to be done separately.
+/// Cvt instruction - wrapper for cvtsX2sY where X and Y are in {s,d,i} as
+/// appropriate. s=float, d=double, i=int. X and Y are determined from dest/src
+/// types. Sign and zero extension on the integer operand needs to be done
+/// separately.
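+/// E.g., cvtsi2sd converts a 32-bit integer to a double (X=i, Y=d).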
 template <class Machine> class InstX86Cvt final : public InstX86Base<Machine> {
   InstX86Cvt() = delete;
   InstX86Cvt(const InstX86Cvt &) = delete;
@@ -2406,9 +2398,8 @@
 };
 
 /// This is essentially a "mov" instruction with an
-/// InstX86Base<Machine>::Traits::X86OperandMem
-/// operand instead of Variable as the destination.  It's important
-/// for liveness that there is no Dest operand.
+/// InstX86Base<Machine>::Traits::X86OperandMem operand instead of Variable as
+/// the destination. It's important for liveness that there is no Dest operand.
 template <class Machine>
 class InstX86Store final : public InstX86Base<Machine> {
   InstX86Store() = delete;
@@ -2434,10 +2425,9 @@
 };
 
 /// This is essentially a vector "mov" instruction with an typename
-/// InstX86Base<Machine>::Traits::X86OperandMem
-/// operand instead of Variable as the destination.  It's important
-/// for liveness that there is no Dest operand. The source must be an
-/// Xmm register, since Dest is mem.
+/// InstX86Base<Machine>::Traits::X86OperandMem operand instead of Variable as
+/// the destination. It's important for liveness that there is no Dest operand.
+/// The source must be an Xmm register, since Dest is mem.
 template <class Machine>
 class InstX86StoreP final : public InstX86Base<Machine> {
   InstX86StoreP() = delete;
@@ -2596,10 +2586,10 @@
   InstX86Push(Cfg *Func, Variable *Source);
 };
 
-/// Ret instruction.  Currently only supports the "ret" version that
-/// does not pop arguments.  This instruction takes a Source operand
-/// (for non-void returning functions) for liveness analysis, though
-/// a FakeUse before the ret would do just as well.
+/// Ret instruction. Currently only supports the "ret" version that does not pop
+/// arguments. This instruction takes a Source operand (for non-void returning
+/// functions) for liveness analysis, though a FakeUse before the ret would do
+/// just as well.
 template <class Machine> class InstX86Ret final : public InstX86Base<Machine> {
   InstX86Ret() = delete;
   InstX86Ret(const InstX86Ret &) = delete;
@@ -2647,10 +2637,10 @@
   const typename InstX86Base<Machine>::Traits::Cond::BrCond Condition;
 };
 
-/// Exchanging Add instruction.  Exchanges the first operand (destination
-/// operand) with the second operand (source operand), then loads the sum
-/// of the two values into the destination operand. The destination may be
-/// a register or memory, while the source must be a register.
+/// Exchanging Add instruction. Exchanges the first operand (destination
+/// operand) with the second operand (source operand), then loads the sum of the
+/// two values into the destination operand. The destination may be a register
+/// or memory, while the source must be a register.
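+/// (Roughly: temp = *dest; *dest = temp + src; src = temp.)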
 ///
 /// Both the dest and source are updated. The caller should then insert a
 /// FakeDef to reflect the second udpate.
@@ -2677,12 +2667,11 @@
   InstX86Xadd(Cfg *Func, Operand *Dest, Variable *Source, bool Locked);
 };
 
-/// Exchange instruction.  Exchanges the first operand (destination
-/// operand) with the second operand (source operand). At least one of
-/// the operands must be a register (and the other can be reg or mem).
-/// Both the Dest and Source are updated. If there is a memory operand,
-/// then the instruction is automatically "locked" without the need for
-/// a lock prefix.
+/// Exchange instruction. Exchanges the first operand (destination operand) with
+/// the second operand (source operand). At least one of the operands must be a
+/// register (and the other can be reg or mem). Both the Dest and Source are
+/// updated. If there is a memory operand, then the instruction is automatically
+/// "locked" without the need for a lock prefix.
 template <class Machine> class InstX86Xchg final : public InstX86Base<Machine> {
   InstX86Xchg() = delete;
   InstX86Xchg(const InstX86Xchg &) = delete;
diff --git a/src/IceInstX86BaseImpl.h b/src/IceInstX86BaseImpl.h
index 336e268..677a1d3 100644
--- a/src/IceInstX86BaseImpl.h
+++ b/src/IceInstX86BaseImpl.h
@@ -112,16 +112,14 @@
 
 template <class Machine>
 bool InstX86Br<Machine>::optimizeBranch(const CfgNode *NextNode) {
-  // If there is no next block, then there can be no fallthrough to
-  // optimize.
+  // If there is no next block, then there can be no fallthrough to optimize.
   if (NextNode == nullptr)
     return false;
   // Intra-block conditional branches can't be optimized.
   if (Label)
     return false;
-  // If there is no fallthrough node, such as a non-default case label
-  // for a switch instruction, then there is no opportunity to
-  // optimize.
+  // If there is no fallthrough node, such as a non-default case label for a
+  // switch instruction, then there is no opportunity to optimize.
   if (getTargetFalse() == nullptr)
     return false;
 
@@ -132,15 +130,15 @@
     this->setDeleted();
     return true;
   }
-  // If the fallthrough is to the next node, set fallthrough to nullptr
-  // to indicate.
+  // If the fallthrough is to the next node, set fallthrough to nullptr to
+  // indicate this.
   if (getTargetFalse() == NextNode) {
     TargetFalse = nullptr;
     return true;
   }
-  // If TargetTrue is the next node, and TargetFalse is not nullptr
-  // (which was already tested above), then invert the branch
-  // condition, swap the targets, and set new fallthrough to nullptr.
+  // If TargetTrue is the next node, and TargetFalse is not nullptr (which was
+  // already tested above), then invert the branch condition, swap the targets,
+  // and set new fallthrough to nullptr.
   if (getTargetTrue() == NextNode) {
     assert(Condition != InstX86Base<Machine>::Traits::Cond::Br_None);
     Condition = this->getOppositeCondition(Condition);
@@ -185,8 +183,8 @@
     typename InstX86Base<Machine>::Traits::Cond::BrCond Condition)
     : InstX86Base<Machine>(Func, InstX86Base<Machine>::Cmov, 2, Dest),
       Condition(Condition) {
-  // The final result is either the original Dest, or Source, so mark
-  // both as sources.
+  // The final result is either the original Dest, or Source, so mark both as
+  // sources.
   this->addSource(Dest);
   this->addSource(Source);
 }
@@ -320,12 +318,11 @@
 template <class Machine>
 InstX86Pop<Machine>::InstX86Pop(Cfg *Func, Variable *Dest)
     : InstX86Base<Machine>(Func, InstX86Base<Machine>::Pop, 0, Dest) {
-  // A pop instruction affects the stack pointer and so it should not
-  // be allowed to be automatically dead-code eliminated.  (The
-  // corresponding push instruction doesn't need this treatment
-  // because it has no dest variable and therefore won't be dead-code
-  // eliminated.)  This is needed for late-stage liveness analysis
-  // (e.g. asm-verbose mode).
+  // A pop instruction affects the stack pointer and so it should not be
+  // allowed to be automatically dead-code eliminated. (The corresponding push
+  // instruction doesn't need this treatment because it has no dest variable
+  // and therefore won't be dead-code eliminated.) This is needed for
+  // late-stage liveness analysis (e.g. asm-verbose mode).
   this->HasSideEffects = true;
 }
 
@@ -529,11 +526,10 @@
       Asm->jmp(InstX86Base<Machine>::Traits::RegisterSet::getEncodedGPR(
           Var->getRegNum()));
     } else {
-      // The jmp instruction with a memory operand should be possible
-      // to encode, but it isn't a valid sandboxed instruction, and
-      // there shouldn't be a register allocation issue to jump
-      // through a scratch register, so we don't really need to bother
-      // implementing it.
+      // The jmp instruction with a memory operand should be possible to
+      // encode, but it isn't a valid sandboxed instruction, and there
+      // shouldn't be a register allocation issue to jump through a scratch
+      // register, so we don't really need to bother implementing it.
       llvm::report_fatal_error("Assembler can't jmp to memory operand");
     }
   } else if (const auto Mem = llvm::dyn_cast<
@@ -548,11 +544,10 @@
     Asm->jmp(CR);
   } else if (const auto Imm = llvm::dyn_cast<ConstantInteger32>(Target)) {
     // NaCl trampoline calls refer to an address within the sandbox directly.
-    // This is usually only needed for non-IRT builds and otherwise not
-    // very portable or stable. Usually this is only done for "calls"
-    // and not jumps.
-    // TODO(jvoung): Support this when there is a lowering that
-    // actually triggers this case.
+    // This is usually only needed for non-IRT builds and otherwise not very
+    // portable or stable. Usually this is only done for "calls" and not jumps.
+    // TODO(jvoung): Support this when there is a lowering that actually
+    // triggers this case.
     (void)Imm;
     llvm::report_fatal_error("Unexpected jmp to absolute address");
   } else {
@@ -633,10 +628,9 @@
   getCallTarget()->dump(Func);
 }
 
-// The ShiftHack parameter is used to emit "cl" instead of "ecx" for
-// shift instructions, in order to be syntactically valid.  The
-// this->Opcode parameter needs to be char* and not IceString because of
-// template issues.
+// The ShiftHack parameter is used to emit "cl" instead of "ecx" for shift
+// instructions, in order to be syntactically valid. The this->Opcode parameter
+// needs to be char* and not IceString because of template issues.
 template <class Machine>
 void InstX86Base<Machine>::emitTwoAddress(const char *Opcode, const Inst *Inst,
                                           const Cfg *Func, bool ShiftHack) {
@@ -802,15 +796,14 @@
         &Emitter) {
   typename InstX86Base<Machine>::Traits::Assembler *Asm =
       Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>();
-  // Technically, the Dest Var can be mem as well, but we only use Reg.
-  // We can extend this to check Dest if we decide to use that form.
+  // Technically, the Dest Var can be mem as well, but we only use Reg. We can
+  // extend this to check Dest if we decide to use that form.
   assert(Var->hasReg());
   // We cheat a little and use GPRRegister even for byte operations.
   typename InstX86Base<Machine>::Traits::RegisterSet::GPRRegister VarReg =
       InstX86Base<Machine>::Traits::RegisterSet::getEncodedByteRegOrGPR(
           Ty, Var->getRegNum());
-  // Src must be reg == ECX or an Imm8.
-  // This is asserted by the assembler.
+  // Src must be reg == ECX or an Imm8. This is asserted by the assembler.
   if (const auto SrcVar = llvm::dyn_cast<Variable>(Src)) {
     assert(SrcVar->hasReg());
     typename InstX86Base<Machine>::Traits::RegisterSet::GPRRegister SrcReg =
@@ -1337,8 +1330,8 @@
         &InstX86Base<Machine>::Traits::Assembler::imul};
     emitIASOpTyGPR<Machine>(Func, Ty, this->getSrc(1), Emitter);
   } else {
-    // We only use imul as a two-address instruction even though
-    // there is a 3 operand version when one of the operands is a constant.
+    // We only use imul as a two-address instruction even though there is a 3
+    // operand version when one of the operands is a constant.
     assert(Var == this->getSrc(0));
     static const typename InstX86Base<
         Machine>::Traits::Assembler::GPREmitterRegOp Emitter = {
@@ -1678,8 +1671,8 @@
       Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>();
   assert(this->getSrcSize() == 2);
   assert(Condition < InstX86Base<Machine>::Traits::Cond::Cmpps_Invalid);
-  // Assuming there isn't any load folding for cmpps, and vector constants
-  // are not allowed in PNaCl.
+  // Assuming there isn't any load folding for cmpps, and vector constants are
+  // not allowed in PNaCl.
   assert(llvm::isa<Variable>(this->getSrc(1)));
   const auto SrcVar = llvm::cast<Variable>(this->getSrc(1));
   if (SrcVar->hasReg()) {
@@ -1988,8 +1981,8 @@
 template <class Machine>
 void InstX86Ucomiss<Machine>::emitIAS(const Cfg *Func) const {
   assert(this->getSrcSize() == 2);
-  // Currently src0 is always a variable by convention, to avoid having
-  // two memory operands.
+  // Currently src0 is always a variable by convention, to avoid having two
+  // memory operands.
   assert(llvm::isa<Variable>(this->getSrc(0)));
   const auto Src0Var = llvm::cast<Variable>(this->getSrc(0));
   Type Ty = Src0Var->getType();
@@ -2291,16 +2284,16 @@
                 : InstX86Base<Machine>::Traits::TypeAttributes[DestTy]
                       .SdSsString) << "\t";
   }
-  // For an integer truncation operation, src is wider than dest.
-  // Ideally, we use a mov instruction whose data width matches the
-  // narrower dest.  This is a problem if e.g. src is a register like
-  // esi or si where there is no 8-bit version of the register.  To be
-  // safe, we instead widen the dest to match src.  This works even
-  // for stack-allocated dest variables because typeWidthOnStack()
-  // pads to a 4-byte boundary even if only a lower portion is used.
-  // TODO: This assert disallows usages such as copying a floating point
-  // value between a vector and a scalar (which movss is used for).
-  // Clean this up.
+  // For an integer truncation operation, src is wider than dest. Ideally, we
+  // use a mov instruction whose data width matches the narrower dest. This is
+  // a problem if e.g. src is a register like esi or si where there is no 8-bit
+  // version of the register. To be safe, we instead widen the dest to match
+  // src. This works even for stack-allocated dest variables because
+  // typeWidthOnStack() pads to a 4-byte boundary even if only a lower portion
+  // is used.
+  // TODO: This assert disallows usages such as copying a floating point
+  // value between a vector and a scalar (which movss is used for). Clean
+  // this up.
   assert(Func->getTarget()->typeWidthInBytesOnStack(DestTy) ==
          Func->getTarget()->typeWidthInBytesOnStack(SrcTy));
   Src->emit(Func);
@@ -2316,12 +2309,11 @@
   Type DestTy = Dest->getType();
   Type SrcTy = Src->getType();
   // Mov can be used for GPRs or XMM registers. Also, the type does not
-  // necessarily match (Mov can be used for bitcasts). However, when
-  // the type does not match, one of the operands must be a register.
-  // Thus, the strategy is to find out if Src or Dest are a register,
-  // then use that register's type to decide on which emitter set to use.
-  // The emitter set will include reg-reg movs, but that case should
-  // be unused when the types don't match.
+  // necessarily match (Mov can be used for bitcasts). However, when the type
+  // does not match, one of the operands must be a register. Thus, the strategy
+  // is to find out if Src or Dest are a register, then use that register's
+  // type to decide on which emitter set to use. The emitter set will include
+  // reg-reg movs, but that case should be unused when the types don't match.
   static const typename InstX86Base<Machine>::Traits::Assembler::XmmEmitterRegOp
       XmmRegEmitter = {&InstX86Base<Machine>::Traits::Assembler::movss,
                        &InstX86Base<Machine>::Traits::Assembler::movss};
@@ -2333,16 +2325,16 @@
       Machine>::Traits::Assembler::GPREmitterAddrOp GPRAddrEmitter = {
       &InstX86Base<Machine>::Traits::Assembler::mov,
       &InstX86Base<Machine>::Traits::Assembler::mov};
-  // For an integer truncation operation, src is wider than dest.
-  // Ideally, we use a mov instruction whose data width matches the
-  // narrower dest.  This is a problem if e.g. src is a register like
-  // esi or si where there is no 8-bit version of the register.  To be
-  // safe, we instead widen the dest to match src.  This works even
-  // for stack-allocated dest variables because typeWidthOnStack()
-  // pads to a 4-byte boundary even if only a lower portion is used.
-  // TODO: This assert disallows usages such as copying a floating point
-  // value between a vector and a scalar (which movss is used for).
-  // Clean this up.
+  // For an integer truncation operation, src is wider than dest. Ideally, we
+  // use a mov instruction whose data width matches the narrower dest. This is
+  // a problem if e.g. src is a register like esi or si where there is no 8-bit
+  // version of the register. To be safe, we instead widen the dest to match
+  // src. This works even for stack-allocated dest variables because
+  // typeWidthOnStack() pads to a 4-byte boundary even if only a lower portion
+  // is used.
+  // TODO: This assert disallows usages such as copying a floating point
+  // value between a vector and a scalar (which movss is used for). Clean
+  // this up.
   assert(
       Func->getTarget()->typeWidthInBytesOnStack(this->getDest()->getType()) ==
       Func->getTarget()->typeWidthInBytesOnStack(Src->getType()));
@@ -2375,8 +2367,8 @@
       return;
     }
   } else {
-    // Dest must be Stack and Src *could* be a register. Use Src's type
-    // to decide on the emitters.
+    // Dest must be Stack and Src *could* be a register. Use Src's type to
+    // decide on the emitters.
     typename InstX86Base<Machine>::Traits::Address StackAddr(
         static_cast<typename InstX86Base<Machine>::Traits::TargetLowering *>(
             Func->getTarget())
@@ -2409,8 +2401,8 @@
   assert(this->getSrcSize() == 1);
   const Variable *Dest = this->getDest();
   const auto SrcVar = llvm::cast<Variable>(this->getSrc(0));
-  // For insert/extract element (one of Src/Dest is an Xmm vector and
-  // the other is an int type).
+  // For insert/extract element (one of Src/Dest is an Xmm vector and the other
+  // is an int type).
   if (SrcVar->getType() == IceType_i32 ||
       (InstX86Base<Machine>::Traits::Is64Bit &&
        SrcVar->getType() == IceType_i64)) {
@@ -2464,10 +2456,9 @@
 void InstX86Movp<Machine>::emit(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
-  // TODO(wala,stichnot): movups works with all vector operands, but
-  // there exist other instructions (movaps, movdqa, movdqu) that may
-  // perform better, depending on the data type and alignment of the
-  // operands.
+  // TODO(wala,stichnot): movups works with all vector operands, but there
+  // exist other instructions (movaps, movdqa, movdqu) that may perform better,
+  // depending on the data type and alignment of the operands.
   Ostream &Str = Func->getContext()->getStrEmit();
   assert(this->getSrcSize() == 1);
   Str << "\tmovups\t";
@@ -2521,8 +2512,8 @@
 
 template <class Machine>
 void InstX86MovssRegs<Machine>::emitIAS(const Cfg *Func) const {
-  // This is Binop variant is only intended to be used for reg-reg moves
-  // where part of the Dest register is untouched.
+  // This Binop variant is only intended to be used for reg-reg moves where
+  // part of the Dest register is untouched.
   assert(this->getSrcSize() == 2);
   const Variable *Dest = this->getDest();
   assert(Dest == this->getSrc(0));
@@ -2542,9 +2533,9 @@
   assert(this->getSrcSize() == 1);
   const Variable *Dest = this->getDest();
   const Operand *Src = this->getSrc(0);
-  // Dest must be a > 8-bit register, but Src can be 8-bit. In practice
-  // we just use the full register for Dest to avoid having an
-  // OperandSizeOverride prefix. It also allows us to only dispatch on SrcTy.
+  // Dest must be a > 8-bit register, but Src can be 8-bit. In practice we just
+  // use the full register for Dest to avoid having an OperandSizeOverride
+  // prefix. It also allows us to only dispatch on SrcTy.
   Type SrcTy = Src->getType();
   assert(typeWidthInBytes(Dest->getType()) > 1);
   assert(typeWidthInBytes(Dest->getType()) > typeWidthInBytes(SrcTy));
@@ -2596,8 +2587,8 @@
   SizeT Width = typeWidthInBytes(Ty);
   const auto Var = llvm::dyn_cast<Variable>(this->getSrc(0));
   if (Var && Var->hasReg()) {
-    // This is a physical xmm register, so we need to spill it to a
-    // temporary stack slot.
+    // This is a physical xmm register, so we need to spill it to a temporary
+    // stack slot.
     Str << "\tsubl\t$" << Width << ", %esp"
         << "\n";
     Str << "\tmov"
@@ -2622,8 +2613,8 @@
   Type Ty = Src->getType();
   if (const auto Var = llvm::dyn_cast<Variable>(Src)) {
     if (Var->hasReg()) {
-      // This is a physical xmm register, so we need to spill it to a
-      // temporary stack slot.
+      // This is a physical xmm register, so we need to spill it to a temporary
+      // stack slot.
       Immediate Width(typeWidthInBytes(Ty));
       Asm->sub(IceType_i32,
                InstX86Base<Machine>::Traits::RegisterSet::Encoded_Reg_esp,
@@ -2672,9 +2663,8 @@
   Ostream &Str = Func->getContext()->getStrEmit();
   assert(this->getSrcSize() == 0);
   // TODO(jvoung,stichnot): Utilize this by setting Dest to nullptr to
-  // "partially" delete the fstp if the Dest is unused.
-  // Even if Dest is unused, the fstp should be kept for the SideEffects
-  // of popping the stack.
+  // "partially" delete the fstp if the Dest is unused. Even if Dest is unused,
+  // the fstp should be kept for the SideEffects of popping the stack.
   if (!this->getDest()) {
     Str << "\tfstp\tst(0)";
     return;
@@ -2686,10 +2676,9 @@
     this->getDest()->emit(Func);
     return;
   }
-  // Dest is a physical (xmm) register, so st(0) needs to go through
-  // memory.  Hack this by creating a temporary stack slot, spilling
-  // st(0) there, loading it into the xmm register, and deallocating
-  // the stack slot.
+  // Dest is a physical (xmm) register, so st(0) needs to go through memory.
+  // Hack this by creating a temporary stack slot, spilling st(0) there,
+  // loading it into the xmm register, and deallocating the stack slot.
   Str << "\tsubl\t$" << Width << ", %esp\n";
   Str << "\tfstp" << this->getFldString(Ty) << "\t"
       << "(%esp)\n";
@@ -2708,9 +2697,8 @@
   assert(this->getSrcSize() == 0);
   const Variable *Dest = this->getDest();
   // TODO(jvoung,stichnot): Utilize this by setting Dest to nullptr to
-  // "partially" delete the fstp if the Dest is unused.
-  // Even if Dest is unused, the fstp should be kept for the SideEffects
-  // of popping the stack.
+  // "partially" delete the fstp if the Dest is unused. Even if Dest is unused,
+  // the fstp should be kept for the SideEffects of popping the stack.
   if (!Dest) {
     Asm->fstp(InstX86Base<Machine>::Traits::RegisterSet::getEncodedSTReg(0));
     return;
@@ -2723,10 +2711,9 @@
             ->stackVarToAsmOperand(Dest));
     Asm->fstp(Ty, StackAddr);
   } else {
-    // Dest is a physical (xmm) register, so st(0) needs to go through
-    // memory.  Hack this by creating a temporary stack slot, spilling
-    // st(0) there, loading it into the xmm register, and deallocating
-    // the stack slot.
+    // Dest is a physical (xmm) register, so st(0) needs to go through memory.
+    // Hack this by creating a temporary stack slot, spilling st(0) there,
+    // loading it into the xmm register, and deallocating the stack slot.
     Immediate Width(typeWidthInBytes(Ty));
     Asm->sub(IceType_i32,
              InstX86Base<Machine>::Traits::RegisterSet::Encoded_Reg_esp, Width);
@@ -2796,9 +2783,9 @@
   this->getSrc(0)->emit(Func);
   Str << ", ";
   Variable *Dest = this->getDest();
-  // pextrw must take a register dest. There is an SSE4.1 version that takes
-  // a memory dest, but we aren't using it. For uniformity, just restrict
-  // them all to have a register dest for now.
+  // pextrw must take a register dest. There is an SSE4.1 version that takes a
+  // memory dest, but we aren't using it. For uniformity, just restrict them
+  // all to have a register dest for now.
   assert(Dest->hasReg());
   Dest->asType(IceType_i32)->emit(Func);
 }
@@ -2813,9 +2800,9 @@
          static_cast<typename InstX86Base<Machine>::Traits::TargetLowering *>(
              Func->getTarget())
                  ->getInstructionSet() >= InstX86Base<Machine>::Traits::SSE4_1);
-  // pextrw must take a register dest. There is an SSE4.1 version that takes
-  // a memory dest, but we aren't using it. For uniformity, just restrict
-  // them all to have a register dest for now.
+  // pextrw must take a register dest. There is an SSE4.1 version that takes a
+  // memory dest, but we aren't using it. For uniformity, just restrict them
+  // all to have a register dest for now.
   assert(Dest->hasReg());
   // pextrw's Src(0) must be a register (both SSE4.1 and SSE2).
   assert(llvm::cast<Variable>(this->getSrc(0))->hasReg());
@@ -2876,10 +2863,9 @@
          static_cast<typename InstX86Base<Machine>::Traits::TargetLowering *>(
              Func->getTarget())
                  ->getInstructionSet() >= InstX86Base<Machine>::Traits::SSE4_1);
-  // If src1 is a register, it should always be r32 (this should fall out
-  // from the encodings for ByteRegs overlapping the encodings for r32),
-  // but we have to trust the regalloc to not choose "ah", where it
-  // doesn't overlap.
+  // If src1 is a register, it should always be r32 (this should fall out from
+  // the encodings for ByteRegs overlapping the encodings for r32), but we have
+  // to trust the regalloc to not choose "ah", where it doesn't overlap.
   static const typename InstX86Base<Machine>::Traits::Assembler::
       template ThreeOpImmEmitter<
           typename InstX86Base<Machine>::Traits::RegisterSet::XmmRegister,
diff --git a/src/IceIntrinsics.cpp b/src/IceIntrinsics.cpp
index 1dc25cc..bbbf086 100644
--- a/src/IceIntrinsics.cpp
+++ b/src/IceIntrinsics.cpp
@@ -8,8 +8,8 @@
 //===----------------------------------------------------------------------===//
 ///
 /// \file
-/// This file implements the Intrinsics utilities for matching and
-/// then dispatching by name.
+/// This file implements the Intrinsics utilities for matching and then
+/// dispatching by name.
 ///
 //===----------------------------------------------------------------------===//
 
@@ -33,8 +33,8 @@
 #define INTRIN(ID, SE, RT)                                                     \
   { Intrinsics::ID, Intrinsics::SE, Intrinsics::RT }
 
-// Build list of intrinsics with their attributes and expected prototypes.
-// List is sorted alphabetically.
+// Build list of intrinsics with their attributes and expected prototypes. List
+// is sorted alphabetically.
 const struct IceIntrinsicsEntry_ {
   Intrinsics::FullIntrinsicInfo Info;
   const char *IntrinsicName;
@@ -279,8 +279,8 @@
   case AtomicRMW:
     return true;
   case AtomicCmpxchg:
-    // Reject orderings that are disallowed by C++11 as invalid
-    // combinations for cmpxchg.
+    // Reject orderings that are disallowed by C++11 as invalid combinations
+    // for cmpxchg.
     switch (OrderOther) {
     case MemoryOrderRelaxed:
     case MemoryOrderConsume:
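For reference, a standalone sketch of the C++11 rule being enforced for cmpxchg, written against std::memory_order rather than Subzero's MemoryOrder enum: the failure ordering may be neither release nor acq_rel, and may not be stronger than the success ordering.

    #include <atomic>

    bool cmpxchgOrderingsValid(std::memory_order Success,
                               std::memory_order Failure) {
      if (Failure == std::memory_order_release ||
          Failure == std::memory_order_acq_rel)
        return false; // C++11 forbids these failure orderings outright.
      // "No stronger than success": comparing the enumerators in their declared
      // (weakest-to-strongest) order is the usual reading of that rule.
      return Failure <= Success;
    }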
diff --git a/src/IceIntrinsics.h b/src/IceIntrinsics.h
index 9270aa4..208c3c1 100644
--- a/src/IceIntrinsics.h
+++ b/src/IceIntrinsics.h
@@ -62,11 +62,10 @@
     Trap
   };
 
-  /// Operations that can be represented by the AtomicRMW
-  /// intrinsic.
+  /// Operations that can be represented by the AtomicRMW intrinsic.
   ///
-  /// Do not reorder these values: their order offers forward
-  /// compatibility of bitcode targeted to PNaCl.
+  /// Do not reorder these values: their order offers forward compatibility of
+  /// bitcode targeted to PNaCl.
   enum AtomicRMWOperation {
     AtomicInvalid = 0, // Invalid, keep first.
     AtomicAdd,
@@ -80,8 +79,8 @@
 
   /// Memory orderings supported by PNaCl IR.
   ///
-  /// Do not reorder these values: their order offers forward
-  /// compatibility of bitcode targeted to PNaCl.
+  /// Do not reorder these values: their order offers forward compatibility of
+  /// bitcode targeted to PNaCl.
   enum MemoryOrder {
     MemoryOrderInvalid = 0, // Invalid, keep first.
     MemoryOrderRelaxed,
@@ -93,11 +92,11 @@
     MemoryOrderNum // Invalid, keep last.
   };
 
-  /// Verify memory ordering rules for atomic intrinsics.  For
-  /// AtomicCmpxchg, Order is the "success" ordering and OrderOther is
-  /// the "failure" ordering.  Returns true if valid, false if invalid.
-  // TODO(stichnot,kschimpf): Perform memory order validation in the
-  // bitcode reader/parser, allowing LLVM and Subzero to share.  See
+  /// Verify memory ordering rules for atomic intrinsics. For AtomicCmpxchg,
+  /// Order is the "success" ordering and OrderOther is the "failure" ordering.
+  /// Returns true if valid, false if invalid.
+  // TODO(stichnot,kschimpf): Perform memory order validation in the bitcode
+  // reader/parser, allowing LLVM and Subzero to share. See
   // https://code.google.com/p/nativeclient/issues/detail?id=4126 .
   static bool isMemoryOrderValid(IntrinsicID ID, uint64_t Order,
                                  uint64_t OrderOther = MemoryOrderInvalid);
@@ -106,10 +105,10 @@
 
   enum ReturnsTwice { ReturnsTwice_F = 0, ReturnsTwice_T = 1 };
 
-  /// Basic attributes related to each intrinsic, that are relevant to
-  /// code generation. Perhaps the attributes representation can be shared
-  /// with general function calls, but PNaCl currently strips all
-  /// attributes from functions.
+  /// Basic attributes related to each intrinsic, that are relevant to code
+  /// generation. Perhaps the attributes representation can be shared with
+  /// general function calls, but PNaCl currently strips all attributes from
+  /// functions.
   struct IntrinsicInfo {
     enum IntrinsicID ID : 30;
     enum SideEffects HasSideEffects : 1;
@@ -132,9 +131,9 @@
     Type Signature[kMaxIntrinsicParameters];
     uint8_t NumTypes;
 
-    /// Validates that type signature of call matches intrinsic.
-    /// If WrongArgumentType is returned, ArgIndex is set to corresponding
-    /// argument index.
+    /// Validates that type signature of call matches intrinsic. If
+    /// WrongArgumentType is returned, ArgIndex is set to corresponding argument
+    /// index.
     ValidateCallValue validateCall(const Ice::InstCall *Call,
                                    SizeT &ArgIndex) const;
 
@@ -154,11 +153,11 @@
     Type getArgType(SizeT Index) const;
   };
 
-  /// Find the information about a given intrinsic, based on function name.  If
+  /// Find the information about a given intrinsic, based on function name. If
   /// the function name does not have the common "llvm." prefix, nullptr is
-  /// returned and Error is set to false.  Otherwise, tries to find a reference
-  /// to a FullIntrinsicInfo entry (valid for the lifetime of the map).  If
-  /// found, sets Error to false and returns the reference.  If not found, sets
+  /// returned and Error is set to false. Otherwise, tries to find a reference
+  /// to a FullIntrinsicInfo entry (valid for the lifetime of the map). If
+  /// found, sets Error to false and returns the reference. If not found, sets
   /// Error to true and returns nullptr (indicating an unknown "llvm.foo"
   /// intrinsic).
   const FullIntrinsicInfo *find(const IceString &Name, bool &Error) const;
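A usage sketch of the lookup contract described above; the Intrin instance, the Name value, and reportUnknownIntrinsic() are illustrative assumptions, not existing Subzero code:

    #include "IceIntrinsics.h"

    void reportUnknownIntrinsic(const Ice::IceString &Name); // Hypothetical helper.

    void classifyCallee(const Ice::Intrinsics &Intrin, const Ice::IceString &Name) {
      bool Error = false;
      const Ice::Intrinsics::FullIntrinsicInfo *Info = Intrin.find(Name, Error);
      if (Info != nullptr)
        return;                         // Known intrinsic; Info stays valid for
                                        // the lifetime of the map.
      if (Error)
        reportUnknownIntrinsic(Name);   // Had the "llvm." prefix but no entry.
      // Otherwise the name is not an intrinsic; treat it as an ordinary call.
    }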
diff --git a/src/IceLiveness.cpp b/src/IceLiveness.cpp
index 15877b2..25cfd09 100644
--- a/src/IceLiveness.cpp
+++ b/src/IceLiveness.cpp
@@ -8,15 +8,14 @@
 //===----------------------------------------------------------------------===//
 ///
 /// \file
-/// This file provides some of the support for the Liveness class.  In
-/// particular, it handles the sparsity representation of the mapping
-/// between Variables and CfgNodes.  The idea is that since most
-/// variables are used only within a single basic block, we can
-/// partition the variables into "local" and "global" sets.  Instead of
-/// sizing and indexing vectors according to Variable::Number, we
-/// create a mapping such that global variables are mapped to low
-/// indexes that are common across nodes, and local variables are
-/// mapped to a higher index space that is shared across nodes.
+/// This file provides some of the support for the Liveness class. In
+/// particular, it handles the sparsity representation of the mapping between
+/// Variables and CfgNodes. The idea is that since most variables are used only
+/// within a single basic block, we can partition the variables into "local" and
+/// "global" sets. Instead of sizing and indexing vectors according to
+/// Variable::Number, we create a mapping such that global variables are mapped
+/// to low indexes that are common across nodes, and local variables are mapped
+/// to a higher index space that is shared across nodes.
 ///
 //===----------------------------------------------------------------------===//
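A standalone sketch of the index partitioning this header describes (hypothetical type; the real mapping is built in Liveness::initInternal below): globals take the low, block-independent indexes, and every block reuses the same tail of the index space for its locals.

    #include <cstddef>
    #include <vector>

    struct SparseLivenessIndex {
      std::size_t NumGlobals = 0;
      std::vector<std::size_t> VarToLive; // Indexed by Variable::Number.

      explicit SparseLivenessIndex(std::size_t NumVars) : VarToLive(NumVars) {}

      // Globals are assigned first, so they occupy [0, NumGlobals).
      void mapGlobal(std::size_t VarNum) { VarToLive[VarNum] = NumGlobals++; }

      // Each block numbers its locals from 0; they all share the index range
      // [NumGlobals, NumGlobals + NumLocalsInBlock), so liveness bit vectors
      // only need NumGlobals bits to communicate across blocks.
      void mapLocal(std::size_t VarNum, std::size_t LocalIndexInBlock) {
        VarToLive[VarNum] = NumGlobals + LocalIndexInBlock;
      }
    };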
 
@@ -32,7 +31,7 @@
 
 // Initializes the basic liveness-related data structures for full liveness
 // analysis (IsFullInit=true), or for incremental update after phi lowering
-// (IsFullInit=false).  In the latter case, FirstNode points to the first node
+// (IsFullInit=false). In the latter case, FirstNode points to the first node
 // added since starting phi lowering, and FirstVar points to the first Variable
 // added since starting phi lowering.
 void Liveness::initInternal(NodeList::const_iterator FirstNode,
@@ -60,7 +59,7 @@
   else
     assert(TmpNumGlobals == 0);
 
-  // Resize each LivenessNode::LiveToVarMap, and the global LiveToVarMap.  Reset
+  // Resize each LivenessNode::LiveToVarMap, and the global LiveToVarMap. Reset
   // the counts to 0.
   for (auto I = FirstNode, E = Func->getNodes().end(); I != E; ++I) {
     LivenessNode &N = Nodes[(*I)->getIndex()];
@@ -75,7 +74,7 @@
   RangeMask.resize(NumVars);
   RangeMask.set(0, NumVars); // Track all variables by default.
 
-  // Sort each variable into the appropriate LiveToVarMap.  Set VarToLiveMap.
+  // Sort each variable into the appropriate LiveToVarMap. Set VarToLiveMap.
   // Set RangeMask correctly for each variable.
   TmpNumGlobals = 0;
   for (auto I = FirstVar, E = Func->getVariables().end(); I != E; ++I) {
@@ -112,8 +111,7 @@
     // NumLocals, LiveToVarMap already initialized
     Node.LiveIn.resize(NumGlobals);
     Node.LiveOut.resize(NumGlobals);
-    // LiveBegin and LiveEnd are reinitialized before each pass over
-    // the block.
+    // LiveBegin and LiveEnd are reinitialized before each pass over the block.
   }
 }
 
diff --git a/src/IceLiveness.h b/src/IceLiveness.h
index 895138d..bd739d3 100644
--- a/src/IceLiveness.h
+++ b/src/IceLiveness.h
@@ -8,12 +8,11 @@
 //===----------------------------------------------------------------------===//
 ///
 /// \file
-/// This file declares the Liveness and LivenessNode classes,
-/// which are used for liveness analysis.  The node-specific
-/// information tracked for each Variable includes whether it is
-/// live on entry, whether it is live on exit, the instruction number
-/// that starts its live range, and the instruction number that ends
-/// its live range.  At the Cfg level, the actual live intervals are
+/// This file declares the Liveness and LivenessNode classes, which are used for
+/// liveness analysis. The node-specific information tracked for each Variable
+/// includes whether it is live on entry, whether it is live on exit, the
+/// instruction number that starts its live range, and the instruction number
+/// that ends its live range. At the Cfg level, the actual live intervals are
 /// recorded.
 ///
 //===----------------------------------------------------------------------===//
@@ -41,22 +40,20 @@
     /// NumLocals is the number of Variables local to this block.
     SizeT NumLocals = 0;
     /// NumNonDeadPhis tracks the number of Phi instructions that
-    /// Inst::liveness() identified as tentatively live.  If
-    /// NumNonDeadPhis changes from the last liveness pass, then liveness
-    /// has not yet converged.
+    /// Inst::liveness() identified as tentatively live. If NumNonDeadPhis
+    /// changes from the last liveness pass, then liveness has not yet
+    /// converged.
     SizeT NumNonDeadPhis = 0;
-    // LiveToVarMap maps a liveness bitvector index to a Variable.  This
-    // is generally just for printing/dumping.  The index should be less
-    // than NumLocals + Liveness::NumGlobals.
+    // LiveToVarMap maps a liveness bitvector index to a Variable. This is
+    // generally just for printing/dumping. The index should be less than
+    // NumLocals + Liveness::NumGlobals.
     std::vector<Variable *> LiveToVarMap;
     // LiveIn and LiveOut track the in- and out-liveness of the global
-    // variables.  The size of each vector is
-    // LivenessNode::NumGlobals.
+    // variables. The size of each vector is LivenessNode::NumGlobals.
     LivenessBV LiveIn, LiveOut;
-    // LiveBegin and LiveEnd track the instruction numbers of the start
-    // and end of each variable's live range within this block.  The
-    // index/key of each element is less than NumLocals +
-    // Liveness::NumGlobals.
+    // LiveBegin and LiveEnd track the instruction numbers of the start and end
+    // of each variable's live range within this block. The index/key of each
+    // element is less than NumLocals + Liveness::NumGlobals.
     LiveBeginEndMap LiveBegin, LiveEnd;
   };
 
@@ -111,11 +108,11 @@
   SizeT NumGlobals = 0;
   /// Size of Nodes is Cfg::Nodes.size().
   std::vector<LivenessNode> Nodes;
-  /// VarToLiveMap maps a Variable's Variable::Number to its live index
-  /// within its basic block.
+  /// VarToLiveMap maps a Variable's Variable::Number to its live index within
+  /// its basic block.
   std::vector<SizeT> VarToLiveMap;
-  /// LiveToVarMap is analogous to LivenessNode::LiveToVarMap, but for
-  /// non-local variables.
+  /// LiveToVarMap is analogous to LivenessNode::LiveToVarMap, but for non-local
+  /// variables.
   std::vector<Variable *> LiveToVarMap;
   /// RangeMask[Variable::Number] indicates whether we want to track that
   /// Variable's live range.
diff --git a/src/IceLoopAnalyzer.cpp b/src/IceLoopAnalyzer.cpp
index e2f7487..4e1b549 100644
--- a/src/IceLoopAnalyzer.cpp
+++ b/src/IceLoopAnalyzer.cpp
@@ -121,9 +121,9 @@
     return nullptr;
   }
 
-  // Reaching here means a loop has been found! It consists of the nodes on
-  // the top of the stack, down until the current node being processed, Node,
-  // is found.
+  // Reaching here means a loop has been found! It consists of the nodes on the
+  // top of the stack, down until the current node being processed, Node, is
+  // found.
   for (auto It = LoopStack.rbegin(); It != LoopStack.rend(); ++It) {
     (*It)->setOnStack(false);
     (*It)->incrementLoopNestDepth();
diff --git a/src/IceLoopAnalyzer.h b/src/IceLoopAnalyzer.h
index 5991798..19d38d1 100644
--- a/src/IceLoopAnalyzer.h
+++ b/src/IceLoopAnalyzer.h
@@ -35,6 +35,10 @@
   ///
   /// This only computes the loop nest depth within the function and does not
   /// take into account whether the function was called from within a loop.
+  // TODO(ascull): this currently uses an extension of Tarjan's algorithm which
+  // is bounded linear. ncbray suggests another algorithm which is linear in
+  // practice but not bounded linear. I think it also finds dominators.
+  // http://lenx.100871.net/papers/loop-SAS.pdf
   void computeLoopNestDepth();
 
 private:
@@ -88,11 +92,11 @@
   using LoopNodePtrList =
       std::vector<LoopNode *, CfgLocalAllocator<LoopNode *>>;
 
-  /// Process the node as part as part of Tarjan's algorithm and return either
-  /// a node to recurse into or nullptr when the node has been fully processed.
+  /// Process the node as part of Tarjan's algorithm and return either a node
+  /// to recurse into or nullptr when the node has been fully processed.
   LoopNode *processNode(LoopNode &Node);
 
-  /// The fuction to analyze for loops.
+  /// The function to analyze for loops.
   Cfg *const Func;
   /// A list of decorated nodes in the same order as Func->getNodes() which
   /// means the node's index will also be valid in this list.
diff --git a/src/IceOperand.cpp b/src/IceOperand.cpp
index 2013dcf..125c692 100644
--- a/src/IceOperand.cpp
+++ b/src/IceOperand.cpp
@@ -48,10 +48,9 @@
   Range.push_back(RangeElementType(Start, End));
 }
 
-// Returns true if this live range ends before Other's live range
-// starts.  This means that the highest instruction number in this
-// live range is less than or equal to the lowest instruction number
-// of the Other live range.
+// Returns true if this live range ends before Other's live range starts. This
+// means that the highest instruction number in this live range is less than or
+// equal to the lowest instruction number of the Other live range.
 bool LiveRange::endsBefore(const LiveRange &Other) const {
   // Neither range should be empty, but let's be graceful.
   if (Range.empty() || Other.Range.empty())
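As a standalone illustration of the test described above (plain std types rather than the CfgLocalAllocator-backed segment vector):

    #include <cstdint>
    #include <utility>
    #include <vector>

    using Segment = std::pair<int32_t, int32_t>; // [start, end) instruction numbers

    // True when every segment of Range ends no later than Other begins, e.g.
    // {[10,16)} ends before {[16,30)} but not before {[14,20)}.
    bool endsBefore(const std::vector<Segment> &Range,
                    const std::vector<Segment> &Other) {
      if (Range.empty() || Other.empty())
        return true; // Degenerate case; the real code also guards against this.
      return Range.back().second <= Other.front().first;
    }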
@@ -94,10 +93,10 @@
       break;
     }
   }
-  // This is an equivalent but less inefficient implementation.  It's
-  // expensive enough that we wouldn't want to run it under any build,
-  // but it could be enabled if e.g. the LiveRange implementation
-  // changes and extra testing is needed.
+  // This is an equivalent but less efficient implementation. It's expensive
+  // enough that we wouldn't want to run it under any build, but it could be
+  // enabled if e.g. the LiveRange implementation changes and extra testing is
+  // needed.
   if (BuildDefs::extraValidation()) {
     LiveRange Temp;
     Temp.addSegment(OtherBegin, OtherBegin + 1);
@@ -108,11 +107,10 @@
   return Result;
 }
 
-// Returns true if the live range contains the given instruction
-// number.  This is only used for validating the live range
-// calculation.  The IsDest argument indicates whether the Variable
-// being tested is used in the Dest position (as opposed to a Src
-// position).
+// Returns true if the live range contains the given instruction number. This
+// is only used for validating the live range calculation. The IsDest argument
+// indicates whether the Variable being tested is used in the Dest position (as
+// opposed to a Src position).
 bool LiveRange::containsValue(InstNumberT Value, bool IsDest) const {
   for (const RangeElementType &I : Range) {
     if (I.first <= Value &&
@@ -134,8 +132,8 @@
 }
 
 Variable *Variable::asType(Type Ty) {
-  // Note: This returns a Variable, even if the "this" object is a
-  // subclass of Variable.
+  // Note: This returns a Variable, even if the "this" object is a subclass of
+  // Variable.
   if (!BuildDefs::dump() || getType() == Ty)
     return this;
   Variable *V = new (getCurrentCfgAllocator()->Allocate<Variable>())
@@ -171,20 +169,19 @@
 
   if (MultiBlock == MBS_MultiBlock)
     return;
-  // TODO(stichnot): If the use occurs as a source operand in the
-  // first instruction of the block, and its definition is in this
-  // block's only predecessor, we might consider not marking this as a
-  // separate use.  This may also apply if it's the first instruction
-  // of the block that actually uses a Variable.
+  // TODO(stichnot): If the use occurs as a source operand in the first
+  // instruction of the block, and its definition is in this block's only
+  // predecessor, we might consider not marking this as a separate use. This
+  // may also apply if it's the first instruction of the block that actually
+  // uses a Variable.
   assert(Node);
   bool MakeMulti = false;
   if (IsImplicit)
     MakeMulti = true;
-  // A phi source variable conservatively needs to be marked as
-  // multi-block, even if its definition is in the same block.  This
-  // is because there can be additional control flow before branching
-  // back to this node, and the variable is live throughout those
-  // nodes.
+  // A phi source variable conservatively needs to be marked as multi-block,
+  // even if its definition is in the same block. This is because there can be
+  // additional control flow before branching back to this node, and the
+  // variable is live throughout those nodes.
   if (Instr && llvm::isa<InstPhi>(Instr))
     MakeMulti = true;
 
@@ -211,10 +208,10 @@
 
 void VariableTracking::markDef(MetadataKind TrackingKind, const Inst *Instr,
                                CfgNode *Node) {
-  // TODO(stichnot): If the definition occurs in the last instruction
-  // of the block, consider not marking this as a separate use.  But
-  // be careful not to omit all uses of the variable if markDef() and
-  // markUse() both use this optimization.
+  // TODO(stichnot): If the definition occurs in the last instruction of the
+  // block, consider not marking this as a separate use. But be careful not to
+  // omit all uses of the variable if markDef() and markUse() both use this
+  // optimization.
   assert(Node);
 // Verify that instructions are added in increasing order.
 #ifndef NDEBUG
@@ -517,8 +514,7 @@
 
 // =========== Immediate Randomization and Pooling routines ==============
 // Specialization of the template member function for ConstantInteger32
-// TODO(stichnot): try to move this specialization into a target-specific
-// file.
+// TODO(stichnot): try to move this specialization into a target-specific file.
 template <>
 bool ConstantInteger32::shouldBeRandomizedOrPooled(const GlobalContext *Ctx) {
   uint32_t Threshold = Ctx->getFlags().getRandomizeAndPoolImmediatesThreshold();
diff --git a/src/IceOperand.h b/src/IceOperand.h
index 8bec48e..b4e06be 100644
--- a/src/IceOperand.h
+++ b/src/IceOperand.h
@@ -45,11 +45,11 @@
     kVariable,
     kVariable_Target, // leave space for target-specific variable kinds
     kVariable_Max = kVariable_Target + MaxTargetKinds,
-    // Target-specific operand classes use kTarget as the starting
-    // point for their Kind enum space. Note that the value-spaces are shared
-    // across targets. To avoid confusion over the definition of shared
-    // values, an object specific to one target should never be passed
-    // to a different target.
+    // Target-specific operand classes use kTarget as the starting point for
+    // their Kind enum space. Note that the value-spaces are shared across
+    // targets. To avoid confusion over the definition of shared values, an
+    // object specific to one target should never be passed to a different
+    // target.
     kTarget,
     kTarget_Max = std::numeric_limits<uint8_t>::max(),
   };
@@ -70,8 +70,8 @@
   /// \name Dumping functions.
   /// @{
 
-  /// The dump(Func,Str) implementation must be sure to handle the
-  /// situation where Func==nullptr.
+  /// The dump(Func,Str) implementation must be sure to handle the situation
+  /// where Func==nullptr.
   virtual void dump(const Cfg *Func, Ostream &Str) const = 0;
   void dump(const Cfg *Func) const {
     if (!BuildDefs::dump())
@@ -105,8 +105,8 @@
   return Str;
 }
 
-/// Constant is the abstract base class for constants.  All
-/// constants are allocated from a global arena and are pooled.
+/// Constant is the abstract base class for constants. All constants are
+/// allocated from a global arena and are pooled.
 class Constant : public Operand {
   Constant() = delete;
   Constant(const Constant &) = delete;
@@ -124,9 +124,9 @@
     return Kind >= kConst_Base && Kind <= kConst_Max;
   }
 
-  /// Judge if this given immediate should be randomized or pooled
-  /// By default should return false, only constant integers should
-  /// truly go through this method.
+  /// Judge if this given immediate should be randomized or pooled. By default
+  /// this should return false; only constant integers should truly go through
+  /// this method.
   virtual bool shouldBeRandomizedOrPooled(const GlobalContext *Ctx) {
     (void)Ctx;
     return false;
@@ -142,9 +142,9 @@
     Vars = nullptr;
     NumVars = 0;
   }
-  /// PoolEntryID is an integer that uniquely identifies the constant
-  /// within its constant pool.  It is used for building the constant
-  /// pool in the object code and for referencing its entries.
+  /// PoolEntryID is an integer that uniquely identifies the constant within its
+  /// constant pool. It is used for building the constant pool in the object
+  /// code and for referencing its entries.
   const uint32_t PoolEntryID;
   /// Whether we should pool this constant. Usually Float/Double and pooled
   /// Integers should be flagged true.
@@ -219,10 +219,9 @@
   Str << static_cast<int64_t>(getValue());
 }
 
-/// RelocatableTuple bundles the parameters that are used to
-/// construct an ConstantRelocatable.  It is done this way so that
-/// ConstantRelocatable can fit into the global constant pool
-/// template mechanism.
+/// RelocatableTuple bundles the parameters that are used to construct a
+/// ConstantRelocatable. It is done this way so that ConstantRelocatable can fit
+/// into the global constant pool template mechanism.
 class RelocatableTuple {
   RelocatableTuple() = delete;
   RelocatableTuple &operator=(const RelocatableTuple &) = delete;
@@ -240,8 +239,8 @@
 
 bool operator==(const RelocatableTuple &A, const RelocatableTuple &B);
 
-/// ConstantRelocatable represents a symbolic constant combined with
-/// a fixed offset.
+/// ConstantRelocatable represents a symbolic constant combined with a fixed
+/// offset.
 class ConstantRelocatable : public Constant {
   ConstantRelocatable() = delete;
   ConstantRelocatable(const ConstantRelocatable &) = delete;
@@ -282,9 +281,9 @@
   bool SuppressMangling;
 };
 
-/// ConstantUndef represents an unspecified bit pattern. Although it is
-/// legal to lower ConstantUndef to any value, backends should try to
-/// make code generation deterministic by lowering ConstantUndefs to 0.
+/// ConstantUndef represents an unspecified bit pattern. Although it is legal to
+/// lower ConstantUndef to any value, backends should try to make code
+/// generation deterministic by lowering ConstantUndefs to 0.
 class ConstantUndef : public Constant {
   ConstantUndef() = delete;
   ConstantUndef(const ConstantUndef &) = delete;
@@ -315,9 +314,9 @@
       : Constant(kConstUndef, Ty, PoolEntryID) {}
 };
 
-/// RegWeight is a wrapper for a uint32_t weight value, with a
-/// special value that represents infinite weight, and an addWeight()
-/// method that ensures that W+infinity=infinity.
+/// RegWeight is a wrapper for a uint32_t weight value, with a special value
+/// that represents infinite weight, and an addWeight() method that ensures that
+/// W+infinity=infinity.
 class RegWeight {
 public:
   RegWeight() = default;
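A minimal sketch of the saturating addition the comment describes; this is not the actual RegWeight implementation, and the choice of UINT32_MAX as the infinite weight is only an assumption:

    #include <cstdint>
    #include <limits>

    struct SaturatingWeight {
      static constexpr uint32_t Inf = std::numeric_limits<uint32_t>::max();
      uint32_t W = 0;

      // W + infinity = infinity; any sum that would reach Inf saturates there.
      void addWeight(uint32_t Delta) {
        if (Delta == Inf || W == Inf || W > Inf - Delta)
          W = Inf;
        else
          W += Delta;
      }
    };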
@@ -346,15 +345,15 @@
 bool operator<=(const RegWeight &A, const RegWeight &B);
 bool operator==(const RegWeight &A, const RegWeight &B);
 
-/// LiveRange is a set of instruction number intervals representing
-/// a variable's live range.  Generally there is one interval per basic
-/// block where the variable is live, but adjacent intervals get
-/// coalesced into a single interval.
+/// LiveRange is a set of instruction number intervals representing a variable's
+/// live range. Generally there is one interval per basic block where the
+/// variable is live, but adjacent intervals get coalesced into a single
+/// interval.
 class LiveRange {
 public:
   LiveRange() = default;
-  /// Special constructor for building a kill set.  The advantage is
-  /// that we can reserve the right amount of space in advance.
+  /// Special constructor for building a kill set. The advantage is that we can
+  /// reserve the right amount of space in advance.
   explicit LiveRange(const std::vector<InstNumberT> &Kills) {
     Range.reserve(Kills.size());
     for (InstNumberT I : Kills)
@@ -392,22 +391,21 @@
   using RangeType =
       std::vector<RangeElementType, CfgLocalAllocator<RangeElementType>>;
   RangeType Range;
-  /// TrimmedBegin is an optimization for the overlaps() computation.
-  /// Since the linear-scan algorithm always calls it as overlaps(Cur)
-  /// and Cur advances monotonically according to live range start, we
-  /// can optimize overlaps() by ignoring all segments that end before
-  /// the start of Cur's range.  The linear-scan code enables this by
-  /// calling trim() on the ranges of interest as Cur advances.  Note
-  /// that linear-scan also has to initialize TrimmedBegin at the
-  /// beginning by calling untrim().
+  /// TrimmedBegin is an optimization for the overlaps() computation. Since the
+  /// linear-scan algorithm always calls it as overlaps(Cur) and Cur advances
+  /// monotonically according to live range start, we can optimize overlaps() by
+  /// ignoring all segments that end before the start of Cur's range. The
+  /// linear-scan code enables this by calling trim() on the ranges of interest
+  /// as Cur advances. Note that linear-scan also has to initialize TrimmedBegin
+  /// at the beginning by calling untrim().
   RangeType::const_iterator TrimmedBegin;
 };
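A standalone sketch of the trim()/untrim() protocol the TrimmedBegin comment describes (plain std types; in the real code the iterator lives inside LiveRange):

    #include <cstdint>
    #include <utility>
    #include <vector>

    struct TrimmableRange {
      using Segment = std::pair<int32_t, int32_t>; // [start, end)
      std::vector<Segment> Range;
      std::vector<Segment>::const_iterator TrimmedBegin;

      // Called once before linear scan starts walking ranges.
      void untrim() { TrimmedBegin = Range.begin(); }

      // Called as Cur advances: segments ending at or before Lower can never
      // overlap any later Cur, so overlaps() may start scanning at TrimmedBegin.
      void trim(int32_t Lower) {
        while (TrimmedBegin != Range.end() && TrimmedBegin->second <= Lower)
          ++TrimmedBegin;
      }
    };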
 
 Ostream &operator<<(Ostream &Str, const LiveRange &L);
 
 /// Variable represents an operand that is register-allocated or
-/// stack-allocated.  If it is register-allocated, it will ultimately
-/// have a non-negative RegNum field.
+/// stack-allocated. If it is register-allocated, it will ultimately have a
+/// non-negative RegNum field.
 class Variable : public Operand {
   Variable() = delete;
   Variable(const Variable &) = delete;
@@ -495,11 +493,11 @@
     LoVar = Lo;
     HiVar = Hi;
   }
-  /// Creates a temporary copy of the variable with a different type.
-  /// Used primarily for syntactic correctness of textual assembly
-  /// emission.  Note that only basic information is copied, in
-  /// particular not IsArgument, IsImplicitArgument, IgnoreLiveness,
-  /// RegNumTmp, Live, LoVar, HiVar, VarsReal.
+  /// Creates a temporary copy of the variable with a different type. Used
+  /// primarily for syntactic correctness of textual assembly emission. Note
+  /// that only basic information is copied, in particular not IsArgument,
+  /// IsImplicitArgument, IgnoreLiveness, RegNumTmp, Live, LoVar, HiVar,
+  /// VarsReal.
   Variable *asType(Type Ty);
 
   void emit(const Cfg *Func) const override;
@@ -521,18 +519,18 @@
     Vars[0] = this;
     NumVars = 1;
   }
-  /// Number is unique across all variables, and is used as a
-  /// (bit)vector index for liveness analysis.
+  /// Number is unique across all variables, and is used as a (bit)vector index
+  /// for liveness analysis.
   const SizeT Number;
   Cfg::IdentifierIndexType NameIndex = Cfg::IdentifierIndexInvalid;
   bool IsArgument = false;
   bool IsImplicitArgument = false;
-  /// IgnoreLiveness means that the variable should be ignored when
-  /// constructing and validating live ranges.  This is usually
-  /// reserved for the stack pointer.
+  /// IgnoreLiveness means that the variable should be ignored when constructing
+  /// and validating live ranges. This is usually reserved for the stack
+  /// pointer.
   bool IgnoreLiveness = false;
-  /// StackOffset is the canonical location on stack (only if
-  /// RegNum==NoRegister || IsArgument).
+  /// StackOffset is the canonical location on stack (only if RegNum==NoRegister
+  /// || IsArgument).
   int32_t StackOffset = 0;
   /// RegNum is the allocated register, or NoRegister if it isn't
   /// register-allocated.
@@ -541,17 +539,15 @@
   int32_t RegNumTmp = NoRegister;
   RegRequirement RegRequirement = RR_MayHaveRegister;
   LiveRange Live;
-  // LoVar and HiVar are needed for lowering from 64 to 32 bits.  When
-  // lowering from I64 to I32 on a 32-bit architecture, we split the
-  // variable into two machine-size pieces.  LoVar is the low-order
-  // machine-size portion, and HiVar is the remaining high-order
-  // portion.  TODO: It's wasteful to penalize all variables on all
-  // targets this way; use a sparser representation.  It's also
-  // wasteful for a 64-bit target.
+  // LoVar and HiVar are needed for lowering from 64 to 32 bits. When lowering
+  // from I64 to I32 on a 32-bit architecture, we split the variable into two
+  // machine-size pieces. LoVar is the low-order machine-size portion, and
+  // HiVar is the remaining high-order portion.
+  // TODO: It's wasteful to penalize all variables on all targets this way; use
+  // a sparser representation. It's also wasteful for a 64-bit target.
   Variable *LoVar = nullptr;
   Variable *HiVar = nullptr;
-  /// VarsReal (and Operand::Vars) are set up such that Vars[0] ==
-  /// this.
+  /// VarsReal (and Operand::Vars) are set up such that Vars[0] == this.
   Variable *VarsReal[1];
 };
 
@@ -611,13 +607,12 @@
 
 public:
   explicit VariablesMetadata(const Cfg *Func) : Func(Func) {}
-  /// Initialize the state by traversing all instructions/variables in
-  /// the CFG.
+  /// Initialize the state by traversing all instructions/variables in the CFG.
   void init(MetadataKind TrackingKind);
-  /// Add a single node.  This is called by init(), and can be called
+  /// Add a single node. This is called by init(), and can be called
   /// incrementally from elsewhere, e.g. after edge-splitting.
   void addNode(CfgNode *Node);
-  /// Returns whether the given Variable is tracked in this object.  It should
+  /// Returns whether the given Variable is tracked in this object. It should
   /// only return false if changes were made to the CFG after running init(), in
   /// which case the state is stale and the results shouldn't be trusted (but it
   /// may be OK e.g. for dumping).
@@ -627,29 +622,27 @@
 
   /// Returns whether the given Variable has multiple definitions.
   bool isMultiDef(const Variable *Var) const;
-  /// Returns the first definition instruction of the given Variable.  This is
+  /// Returns the first definition instruction of the given Variable. This is
   /// only valid for variables whose definitions are all within the same block,
   /// e.g. T after the lowered sequence "T=B; T+=C; A=T", for which
-  /// getFirstDefinition(T) would return the "T=B" instruction.  For variables
+  /// getFirstDefinition(T) would return the "T=B" instruction. For variables
   /// with definitions span multiple blocks, nullptr is returned.
   const Inst *getFirstDefinition(const Variable *Var) const;
-  /// Returns the definition instruction of the given Variable, when
-  /// the variable has exactly one definition.  Otherwise, nullptr is
-  /// returned.
+  /// Returns the definition instruction of the given Variable, when the
+  /// variable has exactly one definition. Otherwise, nullptr is returned.
   const Inst *getSingleDefinition(const Variable *Var) const;
   /// Returns the list of all definition instructions of the given Variable.
   const InstDefList &getLatterDefinitions(const Variable *Var) const;
 
-  /// Returns whether the given Variable is live across multiple
-  /// blocks.  Mainly, this is used to partition Variables into
-  /// single-block versus multi-block sets for leveraging sparsity in
-  /// liveness analysis, and for implementing simple stack slot
-  /// coalescing.  As a special case, function arguments are always
-  /// considered multi-block because they are live coming into the
-  /// entry block.
+  /// Returns whether the given Variable is live across multiple blocks. Mainly,
+  /// this is used to partition Variables into single-block versus multi-block
+  /// sets for leveraging sparsity in liveness analysis, and for implementing
+  /// simple stack slot coalescing. As a special case, function arguments are
+  /// always considered multi-block because they are live coming into the entry
+  /// block.
   bool isMultiBlock(const Variable *Var) const;
   /// Returns the node that the given Variable is used in, assuming
-  /// isMultiBlock() returns false.  Otherwise, nullptr is returned.
+  /// isMultiBlock() returns false. Otherwise, nullptr is returned.
   CfgNode *getLocalUseNode(const Variable *Var) const;
 
   /// Returns the total use weight computed as the sum of uses multiplied by a
diff --git a/src/IcePhiLoweringImpl.h b/src/IcePhiLoweringImpl.h
index cf932d6..1957645 100644
--- a/src/IcePhiLoweringImpl.h
+++ b/src/IcePhiLoweringImpl.h
@@ -24,11 +24,11 @@
 namespace Ice {
 namespace PhiLowering {
 
-// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to
-// preserve integrity of liveness analysis.  This is needed for 32-bit
-// targets.  This assumes the 32-bit target has loOperand, hiOperand,
-// and legalizeUndef methods.  Undef values are also legalized, since
-// loOperand() and hiOperand() don't expect Undef input.
+/// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to preserve
+/// integrity of liveness analysis. This is needed for 32-bit targets. This
+/// assumes the 32-bit target has loOperand, hiOperand, and legalizeUndef
+/// methods. Undef values are also legalized, since loOperand() and hiOperand()
+/// don't expect Undef input.
 template <class TargetT>
 void prelowerPhis32Bit(TargetT *Target, CfgNode *Node, Cfg *Func) {
   for (Inst &I : Node->getPhis()) {
diff --git a/src/IceRNG.cpp b/src/IceRNG.cpp
index 89b1893..987d1a4 100644
--- a/src/IceRNG.cpp
+++ b/src/IceRNG.cpp
@@ -25,9 +25,9 @@
 // TODO(wala,stichnot): Switch to RNG implementation from LLVM or C++11.
 //
 // TODO(wala,stichnot): Make it possible to replay the RNG sequence in a
-// subsequent run, for reproducing a bug.  Print the seed in a comment
-// in the asm output.  Embed the seed in the binary via metadata that an
-// attacker can't introspect.
+// subsequent run, for reproducing a bug. Print the seed in a comment in the
+// asm output. Embed the seed in the binary via metadata that an attacker can't
+// introspect.
 RandomNumberGenerator::RandomNumberGenerator(uint64_t Seed, llvm::StringRef)
     : State(Seed) {}
 
diff --git a/src/IceRNG.h b/src/IceRNG.h
index 4eeefa6..7ee2e39 100644
--- a/src/IceRNG.h
+++ b/src/IceRNG.h
@@ -52,9 +52,9 @@
   uint64_t State;
 };
 
-/// This class adds additional random number generator utilities. The
-/// reason for the wrapper class is that we want to keep the
-/// RandomNumberGenerator interface identical to LLVM's.
+/// This class adds additional random number generator utilities. The reason for
+/// the wrapper class is that we want to keep the RandomNumberGenerator
+/// interface identical to LLVM's.
 class RandomNumberGeneratorWrapper {
   RandomNumberGeneratorWrapper() = delete;
   RandomNumberGeneratorWrapper(const RandomNumberGeneratorWrapper &) = delete;
@@ -71,9 +71,9 @@
   RandomNumberGenerator &RNG;
 };
 
-/// RandomShuffle is an implementation of std::random_shuffle() that
-/// doesn't change across stdlib implementations.  Adapted from a
-/// sample implementation at cppreference.com.
+/// RandomShuffle is an implementation of std::random_shuffle() that doesn't
+/// change across stdlib implementations. Adapted from a sample implementation
+/// at cppreference.com.
 template <class RandomIt, class RandomFunc>
 void RandomShuffle(RandomIt First, RandomIt Last, RandomFunc &&RNG) {
   for (auto i = Last - First - 1; i > 0; --i)
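A small usage sketch; the seed, the element list, and the assumption that RandomNumberGenerator::next(Max) returns a value in [0, Max) are illustrative:

    #include <cstdint>
    #include <vector>
    #include "IceRNG.h"

    void shuffleExample() {
      // A fixed seed makes the permutation reproducible and identical across
      // standard-library implementations (unlike std::random_shuffle).
      Ice::RandomNumberGenerator RNG(/*Seed=*/12345, "example");
      std::vector<int> Candidates = {0, 1, 2, 3, 4, 5};
      Ice::RandomShuffle(Candidates.begin(), Candidates.end(),
                         [&RNG](uint64_t N) { return RNG.next(N); });
    }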
diff --git a/src/IceRegAlloc.cpp b/src/IceRegAlloc.cpp
index ad5c2b6..304ac37 100644
--- a/src/IceRegAlloc.cpp
+++ b/src/IceRegAlloc.cpp
@@ -82,14 +82,14 @@
     : Func(Func), Ctx(Func->getContext()), Target(Func->getTarget()),
       Verbose(BuildDefs::dump() && Func->isVerbose(IceV_LinearScan)) {}
 
-// Prepare for full register allocation of all variables.  We depend on
-// liveness analysis to have calculated live ranges.
+// Prepare for full register allocation of all variables. We depend on liveness
+// analysis to have calculated live ranges.
 void LinearScan::initForGlobal() {
   TimerMarker T(TimerStack::TT_initUnhandled, Func);
   FindPreference = true;
   // For full register allocation, normally we want to enable FindOverlap
   // (meaning we look for opportunities for two overlapping live ranges to
-  // safely share the same register).  However, we disable it for phi-lowering
+  // safely share the same register). However, we disable it for phi-lowering
   // register allocation since no overlap opportunities should be available and
   // it's more expensive to look for opportunities.
   FindOverlap = (Kind != RAK_Phi);
@@ -262,7 +262,7 @@
 }
 
 // This is called when Cur must be allocated a register but no registers are
-// available across Cur's live range.  To handle this, we find a register that
+// available across Cur's live range. To handle this, we find a register that
 // is not explicitly used during Cur's live range, spill that register to a
 // stack location right before Cur's live range begins, and fill (reload) the
 // register from the stack location right after Cur's live range ends.
@@ -297,9 +297,9 @@
     if (I->getNumber() == End)
       FillPoint = I;
     if (SpillPoint != E) {
-      // Remove from RegMask any physical registers referenced during Cur's live
-      // range.  Start looking after SpillPoint gets set, i.e. once Cur's live
-      // range begins.
+      // Remove from RegMask any physical registers referenced during Cur's
+      // live range. Start looking after SpillPoint gets set, i.e. once Cur's
+      // live range begins.
       FOREACH_VAR_IN_INST(Var, *I) {
         if (!Var->hasRegTmp())
           continue;
@@ -319,8 +319,9 @@
   assert(RegNum != -1);
   Iter.Cur->setRegNumTmp(RegNum);
   Variable *Preg = Target->getPhysicalRegister(RegNum, Iter.Cur->getType());
-  // TODO(stichnot): Add SpillLoc to VariablesMetadata tracking so that SpillLoc
-  // is correctly identified as !isMultiBlock(), reducing stack frame size.
+  // TODO(stichnot): Add SpillLoc to VariablesMetadata tracking so that
+  // SpillLoc is correctly identified as !isMultiBlock(), reducing stack frame
+  // size.
   Variable *SpillLoc = Func->makeVariable(Iter.Cur->getType());
   // Add "reg=FakeDef;spill=reg" before SpillPoint
   Target->lowerInst(Node, SpillPoint, InstFakeDef::create(Func, Preg));
@@ -413,8 +414,8 @@
         if (Variable *SrcVar = llvm::dyn_cast<Variable>(DefInst->getSrc(i))) {
           int32_t SrcReg = SrcVar->getRegNumTmp();
           // Only consider source variables that have (so far) been assigned a
-          // register. That register must be one in the RegMask set, e.g.
-          // don't try to prefer the stack pointer as a result of the stacksave
+          // register. That register must be one in the RegMask set, e.g. don't
+          // try to prefer the stack pointer as a result of the stacksave
           // intrinsic.
           if (SrcVar->hasRegTmp() && Iter.RegMask[SrcReg]) {
             if (FindOverlap && !Iter.Free[SrcReg]) {
@@ -469,7 +470,7 @@
 
 // Remove registers from the Free[] list where an Unhandled pre-colored range
 // overlaps with the current range, and set those registers to infinite weight
-// so that they aren't candidates for eviction.  Cur->rangeEndsBefore(Item) is
+// so that they aren't candidates for eviction. Cur->rangeEndsBefore(Item) is
 // an early exit check that turns a guaranteed O(N^2) algorithm into expected
 // linear complexity.
 void LinearScan::filterFreeWithPrecoloredRanges(IterationState &Iter) {
@@ -610,9 +611,9 @@
       const SizeT Index = I - 1;
       Variable *Item = Inactive[Index];
       // Note: The Item->rangeOverlaps(Cur) clause is not part of the
-      // description of AssignMemLoc() in the original paper.  But there
-      // doesn't seem to be any need to evict an inactive live range that
-      // doesn't overlap with the live range currently being considered. It's
+      // description of AssignMemLoc() in the original paper. But there doesn't
+      // seem to be any need to evict an inactive live range that doesn't
+      // overlap with the live range currently being considered. It's
       // especially bad if we would end up evicting an infinite-weight but
       // currently-inactive live range. The most common situation for this
       // would be a scratch register kill set for call instructions.
@@ -644,9 +645,9 @@
   if (Randomized) {
     // Create a random number generator for regalloc randomization. Merge
     // function's sequence and Kind value as the Salt. Because regAlloc() is
-    // called twice under O2, the second time with RAK_Phi, we check
-    // Kind == RAK_Phi to determine the lowest-order bit to make sure the Salt
-    // is different.
+    // called twice under O2, the second time with RAK_Phi, we check Kind ==
+    // RAK_Phi to determine the lowest-order bit to make sure the Salt is
+    // different.
     uint64_t Salt =
         (Func->getSequenceNumber() << 1) ^ (Kind == RAK_Phi ? 0u : 1u);
     Target->makeRandomRegisterPermutation(
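
A quick worked example of the Salt derivation shown above (the sequence number 7 is purely illustrative): the two regAlloc() invocations under O2 end up with distinct salts because the low bit differs.

#include <cstdint>

// Sketch of the salt computation described above: the function's sequence
// number is shifted left, and the low bit distinguishes the RAK_Phi pass.
constexpr uint64_t saltSketch(uint64_t SequenceNumber, bool IsPhiPass) {
  return (SequenceNumber << 1) ^ (IsPhiPass ? 0u : 1u);
}

static_assert(saltSketch(7, /*IsPhiPass=*/false) == 15, "non-Phi pass");
static_assert(saltSketch(7, /*IsPhiPass=*/true) == 14, "Phi pass");
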
diff --git a/src/IceRegAlloc.h b/src/IceRegAlloc.h
index b3986a3..ec37aa0 100644
--- a/src/IceRegAlloc.h
+++ b/src/IceRegAlloc.h
@@ -60,10 +60,10 @@
 
   void initForGlobal();
   void initForInfOnly();
-  /// Move an item from the From set to the To set.  From[Index] is pushed onto
+  /// Move an item from the From set to the To set. From[Index] is pushed onto
   /// the end of To[], then the item is efficiently removed from From[] by
   /// effectively swapping it with the last item in From[] and then popping it
-  /// from the back.  As such, the caller is best off iterating over From[] in
+  /// from the back. As such, the caller is best off iterating over From[] in
   /// reverse order to avoid the need for special handling of the iterator.
   void moveItem(UnorderedRanges &From, SizeT Index, UnorderedRanges &To) {
     To.push_back(From[Index]);
@@ -109,8 +109,8 @@
   /// currently assigned to. It can be greater than 1 as a result of
   /// AllowOverlap inference.
   llvm::SmallVector<int32_t, REGS_SIZE> RegUses;
-  // TODO(jpp): for some architectures a SmallBitVector might not be big enough.
-  // Evaluate what the performance impact on those architectures is.
+  // TODO(jpp): for some architectures a SmallBitVector might not be big
+  // enough. Evaluate what the performance impact on those architectures is.
   llvm::SmallVector<const llvm::SmallBitVector *, REGS_SIZE> RegAliases;
   bool FindPreference = false;
   bool FindOverlap = false;
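
As a minimal standalone sketch of the swap-and-pop removal idiom that the moveItem() comment above relies on (generic std::vector version, not the UnorderedRanges type itself), showing why iterating From in reverse keeps the remaining indices valid:

#include <cstddef>
#include <utility>
#include <vector>

// Move element Index from From to To in O(1), without preserving From's order.
template <typename T>
void moveItemSketch(std::vector<T> &From, std::size_t Index, std::vector<T> &To) {
  To.push_back(From[Index]);           // append the item to To
  std::swap(From[Index], From.back()); // fill the hole with From's last item
  From.pop_back();                     // then drop the duplicate at the back
}

// Iterating in reverse means each removal only disturbs indices that have
// already been visited, so no special iterator fix-up is needed.
template <typename T, typename Pred>
void moveMatchingSketch(std::vector<T> &From, std::vector<T> &To, Pred ShouldMove) {
  for (std::size_t I = From.size(); I > 0; --I)
    if (ShouldMove(From[I - 1]))
      moveItemSketch(From, I - 1, To);
}
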
diff --git a/src/IceRegistersARM32.h b/src/IceRegistersARM32.h
index a80b9b2..eafed3a 100644
--- a/src/IceRegistersARM32.h
+++ b/src/IceRegistersARM32.h
@@ -23,8 +23,8 @@
 
 class RegARM32 {
 public:
-  /// An enum of every register. The enum value may not match the encoding
-  /// used to binary encode register operands in instructions.
+  /// An enum of every register. The enum value may not match the encoding used
+  /// to binary encode register operands in instructions.
   enum AllRegisters {
 #define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt,    \
           isFP32, isFP64, isVec128, alias_init)                                \
@@ -37,8 +37,8 @@
 #undef X
   };
 
-  /// An enum of GPR Registers. The enum value does match the encoding used
-  /// to binary encode register operands in instructions.
+  /// An enum of GPR Registers. The enum value does match the encoding used to
+  /// binary encode register operands in instructions.
   enum GPRRegister {
 #define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt,    \
           isFP32, isFP64, isVec128, alias_init)                                \
diff --git a/src/IceRegistersX8632.h b/src/IceRegistersX8632.h
index b0d22bb..73492ef 100644
--- a/src/IceRegistersX8632.h
+++ b/src/IceRegistersX8632.h
@@ -23,8 +23,8 @@
 
 class RegX8632 {
 public:
-  /// An enum of every register. The enum value may not match the encoding
-  /// used to binary encode register operands in instructions.
+  /// An enum of every register. The enum value may not match the encoding used
+  /// to binary encode register operands in instructions.
   enum AllRegisters {
 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr,      \
           frameptr, isI8, isInt, isFP)                                         \
@@ -37,8 +37,8 @@
 #undef X
   };
 
-  /// An enum of GPR Registers. The enum value does match the encoding used
-  /// to binary encode register operands in instructions.
+  /// An enum of GPR Registers. The enum value does match the encoding used to
+  /// binary encode register operands in instructions.
   enum GPRRegister {
 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr,      \
           frameptr, isI8, isInt, isFP)                                         \
@@ -48,8 +48,8 @@
         Encoded_Not_GPR = -1
   };
 
-  /// An enum of XMM Registers. The enum value does match the encoding used
-  /// to binary encode register operands in instructions.
+  /// An enum of XMM Registers. The enum value does match the encoding used to
+  /// binary encode register operands in instructions.
   enum XmmRegister {
 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr,      \
           frameptr, isI8, isInt, isFP)                                         \
@@ -59,8 +59,8 @@
         Encoded_Not_Xmm = -1
   };
 
-  /// An enum of Byte Registers. The enum value does match the encoding used
-  /// to binary encode register operands in instructions.
+  /// An enum of Byte Registers. The enum value does match the encoding used to
+  /// binary encode register operands in instructions.
   enum ByteRegister {
 #define X(val, encode) Encoded_##val encode,
     REGX8632_BYTEREG_TABLE
diff --git a/src/IceRegistersX8664.h b/src/IceRegistersX8664.h
index bc448b2..3a10f00 100644
--- a/src/IceRegistersX8664.h
+++ b/src/IceRegistersX8664.h
@@ -23,8 +23,8 @@
 
 class RegX8664 {
 public:
-  /// An enum of every register. The enum value may not match the encoding
-  /// used to binary encode register operands in instructions.
+  /// An enum of every register. The enum value may not match the encoding used
+  /// to binary encode register operands in instructions.
   enum AllRegisters {
 #define X(val, encode, name64, name, name16, name8, scratch, preserved,        \
           stackptr, frameptr, isInt, isFP)                                     \
@@ -37,8 +37,8 @@
 #undef X
   };
 
-  /// An enum of GPR Registers. The enum value does match the encoding used
-  /// to binary encode register operands in instructions.
+  /// An enum of GPR Registers. The enum value does match the encoding used to
+  /// binary encode register operands in instructions.
   enum GPRRegister {
 #define X(val, encode, name64, name, name16, name8, scratch, preserved,        \
           stackptr, frameptr, isInt, isFP)                                     \
@@ -48,8 +48,8 @@
         Encoded_Not_GPR = -1
   };
 
-  /// An enum of XMM Registers. The enum value does match the encoding used
-  /// to binary encode register operands in instructions.
+  /// An enum of XMM Registers. The enum value does match the encoding used to
+  /// binary encode register operands in instructions.
   enum XmmRegister {
 #define X(val, encode, name64, name, name16, name8, scratch, preserved,        \
           stackptr, frameptr, isInt, isFP)                                     \
@@ -59,8 +59,8 @@
         Encoded_Not_Xmm = -1
   };
 
-  /// An enum of Byte Registers. The enum value does match the encoding used
-  /// to binary encode register operands in instructions.
+  /// An enum of Byte Registers. The enum value does match the encoding used to
+  /// binary encode register operands in instructions.
   enum ByteRegister {
 #define X(val, encode) Encoded_##val encode,
     REGX8664_BYTEREG_TABLE
diff --git a/src/IceSwitchLowering.cpp b/src/IceSwitchLowering.cpp
index 6207495..047aa91 100644
--- a/src/IceSwitchLowering.cpp
+++ b/src/IceSwitchLowering.cpp
@@ -55,8 +55,8 @@
 
   // Test for a single jump table. This can be done in constant time whereas
   // finding the best set of jump table would be quadratic, too slow(?). If
-  // jump tables were included in the search tree we'd first have to traverse to
-  // them. Ideally we would have an unbalanced tree which is biased towards
+  // jump tables were included in the search tree we'd first have to traverse
+  // to them. Ideally we would have an unbalanced tree which is biased towards
   // frequently executed code but we can't do this well without profiling data.
   // So, this single jump table is a good starting point where you can get to
   // the jump table quickly without figuring out how to unbalance the tree.
diff --git a/src/IceSwitchLowering.h b/src/IceSwitchLowering.h
index e1cdb8a..df3bef3 100644
--- a/src/IceSwitchLowering.h
+++ b/src/IceSwitchLowering.h
@@ -75,8 +75,8 @@
   bool tryAppend(const CaseCluster &New);
 };
 
-/// Store the jump table data so that it can be emitted later in the correct
-/// ELF section once the offsets from the start of the function are known.
+/// Store the jump table data so that it can be emitted later in the correct ELF
+/// section once the offsets from the start of the function are known.
 class JumpTableData {
   JumpTableData() = delete;
   JumpTableData &operator=(const JumpTableData &) = delete;
diff --git a/src/IceTLS.h b/src/IceTLS.h
index 0e7731d..9a20e70 100644
--- a/src/IceTLS.h
+++ b/src/IceTLS.h
@@ -9,9 +9,8 @@
 ///
 /// \file
 /// This file defines macros for working around the lack of support for
-/// thread_local in MacOS 10.6.  It assumes std::thread is written in
-/// terms of pthread.  Define ICE_THREAD_LOCAL_HACK to enable the
-/// pthread workarounds.
+/// thread_local in MacOS 10.6. It assumes std::thread is written in terms of
+/// pthread. Define ICE_THREAD_LOCAL_HACK to enable the pthread workarounds.
 ///
 //===----------------------------------------------------------------------===//
 
@@ -26,25 +25,25 @@
 
 // Defines 4 macros for unifying thread_local and pthread:
 //
-// ICE_TLS_DECLARE_FIELD(Type, FieldName): Declare a static
-// thread_local field inside the current class definition.  "Type"
-// needs to be a pointer type, such as int* or class Foo*.
+// ICE_TLS_DECLARE_FIELD(Type, FieldName): Declare a static thread_local field
+// inside the current class definition. "Type" needs to be a pointer type, such
+// as int* or class Foo*.
 //
 // ICE_TLS_DEFINE_FIELD(Type, ClassName, FieldName): Define a static
-// thread_local field outside of its class definition.  The field will
+// thread_local field outside of its class definition. The field will
 // ultimately be initialized to nullptr.
 //
-// ICE_TLS_INIT_FIELD(FieldName): Ensure the thread_local field is
-// properly initialized.  This is intended to be called from within a
-// static method of the field's class after main() starts (to ensure
-// that the pthread library is fully initialized) but before any uses
-// of ICE_TLS_GET_FIELD or ICE_TLS_SET_FIELD.
+// ICE_TLS_INIT_FIELD(FieldName): Ensure the thread_local field is properly
+// initialized. This is intended to be called from within a static method of
+// the field's class after main() starts (to ensure that the pthread library is
+// fully initialized) but before any uses of ICE_TLS_GET_FIELD or
+// ICE_TLS_SET_FIELD.
 //
 // ICE_TLS_GET_FIELD(Type, FieldName): Read the value of the static
-// thread_local field.  Must be done within the context of its class.
+// thread_local field. Must be done within the context of its class.
 //
 // ICE_TLS_SET_FIELD(FieldName, Value): Write a value into the static
-// thread_local field.  Must be done within the context of its class.
+// thread_local field. Must be done within the context of its class.
 
 // TODO(stichnot): Limit this define to only the platforms that
 // absolutely require it.  And ideally, eventually remove this hack
@@ -52,17 +51,16 @@
 #define ICE_THREAD_LOCAL_HACK
 #ifdef ICE_THREAD_LOCAL_HACK
 
-// For a static thread_local field F of a class C, instead of
-// declaring and defining C::F, we create two static fields:
+// For a static thread_local field F of a class C, instead of declaring and
+// defining C::F, we create two static fields:
 //   static pthread_key_t F__key;
 //   static int F__initStatus;
 //
 // The F__initStatus field is used to hold the result of the
-// pthread_key_create() call, where a zero value indicates success,
-// and a nonzero value indicates failure or that ICE_TLS_INIT_FIELD()
-// was never called.
-// The F__key field is used as the argument to
-// pthread_getspecific() and pthread_setspecific().
+// pthread_key_create() call, where a zero value indicates success, and a
+// nonzero value indicates failure or that ICE_TLS_INIT_FIELD() was never
+// called. The F__key field is used as the argument to pthread_getspecific()
+// and pthread_setspecific().
 
 #include <pthread.h>
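
For illustration only (the actual macro bodies are outside this hunk): a sketch of how a pthread-keyed field along the lines described above could be declared, initialized, and accessed. The class and field names below are hypothetical, not the real ICE_TLS expansions.

#include <cassert>
#include <pthread.h>

// Hypothetical class using a pthread key in place of a static thread_local
// pointer field, following the F__key / F__initStatus pattern described above.
class TLSSketch {
public:
  static void initField() {
    // Zero indicates success; nonzero means failure or "never initialized".
    FieldSketch__initStatus = pthread_key_create(&FieldSketch__key, nullptr);
  }
  static int *getField() {
    assert(FieldSketch__initStatus == 0);
    return static_cast<int *>(pthread_getspecific(FieldSketch__key));
  }
  static void setField(int *Value) {
    assert(FieldSketch__initStatus == 0);
    pthread_setspecific(FieldSketch__key, Value);
  }

private:
  static pthread_key_t FieldSketch__key;
  static int FieldSketch__initStatus;
};

pthread_key_t TLSSketch::FieldSketch__key;
int TLSSketch::FieldSketch__initStatus = 1; // nonzero until initField() runs
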
 
diff --git a/src/IceTargetLowering.cpp b/src/IceTargetLowering.cpp
index 5268aa7..6920788 100644
--- a/src/IceTargetLowering.cpp
+++ b/src/IceTargetLowering.cpp
@@ -8,11 +8,10 @@
 //===----------------------------------------------------------------------===//
 ///
 /// \file
-/// This file implements the skeleton of the TargetLowering class,
-/// specifically invoking the appropriate lowering method for a given
-/// instruction kind and driving global register allocation.  It also
-/// implements the non-deleted instruction iteration in
-/// LoweringContext.
+/// This file implements the skeleton of the TargetLowering class, specifically
+/// invoking the appropriate lowering method for a given instruction kind and
+/// driving global register allocation. It also implements the non-deleted
+/// instruction iteration in LoweringContext.
 ///
 //===----------------------------------------------------------------------===//
 
@@ -117,29 +116,27 @@
   }
 }
 
-// Lowers a single instruction according to the information in
-// Context, by checking the Context.Cur instruction kind and calling
-// the appropriate lowering method.  The lowering method should insert
-// target instructions at the Cur.Next insertion point, and should not
-// delete the Context.Cur instruction or advance Context.Cur.
+// Lowers a single instruction according to the information in Context, by
+// checking the Context.Cur instruction kind and calling the appropriate
+// lowering method. The lowering method should insert target instructions at
+// the Cur.Next insertion point, and should not delete the Context.Cur
+// instruction or advance Context.Cur.
 //
-// The lowering method may look ahead in the instruction stream as
-// desired, and lower additional instructions in conjunction with the
-// current one, for example fusing a compare and branch.  If it does,
-// it should advance Context.Cur to point to the next non-deleted
-// instruction to process, and it should delete any additional
-// instructions it consumes.
+// The lowering method may look ahead in the instruction stream as desired, and
+// lower additional instructions in conjunction with the current one, for
+// example fusing a compare and branch. If it does, it should advance
+// Context.Cur to point to the next non-deleted instruction to process, and it
+// should delete any additional instructions it consumes.
 void TargetLowering::lower() {
   assert(!Context.atEnd());
   Inst *Inst = Context.getCur();
   Inst->deleteIfDead();
   if (!Inst->isDeleted() && !llvm::isa<InstFakeDef>(Inst) &&
       !llvm::isa<InstFakeUse>(Inst)) {
-    // Mark the current instruction as deleted before lowering,
-    // otherwise the Dest variable will likely get marked as non-SSA.
-    // See Variable::setDefinition().  However, just pass-through
-    // FakeDef and FakeUse instructions that might have been inserted
-    // prior to lowering.
+    // Mark the current instruction as deleted before lowering, otherwise the
+    // Dest variable will likely get marked as non-SSA. See
+    // Variable::setDefinition(). However, just pass-through FakeDef and
+    // FakeUse instructions that might have been inserted prior to lowering.
     Inst->setDeleted();
     switch (Inst->getKind()) {
     case Inst::Alloca:
@@ -231,10 +228,10 @@
   Func->setError("Can't lower unsupported instruction type");
 }
 
-// Drives register allocation, allowing all physical registers (except
-// perhaps for the frame pointer) to be allocated.  This set of
-// registers could potentially be parameterized if we want to restrict
-// registers e.g. for performance testing.
+// Drives register allocation, allowing all physical registers (except perhaps
+// for the frame pointer) to be allocated. This set of registers could
+// potentially be parameterized if we want to restrict registers e.g. for
+// performance testing.
 void TargetLowering::regAlloc(RegAllocKind Kind) {
   TimerMarker T(TimerStack::TT_regAlloc, Func);
   LinearScan LinearScan(Func);
@@ -250,15 +247,14 @@
 }
 
 void TargetLowering::inferTwoAddress() {
-  // Find two-address non-SSA instructions where Dest==Src0, and set
-  // the DestNonKillable flag to keep liveness analysis consistent.
+  // Find two-address non-SSA instructions where Dest==Src0, and set the
+  // DestNonKillable flag to keep liveness analysis consistent.
   for (auto Inst = Context.getCur(), E = Context.getNext(); Inst != E; ++Inst) {
     if (Inst->isDeleted())
       continue;
     if (Variable *Dest = Inst->getDest()) {
-      // TODO(stichnot): We may need to consider all source
-      // operands, not just the first one, if using 3-address
-      // instructions.
+      // TODO(stichnot): We may need to consider all source operands, not just
+      // the first one, if using 3-address instructions.
       if (Inst->getSrcSize() > 0 && Inst->getSrc(0) == Dest)
         Inst->setDestNonKillable();
     }
@@ -268,8 +264,8 @@
 void TargetLowering::sortVarsByAlignment(VarList &Dest,
                                          const VarList &Source) const {
   Dest = Source;
-  // Instead of std::sort, we could do a bucket sort with log2(alignment)
-  // as the buckets, if performance is an issue.
+  // Instead of std::sort, we could do a bucket sort with log2(alignment) as
+  // the buckets, if performance is an issue.
   std::sort(Dest.begin(), Dest.end(),
             [this](const Variable *V1, const Variable *V2) {
               return typeWidthInBytesOnStack(V1->getType()) >
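
The comment above suggests a bucket sort keyed on log2(alignment) as a cheaper alternative to std::sort; a rough standalone sketch of that idea (generic indices only, not using Subzero's Variable or typeWidthInBytesOnStack, and assuming power-of-two alignments no larger than 2^15):

#include <cstddef>
#include <vector>

// Bucket sort by descending power-of-two alignment: 16 buckets suffice under
// the stated assumption, and the whole pass is linear in the input size.
std::vector<std::size_t>
sortByAlignmentSketch(const std::vector<std::size_t> &Alignments) {
  constexpr std::size_t NumBuckets = 16;
  std::vector<std::vector<std::size_t>> Buckets(NumBuckets);
  for (std::size_t I = 0; I < Alignments.size(); ++I) {
    std::size_t Log2 = 0;
    for (std::size_t A = Alignments[I]; A > 1; A >>= 1)
      ++Log2;
    Buckets[Log2].push_back(I); // bucket index = log2(alignment)
  }
  std::vector<std::size_t> SortedIndices;
  SortedIndices.reserve(Alignments.size());
  // Concatenate from the largest alignment down, matching the descending
  // order the std::sort comparator above produces.
  for (std::size_t B = NumBuckets; B > 0; --B)
    for (std::size_t Index : Buckets[B - 1])
      SortedIndices.push_back(Index);
  return SortedIndices;
}
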
@@ -296,17 +292,17 @@
     }
   }
 
-  // If SimpleCoalescing is false, each variable without a register
-  // gets its own unique stack slot, which leads to large stack
-  // frames.  If SimpleCoalescing is true, then each "global" variable
-  // without a register gets its own slot, but "local" variable slots
-  // are reused across basic blocks.  E.g., if A and B are local to
-  // block 1 and C is local to block 2, then C may share a slot with A or B.
+  // If SimpleCoalescing is false, each variable without a register gets its
+  // own unique stack slot, which leads to large stack frames. If
+  // SimpleCoalescing is true, then each "global" variable without a register
+  // gets its own slot, but "local" variable slots are reused across basic
+  // blocks. E.g., if A and B are local to block 1 and C is local to block 2,
+  // then C may share a slot with A or B.
   //
   // We cannot coalesce stack slots if this function calls a "returns twice"
-  // function. In that case, basic blocks may be revisited, and variables
-  // local to those basic blocks are actually live until after the
-  // called function returns a second time.
+  // function. In that case, basic blocks may be revisited, and variables local
+  // to those basic blocks are actually live until after the called function
+  // returns a second time.
   const bool SimpleCoalescing = !callsReturnsTwice();
 
   std::vector<size_t> LocalsSize(Func->getNumNodes());
@@ -317,15 +313,15 @@
       RegsUsed[Var->getRegNum()] = true;
       continue;
     }
-    // An argument either does not need a stack slot (if passed in a
-    // register) or already has one (if passed on the stack).
+    // An argument either does not need a stack slot (if passed in a register)
+    // or already has one (if passed on the stack).
     if (Var->getIsArg())
       continue;
     // An unreferenced variable doesn't need a stack slot.
     if (!IsVarReferenced[Var->getIndex()])
       continue;
-    // Check a target-specific variable (it may end up sharing stack slots)
-    // and not need accounting here.
+    // Check a target-specific variable (it may end up sharing stack slots) and
+    // not need accounting here.
     if (TargetVarHook(Var))
       continue;
     SpilledVariables.push_back(Var);
@@ -336,8 +332,8 @@
 
   for (Variable *Var : SortedSpilledVariables) {
     size_t Increment = typeWidthInBytesOnStack(Var->getType());
-    // We have sorted by alignment, so the first variable we encounter that
-    // is located in each area determines the max alignment for the area.
+    // We have sorted by alignment, so the first variable we encounter that is
+    // located in each area determines the max alignment for the area.
     if (!*SpillAreaAlignmentBytes)
       *SpillAreaAlignmentBytes = Increment;
     if (SimpleCoalescing && VMetadata->isTracked(Var)) {
@@ -373,8 +369,8 @@
     *SpillAreaPaddingBytes = SpillAreaStart - PaddingStart;
   }
 
-  // If there are separate globals and locals areas, make sure the
-  // locals area is aligned by padding the end of the globals area.
+  // If there are separate globals and locals areas, make sure the locals area
+  // is aligned by padding the end of the globals area.
   if (LocalsSlotsAlignmentBytes) {
     uint32_t GlobalsAndSubsequentPaddingSize = GlobalsSize;
     GlobalsAndSubsequentPaddingSize =
@@ -391,11 +387,11 @@
   const VariablesMetadata *VMetadata = Func->getVMetadata();
   // For testing legalization of large stack offsets on targets with limited
   // offset bits in instruction encodings, add some padding. This assumes that
-  // SpillAreaSizeBytes has accounted for the extra test padding.
-  // When UseFramePointer is true, the offset depends on the padding,
-  // not just the SpillAreaSizeBytes. On the other hand, when UseFramePointer
-  // is false, the offsets depend on the gap between SpillAreaSizeBytes
-  // and SpillAreaPaddingBytes, so we don't increment that.
+  // SpillAreaSizeBytes has accounted for the extra test padding. When
+  // UseFramePointer is true, the offset depends on the padding, not just the
+  // SpillAreaSizeBytes. On the other hand, when UseFramePointer is false, the
+  // offsets depend on the gap between SpillAreaSizeBytes and
+  // SpillAreaPaddingBytes, so we don't increment that.
   size_t TestPadding = Ctx->getFlags().getTestStackExtra();
   if (UsesFramePointer)
     SpillAreaPaddingBytes += TestPadding;
@@ -506,8 +502,8 @@
   if (!BuildDefs::dump())
     return;
 
-  // If external and not initialized, this must be a cross test.
-  // Don't generate a declaration for such cases.
+  // If external and not initialized, this must be a cross test. Don't generate
+  // a declaration for such cases.
   const bool IsExternal =
       Var.isExternal() || Ctx->getFlags().getDisableInternal();
   if (IsExternal && !Var.hasInitializer())
@@ -577,10 +573,10 @@
       }
     }
   } else {
-    // NOTE: for non-constant zero initializers, this is BSS (no bits),
-    // so an ELF writer would not write to the file, and only track
-    // virtual offsets, but the .s writer still needs this .zero and
-    // cannot simply use the .size to advance offsets.
+    // NOTE: for non-constant zero initializers, this is BSS (no bits), so an
+    // ELF writer would not write to the file, and only track virtual offsets,
+    // but the .s writer still needs this .zero and cannot simply use the .size
+    // to advance offsets.
     Str << "\t.zero\t" << Size << "\n";
   }
 
diff --git a/src/IceTargetLowering.h b/src/IceTargetLowering.h
index 7184ff0..71b6ca2 100644
--- a/src/IceTargetLowering.h
+++ b/src/IceTargetLowering.h
@@ -9,12 +9,11 @@
 ///
 /// \file
 /// This file declares the TargetLowering, LoweringContext, and
-/// TargetDataLowering classes.  TargetLowering is an abstract class
-/// used to drive the translation/lowering process.  LoweringContext
-/// maintains a context for lowering each instruction, offering
-/// conveniences such as iterating over non-deleted instructions.
-/// TargetDataLowering is an abstract class used to drive the
-/// lowering/emission of global initializers, external global
+/// TargetDataLowering classes. TargetLowering is an abstract class used to
+/// drive the translation/lowering process. LoweringContext maintains a context
+/// for lowering each instruction, offering conveniences such as iterating over
+/// non-deleted instructions. TargetDataLowering is an abstract class used to
+/// drive the lowering/emission of global initializers, external global
 /// declarations, and internal constant pools.
 ///
 //===----------------------------------------------------------------------===//
@@ -29,12 +28,11 @@
 
 namespace Ice {
 
-/// LoweringContext makes it easy to iterate through non-deleted
-/// instructions in a node, and insert new (lowered) instructions at
-/// the current point.  Along with the instruction list container and
-/// associated iterators, it holds the current node, which is needed
-/// when inserting new instructions in order to track whether variables
-/// are used as single-block or multi-block.
+/// LoweringContext makes it easy to iterate through non-deleted instructions in
+/// a node, and insert new (lowered) instructions at the current point. Along
+/// with the instruction list container and associated iterators, it holds the
+/// current node, which is needed when inserting new instructions in order to
+/// track whether variables are used as single-block or multi-block.
 class LoweringContext {
   LoweringContext(const LoweringContext &) = delete;
   LoweringContext &operator=(const LoweringContext &) = delete;
@@ -72,17 +70,16 @@
   /// Node is the argument to Inst::updateVars().
   CfgNode *Node = nullptr;
   Inst *LastInserted = nullptr;
-  /// Cur points to the current instruction being considered.  It is
-  /// guaranteed to point to a non-deleted instruction, or to be End.
+  /// Cur points to the current instruction being considered. It is guaranteed
+  /// to point to a non-deleted instruction, or to be End.
   InstList::iterator Cur;
-  /// Next doubles as a pointer to the next valid instruction (if any),
-  /// and the new-instruction insertion point.  It is also updated for
-  /// the caller in case the lowering consumes more than one high-level
-  /// instruction.  It is guaranteed to point to a non-deleted
-  /// instruction after Cur, or to be End.  TODO: Consider separating
-  /// the notion of "next valid instruction" and "new instruction
-  /// insertion point", to avoid confusion when previously-deleted
-  /// instructions come between the two points.
+  /// Next doubles as a pointer to the next valid instruction (if any), and the
+  /// new-instruction insertion point. It is also updated for the caller in case
+  /// the lowering consumes more than one high-level instruction. It is
+  /// guaranteed to point to a non-deleted instruction after Cur, or to be End.
+  // TODO: Consider separating the notion of "next valid instruction" and "new
+  // instruction insertion point", to avoid confusion when previously-deleted
+  // instructions come between the two points.
   InstList::iterator Next;
   /// Begin is a copy of Insts.begin(), used if iterators are moved backward.
   InstList::iterator Begin;
@@ -159,24 +156,22 @@
   /// Inserts and lowers a single high-level instruction at a specific insertion
   /// point.
   void lowerInst(CfgNode *Node, InstList::iterator Next, InstHighLevel *Instr);
-  /// Does preliminary lowering of the set of Phi instructions in the
-  /// current node.  The main intention is to do what's needed to keep
-  /// the unlowered Phi instructions consistent with the lowered
-  /// non-Phi instructions, e.g. to lower 64-bit operands on a 32-bit
-  /// target.
+  /// Does preliminary lowering of the set of Phi instructions in the current
+  /// node. The main intention is to do what's needed to keep the unlowered Phi
+  /// instructions consistent with the lowered non-Phi instructions, e.g. to
+  /// lower 64-bit operands on a 32-bit target.
   virtual void prelowerPhis() {}
-  /// Tries to do branch optimization on a single instruction.  Returns
-  /// true if some optimization was done.
+  /// Tries to do branch optimization on a single instruction. Returns true if
+  /// some optimization was done.
   virtual bool doBranchOpt(Inst * /*I*/, const CfgNode * /*NextNode*/) {
     return false;
   }
 
   virtual SizeT getNumRegisters() const = 0;
-  /// Returns a variable pre-colored to the specified physical
-  /// register.  This is generally used to get very direct access to
-  /// the register such as in the prolog or epilog or for marking
-  /// scratch registers as killed by a call.  If a Type is not
-  /// provided, a target-specific default type is used.
+  /// Returns a variable pre-colored to the specified physical register. This is
+  /// generally used to get very direct access to the register such as in the
+  /// prolog or epilog or for marking scratch registers as killed by a call. If
+  /// a Type is not provided, a target-specific default type is used.
   virtual Variable *getPhysicalRegister(SizeT RegNum,
                                         Type Ty = IceType_void) = 0;
   /// Returns a printable name for the register.
@@ -187,8 +182,8 @@
   virtual size_t typeWidthInBytesOnStack(Type Ty) const = 0;
 
   bool hasComputedFrame() const { return HasComputedFrame; }
-  /// Returns true if this function calls a function that has the
-  /// "returns twice" attribute.
+  /// Returns true if this function calls a function that has the "returns
+  /// twice" attribute.
   bool callsReturnsTwice() const { return CallsReturnsTwice; }
   void setCallsReturnsTwice(bool RetTwice) { CallsReturnsTwice = RetTwice; }
   int32_t getStackAdjustment() const { return StackAdjustment; }
@@ -220,10 +215,10 @@
                                 const llvm::SmallBitVector &ExcludeRegisters,
                                 uint64_t Salt) const = 0;
 
-  /// Save/restore any mutable state for the situation where code
-  /// emission needs multiple passes, such as sandboxing or relaxation.
-  /// Subclasses may provide their own implementation, but should be
-  /// sure to also call the parent class's methods.
+  /// Save/restore any mutable state for the situation where code emission needs
+  /// multiple passes, such as sandboxing or relaxation. Subclasses may provide
+  /// their own implementation, but should be sure to also call the parent
+  /// class's methods.
   virtual void snapshotEmitState() {
     SnapshotStackAdjustment = StackAdjustment;
   }
@@ -285,30 +280,30 @@
   virtual void doMockBoundsCheck(Operand *) {}
   virtual void randomlyInsertNop(float Probability,
                                  RandomNumberGenerator &RNG) = 0;
-  /// This gives the target an opportunity to post-process the lowered
-  /// expansion before returning.
+  /// This gives the target an opportunity to post-process the lowered expansion
+  /// before returning.
   virtual void postLower() {}
 
-  /// Find two-address non-SSA instructions and set the DestNonKillable flag
-  /// to keep liveness analysis consistent.
+  /// Find two-address non-SSA instructions and set the DestNonKillable flag to
+  /// keep liveness analysis consistent.
   void inferTwoAddress();
 
-  /// Make a pass over the Cfg to determine which variables need stack slots
-  /// and place them in a sorted list (SortedSpilledVariables). Among those,
-  /// vars, classify the spill variables as local to the basic block vs
-  /// global (multi-block) in order to compute the parameters GlobalsSize
-  /// and SpillAreaSizeBytes (represents locals or general vars if the
-  /// coalescing of locals is disallowed) along with alignments required
-  /// for variables in each area. We rely on accurate VMetadata in order to
-  /// classify a variable as global vs local (otherwise the variable is
-  /// conservatively global). The in-args should be initialized to 0.
+  /// Make a pass over the Cfg to determine which variables need stack slots and
+  /// place them in a sorted list (SortedSpilledVariables). Among those vars,

+  /// classify the spill variables as local to the basic block vs global
+  /// (multi-block) in order to compute the parameters GlobalsSize and
+  /// SpillAreaSizeBytes (represents locals or general vars if the coalescing of
+  /// locals is disallowed) along with alignments required for variables in each
+  /// area. We rely on accurate VMetadata in order to classify a variable as
+  /// global vs local (otherwise the variable is conservatively global). The
+  /// in-args should be initialized to 0.
   ///
-  /// This is only a pre-pass and the actual stack slot assignment is
-  /// handled separately.
+  /// This is only a pre-pass and the actual stack slot assignment is handled
+  /// separately.
   ///
-  /// There may be target-specific Variable types, which will be handled
-  /// by TargetVarHook. If the TargetVarHook returns true, then the variable
-  /// is skipped and not considered with the rest of the spilled variables.
+  /// There may be target-specific Variable types, which will be handled by
+  /// TargetVarHook. If the TargetVarHook returns true, then the variable is
+  /// skipped and not considered with the rest of the spilled variables.
   void getVarStackSlotParams(VarList &SortedSpilledVariables,
                              llvm::SmallBitVector &RegsUsed,
                              size_t *GlobalsSize, size_t *SpillAreaSizeBytes,
@@ -316,9 +311,9 @@
                              uint32_t *LocalsSlotsAlignmentBytes,
                              std::function<bool(Variable *)> TargetVarHook);
 
-  /// Calculate the amount of padding needed to align the local and global
-  /// areas to the required alignment.  This assumes the globals/locals layout
-  /// used by getVarStackSlotParams and assignVarStackSlots.
+  /// Calculate the amount of padding needed to align the local and global areas
+  /// to the required alignment. This assumes the globals/locals layout used by
+  /// getVarStackSlotParams and assignVarStackSlots.
   void alignStackSpillAreas(uint32_t SpillAreaStartOffset,
                             uint32_t SpillAreaAlignmentBytes,
                             size_t GlobalsSize,
@@ -326,21 +321,19 @@
                             uint32_t *SpillAreaPaddingBytes,
                             uint32_t *LocalsSlotsPaddingBytes);
 
-  /// Make a pass through the SortedSpilledVariables and actually assign
-  /// stack slots. SpillAreaPaddingBytes takes into account stack alignment
-  /// padding. The SpillArea starts after that amount of padding.
-  /// This matches the scheme in getVarStackSlotParams, where there may
-  /// be a separate multi-block global var spill area and a local var
-  /// spill area.
+  /// Make a pass through the SortedSpilledVariables and actually assign stack
+  /// slots. SpillAreaPaddingBytes takes into account stack alignment padding.
+  /// The SpillArea starts after that amount of padding. This matches the scheme
+  /// in getVarStackSlotParams, where there may be a separate multi-block global
+  /// var spill area and a local var spill area.
   void assignVarStackSlots(VarList &SortedSpilledVariables,
                            size_t SpillAreaPaddingBytes,
                            size_t SpillAreaSizeBytes,
                            size_t GlobalsAndSubsequentPaddingSize,
                            bool UsesFramePointer);
 
-  /// Sort the variables in Source based on required alignment.
-  /// The variables with the largest alignment need are placed in the front
-  /// of the Dest list.
+  /// Sort the variables in Source based on required alignment. The variables
+  /// with the largest alignment need are placed in the front of the Dest list.
   void sortVarsByAlignment(VarList &Dest, const VarList &Source) const;
 
   /// Make a call to an external helper function.
@@ -362,8 +355,8 @@
   GlobalContext *Ctx;
   bool HasComputedFrame = false;
   bool CallsReturnsTwice = false;
-  /// StackAdjustment keeps track of the current stack offset from its
-  /// natural location, as arguments are pushed for a function call.
+  /// StackAdjustment keeps track of the current stack offset from its natural
+  /// location, as arguments are pushed for a function call.
   int32_t StackAdjustment = 0;
   SizeT NextLabelNumber = 0;
   SizeT NextJumpTableNumber = 0;
@@ -411,9 +404,9 @@
   int32_t SnapshotStackAdjustment = 0;
 };
 
-/// TargetDataLowering is used for "lowering" data including initializers
-/// for global variables, and the internal constant pools.  It is separated
-/// out from TargetLowering because it does not require a Cfg.
+/// TargetDataLowering is used for "lowering" data including initializers for
+/// global variables, and the internal constant pools. It is separated out from
+/// TargetLowering because it does not require a Cfg.
 class TargetDataLowering {
   TargetDataLowering() = delete;
   TargetDataLowering(const TargetDataLowering &) = delete;
@@ -432,8 +425,8 @@
   void emitGlobal(const VariableDeclaration &Var,
                   const IceString &SectionSuffix);
 
-  /// For now, we assume .long is the right directive for emitting 4 byte
-  /// emit global relocations. However, LLVM MIPS usually uses .4byte instead.
+  /// For now, we assume .long is the right directive for emitting 4 byte
+  /// global relocations. However, LLVM MIPS usually uses .4byte instead.
   /// Perhaps there is some difference when the location is unaligned.
   static const char *getEmit32Directive() { return ".long"; }
 
@@ -441,9 +434,9 @@
   GlobalContext *Ctx;
 };
 
-/// TargetHeaderLowering is used to "lower" the header of an output file.
-/// It writes out the target-specific header attributes. E.g., for ARM
-/// this writes out the build attributes (float ABI, etc.).
+/// TargetHeaderLowering is used to "lower" the header of an output file. It
+/// writes out the target-specific header attributes. E.g., for ARM this writes
+/// out the build attributes (float ABI, etc.).
 class TargetHeaderLowering {
   TargetHeaderLowering() = delete;
   TargetHeaderLowering(const TargetHeaderLowering &) = delete;
diff --git a/src/IceTargetLoweringARM32.cpp b/src/IceTargetLoweringARM32.cpp
index 0634e45..fef145f 100644
--- a/src/IceTargetLoweringARM32.cpp
+++ b/src/IceTargetLoweringARM32.cpp
@@ -47,7 +47,7 @@
   } while (0)
 
 // The following table summarizes the logic for lowering the icmp instruction
-// for i32 and narrower types.  Each icmp condition has a clear mapping to an
+// for i32 and narrower types. Each icmp condition has a clear mapping to an
 // ARM32 conditional move instruction.
 
 const struct TableIcmp32_ {
@@ -62,8 +62,8 @@
 
 // The following table summarizes the logic for lowering the icmp instruction
 // for the i64 type. Two conditional moves are needed for setting to 1 or 0.
-// The operands may need to be swapped, and there is a slight difference
-// for signed vs unsigned (comparing hi vs lo first, and using cmp vs sbc).
+// The operands may need to be swapped, and there is a slight difference for
+// signed vs unsigned (comparing hi vs lo first, and using cmp vs sbc).
 const struct TableIcmp64_ {
   bool IsSigned;
   bool Swapped;
@@ -82,18 +82,16 @@
   return TableIcmp32[Index].Mapping;
 }
 
-// In some cases, there are x-macros tables for both high-level and
-// low-level instructions/operands that use the same enum key value.
-// The tables are kept separate to maintain a proper separation
-// between abstraction layers.  There is a risk that the tables could
-// get out of sync if enum values are reordered or if entries are
-// added or deleted.  The following dummy namespaces use
+// In some cases, there are x-macro tables for both high-level and low-level
+// instructions/operands that use the same enum key value. The tables are kept
+// separate to maintain a proper separation between abstraction layers. There
+// is a risk that the tables could get out of sync if enum values are reordered
+// or if entries are added or deleted. The following dummy namespaces use
 // static_asserts to ensure everything is kept in sync.
 
 // Validate the enum values in ICMPARM32_TABLE.
 namespace dummy1 {
-// Define a temporary set of enum values based on low-level table
-// entries.
+// Define a temporary set of enum values based on low-level table entries.
 enum _tmp_enum {
 #define X(val, signed, swapped64, C_32, C1_64, C2_64) _tmp_##val,
   ICMPARM32_TABLE
@@ -104,8 +102,8 @@
 #define X(tag, str) static const int _table1_##tag = InstIcmp::tag;
 ICEINSTICMP_TABLE
 #undef X
-// Define a set of constants based on low-level table entries, and
-// ensure the table entry keys are consistent.
+// Define a set of constants based on low-level table entries, and ensure the
+// table entry keys are consistent.
 #define X(val, signed, swapped64, C_32, C1_64, C2_64)                          \
   static const int _table2_##val = _tmp_##val;                                 \
   static_assert(                                                               \
@@ -113,8 +111,8 @@
       "Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE");
 ICMPARM32_TABLE
 #undef X
-// Repeat the static asserts with respect to the high-level table
-// entries in case the high-level table has extra entries.
+// Repeat the static asserts with respect to the high-level table entries in
+// case the high-level table has extra entries.
 #define X(tag, str)                                                            \
   static_assert(                                                               \
       _table1_##tag == _table2_##tag,                                          \
@@ -126,17 +124,17 @@
 // Stack alignment
 const uint32_t ARM32_STACK_ALIGNMENT_BYTES = 16;
 
-// Value is in bytes. Return Value adjusted to the next highest multiple
-// of the stack alignment.
+// Value is in bytes. Return Value adjusted to the next highest multiple of the
+// stack alignment.
 uint32_t applyStackAlignment(uint32_t Value) {
   return Utils::applyAlignment(Value, ARM32_STACK_ALIGNMENT_BYTES);
 }
 
-// Value is in bytes. Return Value adjusted to the next highest multiple
-// of the stack alignment required for the given type.
+// Value is in bytes. Return Value adjusted to the next highest multiple of the
+// stack alignment required for the given type.
 uint32_t applyStackAlignmentTy(uint32_t Value, Type Ty) {
-  // Use natural alignment, except that normally (non-NaCl) ARM only
-  // aligns vectors to 8 bytes.
+  // Use natural alignment, except that normally (non-NaCl) ARM only aligns
+  // vectors to 8 bytes.
   // TODO(jvoung): Check this ...
   size_t typeAlignInBytes = typeWidthInBytes(Ty);
   if (isVectorType(Ty))
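
A small worked sketch of the round-up-to-multiple behavior described above, assuming Utils::applyAlignment performs the usual power-of-two round-up (that helper's definition is outside this diff, so the formula below is an assumption):

#include <cstdint>

// Assumed behavior of the alignment helper: round Value up to the next
// multiple of a power-of-two Align.
constexpr uint32_t applyAlignmentSketch(uint32_t Value, uint32_t Align) {
  return (Value + Align - 1) & ~(Align - 1);
}

// With the 16-byte ARM32 stack alignment used above:
static_assert(applyAlignmentSketch(0, 16) == 0, "already aligned");
static_assert(applyAlignmentSketch(20, 16) == 32, "rounded up");
static_assert(applyAlignmentSketch(32, 16) == 32, "exact multiple unchanged");
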
@@ -172,9 +170,8 @@
 
 TargetARM32::TargetARM32(Cfg *Func)
     : TargetLowering(Func), CPUFeatures(Func->getContext()->getFlags()) {
-  // TODO: Don't initialize IntegerRegisters and friends every time.
-  // Instead, initialize in some sort of static initializer for the
-  // class.
+  // TODO: Don't initialize IntegerRegisters and friends every time. Instead,
+  // initialize in some sort of static initializer for the class.
   // Limit this size (or do all bitsets need to be the same width)???
   llvm::SmallBitVector IntegerRegisters(RegARM32::Reg_NUM);
   llvm::SmallBitVector Float32Registers(RegARM32::Reg_NUM);
@@ -243,19 +240,18 @@
   // Argument lowering
   Func->doArgLowering();
 
-  // Target lowering.  This requires liveness analysis for some parts
-  // of the lowering decisions, such as compare/branch fusing.  If
-  // non-lightweight liveness analysis is used, the instructions need
-  // to be renumbered first.  TODO: This renumbering should only be
-  // necessary if we're actually calculating live intervals, which we
-  // only do for register allocation.
+  // Target lowering. This requires liveness analysis for some parts of the
+  // lowering decisions, such as compare/branch fusing. If non-lightweight
+  // liveness analysis is used, the instructions need to be renumbered first.
+  // TODO: This renumbering should only be necessary if we're actually
+  // calculating live intervals, which we only do for register allocation.
   Func->renumberInstructions();
   if (Func->hasError())
     return;
 
-  // TODO: It should be sufficient to use the fastest liveness
-  // calculation, i.e. livenessLightweight().  However, for some
-  // reason that slows down the rest of the translation.  Investigate.
+  // TODO: It should be sufficient to use the fastest liveness calculation,
+  // i.e. livenessLightweight(). However, for some reason that slows down the
+  // rest of the translation. Investigate.
   Func->liveness(Liveness_Basic);
   if (Func->hasError())
     return;
@@ -266,19 +262,19 @@
     return;
   Func->dump("After ARM32 codegen");
 
-  // Register allocation.  This requires instruction renumbering and
-  // full liveness analysis.
+  // Register allocation. This requires instruction renumbering and full
+  // liveness analysis.
   Func->renumberInstructions();
   if (Func->hasError())
     return;
   Func->liveness(Liveness_Intervals);
   if (Func->hasError())
     return;
-  // Validate the live range computations.  The expensive validation
-  // call is deliberately only made when assertions are enabled.
+  // Validate the live range computations. The expensive validation call is
+  // deliberately only made when assertions are enabled.
   assert(Func->validateLiveness());
-  // The post-codegen dump is done here, after liveness analysis and
-  // associated cleanup, to make the dump cleaner and more useful.
+  // The post-codegen dump is done here, after liveness analysis and associated
+  // cleanup, to make the dump cleaner and more useful.
   Func->dump("After initial ARM32 codegen");
   Func->getVMetadata()->init(VMK_All);
   regAlloc(RAK_Global);
@@ -305,11 +301,10 @@
   Func->contractEmptyNodes();
   Func->reorderNodes();
 
-  // Branch optimization.  This needs to be done just before code
-  // emission.  In particular, no transformations that insert or
-  // reorder CfgNodes should be done after branch optimization.  We go
-  // ahead and do it before nop insertion to reduce the amount of work
-  // needed for searching for opportunities.
+  // Branch optimization. This needs to be done just before code emission. In
+  // particular, no transformations that insert or reorder CfgNodes should be
+  // done after branch optimization. We go ahead and do it before nop insertion
+  // to reduce the amount of work needed for searching for opportunities.
   Func->doBranchOpt();
   Func->dump("After branch optimization");
 
@@ -395,8 +390,8 @@
     Reg = Func->makeVariable(Ty);
     Reg->setRegNum(RegNum);
     PhysicalRegisters[Ty][RegNum] = Reg;
-    // Specially mark SP and LR as an "argument" so that it is considered
-    // live upon function entry.
+    // Specially mark SP and LR as "arguments" so that they are considered
+    // live upon function entry.
     if (RegNum == RegARM32::Reg_sp || RegNum == RegARM32::Reg_lr) {
       Func->addImplicitArg(Reg);
       Reg->setIgnoreLiveness();
@@ -445,15 +440,15 @@
   if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG)
     return false;
   int32_t RegLo, RegHi;
-  // Always start i64 registers at an even register, so this may end
-  // up padding away a register.
+  // Always start i64 registers at an even register, so this may end up padding
+  // away a register.
   NumGPRRegsUsed = Utils::applyAlignment(NumGPRRegsUsed, 2);
   RegLo = RegARM32::Reg_r0 + NumGPRRegsUsed;
   ++NumGPRRegsUsed;
   RegHi = RegARM32::Reg_r0 + NumGPRRegsUsed;
   ++NumGPRRegsUsed;
-  // If this bumps us past the boundary, don't allocate to a register
-  // and leave any previously speculatively consumed registers as consumed.
+  // If this bumps us past the boundary, don't allocate to a register and leave
+  // any previously speculatively consumed registers as consumed.
   if (NumGPRRegsUsed > ARM32_MAX_GPR_ARG)
     return false;
   Regs->first = RegLo;
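
For the even-register pairing described above, a simplified standalone sketch of how the GPR counter advances for an i64 argument under these rules; it assumes four argument GPRs (r0-r3, numbered 0..3 here) and omits the FP-register and stack-offset bookkeeping that the real argument lowering also does:

#include <cstdint>
#include <utility>

constexpr int32_t ARM32MaxGPRArgSketch = 4; // assumed: r0-r3 carry arguments

// Align the used-register count to an even register, then consume two
// consecutive registers if the pair still fits within the argument GPRs.
inline bool allocateI64PairSketch(int32_t &NumGPRRegsUsed,
                                  std::pair<int32_t, int32_t> &Regs) {
  if (NumGPRRegsUsed >= ARM32MaxGPRArgSketch)
    return false;
  NumGPRRegsUsed = (NumGPRRegsUsed + 1) & ~1; // may pad away an odd register
  const int32_t RegLo = NumGPRRegsUsed++;
  const int32_t RegHi = NumGPRRegsUsed++;
  if (NumGPRRegsUsed > ARM32MaxGPRArgSketch)
    return false; // leave the speculatively consumed registers as consumed
  Regs = {RegLo, RegHi};
  return true;
}

For example, after one i32 argument has taken r0 the counter is 1; an i64 argument then skips r1 and lands in the r2/r3 pair, leaving the counter at 4.
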
@@ -474,15 +469,15 @@
     return false;
   if (isVectorType(Ty)) {
     NumFPRegUnits = Utils::applyAlignment(NumFPRegUnits, 4);
-    // Q registers are declared in reverse order, so
-    // RegARM32::Reg_q0 > RegARM32::Reg_q1. Therefore, we need to subtract
-    // NumFPRegUnits from Reg_q0. Same thing goes for D registers.
+    // Q registers are declared in reverse order, so RegARM32::Reg_q0 >
+    // RegARM32::Reg_q1. Therefore, we need to subtract NumFPRegUnits from
+    // Reg_q0. Same thing goes for D registers.
     static_assert(RegARM32::Reg_q0 > RegARM32::Reg_q1,
                   "ARM32 Q registers are possibly declared incorrectly.");
     *Reg = RegARM32::Reg_q0 - (NumFPRegUnits / 4);
     NumFPRegUnits += 4;
-    // If this bumps us past the boundary, don't allocate to a register
-    // and leave any previously speculatively consumed registers as consumed.
+    // If this bumps us past the boundary, don't allocate to a register and
+    // leave any previously speculatively consumed registers as consumed.
     if (NumFPRegUnits > ARM32_MAX_FP_REG_UNITS)
       return false;
   } else if (Ty == IceType_f64) {
@@ -491,8 +486,8 @@
     NumFPRegUnits = Utils::applyAlignment(NumFPRegUnits, 2);
     *Reg = RegARM32::Reg_d0 - (NumFPRegUnits / 2);
     NumFPRegUnits += 2;
-    // If this bumps us past the boundary, don't allocate to a register
-    // and leave any previously speculatively consumed registers as consumed.
+    // If this bumps us past the boundary, don't allocate to a register and
+    // leave any previously speculatively consumed registers as consumed.
     if (NumFPRegUnits > ARM32_MAX_FP_REG_UNITS)
       return false;
   } else {
@@ -509,9 +504,9 @@
   VarList &Args = Func->getArgs();
   TargetARM32::CallingConv CC;
 
-  // For each register argument, replace Arg in the argument list with the
-  // home register.  Then generate an instruction in the prolog to copy the
-  // home register to the assigned location of Arg.
+  // For each register argument, replace Arg in the argument list with the home
+  // register. Then generate an instruction in the prolog to copy the home
+  // register to the assigned location of Arg.
   Context.init(Func->getEntryNode());
   Context.setInsertPoint(Context.getCur());
 
@@ -568,13 +563,12 @@
 
 // Helper function for addProlog().
 //
-// This assumes Arg is an argument passed on the stack.  This sets the
-// frame offset for Arg and updates InArgsSizeBytes according to Arg's
-// width.  For an I64 arg that has been split into Lo and Hi components,
-// it calls itself recursively on the components, taking care to handle
-// Lo first because of the little-endian architecture.  Lastly, this
-// function generates an instruction to copy Arg into its assigned
-// register if applicable.
+// This assumes Arg is an argument passed on the stack. This sets the frame
+// offset for Arg and updates InArgsSizeBytes according to Arg's width. For an
+// I64 arg that has been split into Lo and Hi components, it calls itself
+// recursively on the components, taking care to handle Lo first because of the
+// little-endian architecture. Lastly, this function generates an instruction
+// to copy Arg into its assigned register if applicable.
 void TargetARM32::finishArgumentLowering(Variable *Arg, Variable *FramePtr,
                                          size_t BasicFrameOffset,
                                          size_t &InArgsSizeBytes) {
@@ -591,8 +585,8 @@
   InArgsSizeBytes = applyStackAlignmentTy(InArgsSizeBytes, Ty);
   Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
   InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
-  // If the argument variable has been assigned a register, we need to load
-  // the value from the stack slot.
+  // If the argument variable has been assigned a register, we need to load the
+  // value from the stack slot.
   if (Arg->hasReg()) {
     assert(Ty != IceType_i64);
     OperandARM32Mem *Mem = OperandARM32Mem::create(
@@ -606,10 +600,9 @@
     } else {
       _ldr(Arg, Mem);
     }
-    // This argument-copying instruction uses an explicit
-    // OperandARM32Mem operand instead of a Variable, so its
-    // fill-from-stack operation has to be tracked separately for
-    // statistics.
+    // This argument-copying instruction uses an explicit OperandARM32Mem
+    // operand instead of a Variable, so its fill-from-stack operation has to
+    // be tracked separately for statistics.
     Ctx->statsUpdateFills();
   }
 }
@@ -642,16 +635,15 @@
   //  * GlobalsAndSubsequentPaddingSize: areas 3 - 4
   //  * LocalsSpillAreaSize:    area 5
   //  * SpillAreaSizeBytes:     areas 2 - 6
-  // Determine stack frame offsets for each Variable without a
-  // register assignment.  This can be done as one variable per stack
-  // slot.  Or, do coalescing by running the register allocator again
-  // with an infinite set of registers (as a side effect, this gives
-  // variables a second chance at physical register assignment).
+  // Determine stack frame offsets for each Variable without a register
+  // assignment. This can be done as one variable per stack slot. Or, do
+  // coalescing by running the register allocator again with an infinite set of
+  // registers (as a side effect, this gives variables a second chance at
+  // physical register assignment).
   //
-  // A middle ground approach is to leverage sparsity and allocate one
-  // block of space on the frame for globals (variables with
-  // multi-block lifetime), and one block to share for locals
-  // (single-block lifetime).
+  // A middle ground approach is to leverage sparsity and allocate one block of
+  // space on the frame for globals (variables with multi-block lifetime), and
+  // one block to share for locals (single-block lifetime).
 
   Context.init(Node);
   Context.setInsertPoint(Context.getCur());
@@ -661,14 +653,13 @@
   RegsUsed = llvm::SmallBitVector(CalleeSaves.size());
   VarList SortedSpilledVariables;
   size_t GlobalsSize = 0;
-  // If there is a separate locals area, this represents that area.
-  // Otherwise it counts any variable not counted by GlobalsSize.
+  // If there is a separate locals area, this represents that area. Otherwise
+  // it counts any variable not counted by GlobalsSize.
   SpillAreaSizeBytes = 0;
-  // If there is a separate locals area, this specifies the alignment
-  // for it.
+  // If there is a separate locals area, this specifies the alignment for it.
   uint32_t LocalsSlotsAlignmentBytes = 0;
-  // The entire spill locations area gets aligned to largest natural
-  // alignment of the variables that have a spill slot.
+  // The entire spill locations area gets aligned to the largest natural
+  // alignment of the variables that have a spill slot.
   uint32_t SpillAreaAlignmentBytes = 0;
   // For now, we don't have target-specific variables that need special
   // treatment (no stack-slot-linked SpillVariable type).
@@ -682,12 +673,11 @@
   uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
   SpillAreaSizeBytes += GlobalsSize;
 
-  // Add push instructions for preserved registers.
-  // On ARM, "push" can push a whole list of GPRs via a bitmask (0-15).
-  // Unlike x86, ARM also has callee-saved float/vector registers.
-  // The "vpush" instruction can handle a whole list of float/vector
-  // registers, but it only handles contiguous sequences of registers
-  // by specifying the start and the length.
+  // Add push instructions for preserved registers. On ARM, "push" can push a
+  // whole list of GPRs via a bitmask (0-15). Unlike x86, ARM also has
+  // callee-saved float/vector registers. The "vpush" instruction can handle a
+  // whole list of float/vector registers, but it only handles contiguous
+  // sequences of registers by specifying the start and the length.
   VarList GPRsToPreserve;
   GPRsToPreserve.reserve(CalleeSaves.size());
   uint32_t NumCallee = 0;
@@ -704,8 +694,8 @@
   }
   for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
     if (CalleeSaves[i] && RegsUsed[i]) {
-      // TODO(jvoung): do separate vpush for each floating point
-      // register segment and += 4, or 8 depending on type.
+      // TODO(jvoung): do separate vpush for each floating point register
+      // segment and += 4, or 8 depending on type.
       ++NumCallee;
       PreservedRegsSizeBytes += 4;
       GPRsToPreserve.push_back(getPhysicalRegister(i));
@@ -724,10 +714,10 @@
     Context.insert(InstFakeUse::create(Func, FP));
   }
 
-  // Align the variables area. SpillAreaPaddingBytes is the size of
-  // the region after the preserved registers and before the spill areas.
-  // LocalsSlotsPaddingBytes is the amount of padding between the globals
-  // and locals area if they are separate.
+  // Align the variables area. SpillAreaPaddingBytes is the size of the region
+  // after the preserved registers and before the spill areas.
+  // LocalsSlotsPaddingBytes is the amount of padding between the globals and
+  // locals area if they are separate.
   assert(SpillAreaAlignmentBytes <= ARM32_STACK_ALIGNMENT_BYTES);
   assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
   uint32_t SpillAreaPaddingBytes = 0;
@@ -758,9 +748,9 @@
 
   resetStackAdjustment();
 
-  // Fill in stack offsets for stack args, and copy args into registers
-  // for those that were register-allocated.  Args are pushed right to
-  // left, so Arg[0] is closest to the stack/frame pointer.
+  // Fill in stack offsets for stack args, and copy args into registers for
+  // those that were register-allocated. Args are pushed right to left, so
+  // Arg[0] is closest to the stack/frame pointer.
   Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
   size_t BasicFrameOffset = PreservedRegsSizeBytes;
   if (!UsesFramePointer)
@@ -830,8 +820,8 @@
   if (RI == E)
     return;
 
-  // Convert the reverse_iterator position into its corresponding
-  // (forward) iterator position.
+  // Convert the reverse_iterator position into its corresponding (forward)
+  // iterator position.
   InstList::iterator InsertPoint = RI.base();
   --InsertPoint;
   Context.init(Node);
@@ -840,9 +830,9 @@
   Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
   if (UsesFramePointer) {
     Variable *FP = getPhysicalRegister(RegARM32::Reg_fp);
-    // For late-stage liveness analysis (e.g. asm-verbose mode),
-    // adding a fake use of SP before the assignment of SP=FP keeps
-    // previous SP adjustments from being dead-code eliminated.
+    // For late-stage liveness analysis (e.g. asm-verbose mode), adding a fake
+    // use of SP before the assignment of SP=FP keeps previous SP adjustments
+    // from being dead-code eliminated.
     Context.insert(InstFakeUse::create(Func, SP));
     _mov(SP, FP);
   } else {
@@ -868,8 +858,8 @@
   if (!MaybeLeafFunc) {
     CalleeSaves[RegARM32::Reg_lr] = true;
   }
-  // Pop registers in ascending order just like push
-  // (instead of in reverse order).
+  // Pop registers in ascending order just like push (instead of in reverse
+  // order).
   for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
     if (CalleeSaves[i] && RegsUsed[i]) {
       GPRsToRestore.push_back(getPhysicalRegister(i));
@@ -903,17 +893,16 @@
 
 bool TargetARM32::isLegalVariableStackOffset(int32_t Offset) const {
   constexpr bool SignExt = false;
-  // TODO(jvoung): vldr of FP stack slots has a different limit from the
-  // plain stackSlotType().
+  // TODO(jvoung): vldr of FP stack slots has a different limit from the plain
+  // stackSlotType().
   return OperandARM32Mem::canHoldOffset(stackSlotType(), SignExt, Offset);
 }
 
 StackVariable *TargetARM32::legalizeVariableSlot(Variable *Var,
                                                  Variable *OrigBaseReg) {
   int32_t Offset = Var->getStackOffset();
-  // Legalize will likely need a movw/movt combination, but if the top
-  // bits are all 0 from negating the offset and subtracting, we could
-  // use that instead.
+  // Legalize will likely need a movw/movt combination, but if the top bits are
+  // all 0 from negating the offset and subtracting, we could use that instead.
   bool ShouldSub = (-Offset & 0xFFFF0000) == 0;
   if (ShouldSub)
     Offset = -Offset;
@@ -949,15 +938,15 @@
     return;
   Variable *OrigBaseReg = getPhysicalRegister(getFrameOrStackReg());
   int32_t StackAdjust = 0;
-  // Do a fairly naive greedy clustering for now.  Pick the first stack slot
+  // Do a fairly naive greedy clustering for now. Pick the first stack slot
   // that's out of bounds and make a new base reg using the architecture's temp
-  // register. If that works for the next slot, then great. Otherwise, create
-  // a new base register, clobbering the previous base register.  Never share a
-  // base reg across different basic blocks.  This isn't ideal if local and
+  // register. If that works for the next slot, then great. Otherwise, create a
+  // new base register, clobbering the previous base register. Never share a
+  // base reg across different basic blocks. This isn't ideal if local and
   // multi-block variables are far apart and their references are interspersed.
-  // It may help to be more coordinated about assign stack slot numbers
-  // and may help to assign smaller offsets to higher-weight variables
-  // so that they don't depend on this legalization.
+  // It may help to be more coordinated about assigning stack slot numbers and
+  // may help to assign smaller offsets to higher-weight variables so that they
+  // don't depend on this legalization.
   for (CfgNode *Node : Func->getNodes()) {
     Context.init(Node);
     StackVariable *NewBaseReg = nullptr;
@@ -986,7 +975,7 @@
           continue;
         }
       }
-      // For now, only Mov instructions can have stack variables.  We need to
+      // For now, only Mov instructions can have stack variables. We need to
       // know the type of instruction because we currently create a fresh one
       // to replace Dest/Source, rather than mutate in place.
       auto *MovInst = llvm::dyn_cast<InstARM32Mov>(CurInstr);
@@ -1117,15 +1106,15 @@
         static_cast<uint32_t>(Const->getValue() >> 32));
   }
   if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand)) {
-    // Conservatively disallow memory operands with side-effects
-    // in case of duplication.
+    // Conservatively disallow memory operands with side-effects in case of
+    // duplication.
     assert(Mem->getAddrMode() == OperandARM32Mem::Offset ||
            Mem->getAddrMode() == OperandARM32Mem::NegOffset);
     const Type SplitType = IceType_i32;
     if (Mem->isRegReg()) {
       // We have to make a temp variable T, and add 4 to either Base or Index.
-      // The Index may be shifted, so adding 4 can mean something else.
-      // Thus, prefer T := Base + 4, and use T as the new Base.
+      // The Index may be shifted, so adding 4 can mean something else. Thus,
+      // prefer T := Base + 4, and use T as the new Base.
       Variable *Base = Mem->getBase();
       Constant *Four = Ctx->getConstantInt32(4);
       Variable *NewBase = Func->makeVariable(Base->getType());
@@ -1144,8 +1133,8 @@
         // We have to make a temp variable and add 4 to either Base or Offset.
         // If we add 4 to Offset, this will convert a non-RegReg addressing
         // mode into a RegReg addressing mode. Since NaCl sandboxing disallows
-        // RegReg addressing modes, prefer adding to base and replacing instead.
-        // Thus we leave the old offset alone.
+        // RegReg addressing modes, prefer adding to base and replacing
+        // instead. Thus we leave the old offset alone.
         Constant *Four = Ctx->getConstantInt32(4);
         Variable *NewBase = Func->makeVariable(Base->getType());
         lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add,
@@ -1195,11 +1184,11 @@
 
 void TargetARM32::lowerAlloca(const InstAlloca *Inst) {
   UsesFramePointer = true;
-  // Conservatively require the stack to be aligned.  Some stack
-  // adjustment operations implemented below assume that the stack is
-  // aligned before the alloca.  All the alloca code ensures that the
-  // stack alignment is preserved after the alloca.  The stack alignment
-  // restriction can be relaxed in some cases.
+  // Conservatively require the stack to be aligned. Some stack adjustment
+  // operations implemented below assume that the stack is aligned before the
+  // alloca. All the alloca code ensures that the stack alignment is preserved
+  // after the alloca. The stack alignment restriction can be relaxed in some
+  // cases.
   NeedsStackAlignment = true;
 
   // TODO(stichnot): minimize the number of adjustments of SP, etc.
@@ -1226,8 +1215,8 @@
     Operand *SubAmount = legalize(Ctx->getConstantInt32(Value));
     _sub(SP, SP, SubAmount);
   } else {
-    // Non-constant sizes need to be adjusted to the next highest
-    // multiple of the required alignment at runtime.
+    // Non-constant sizes need to be adjusted to the next highest multiple of
+    // the required alignment at runtime.
     TotalSize = legalize(TotalSize, Legal_Reg | Legal_Flex);
     Variable *T = makeReg(IceType_i32);
     _mov(T, TotalSize);
@@ -1265,8 +1254,8 @@
   case IceType_i64: {
     Variable *ScratchReg = makeReg(IceType_i32);
     _orrs(ScratchReg, SrcLoReg, SrcHi);
-    // ScratchReg isn't going to be used, but we need the
-    // side-effect of setting flags from this operation.
+    // ScratchReg isn't going to be used, but we need the side-effect of
+    // setting flags from this operation.
     Context.insert(InstFakeUse::create(Func, ScratchReg));
   }
   }
@@ -1310,21 +1299,21 @@
 
 void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
   Variable *Dest = Inst->getDest();
-  // TODO(jvoung): Should be able to flip Src0 and Src1 if it is easier
-  // to legalize Src0 to flex or Src1 to flex and there is a reversible
-  // instruction. E.g., reverse subtract with immediate, register vs
-  // register, immediate.
-  // Or it may be the case that the operands aren't swapped, but the
-  // bits can be flipped and a different operation applied.
-  // E.g., use BIC (bit clear) instead of AND for some masks.
+  // TODO(jvoung): Should be able to flip Src0 and Src1 if it is easier to
+  // legalize Src0 to flex or Src1 to flex and there is a reversible
+  // instruction. E.g., reverse subtract with immediate, register vs register,
+  // immediate.
+  // Or it may be the case that the operands aren't swapped, but the bits can
+  // be flipped and a different operation applied. E.g., use BIC (bit clear)
+  // instead of AND for some masks.
   Operand *Src0 = legalizeUndef(Inst->getSrc(0));
   Operand *Src1 = legalizeUndef(Inst->getSrc(1));
   if (Dest->getType() == IceType_i64) {
-    // These helper-call-involved instructions are lowered in this
-    // separate switch. This is because we would otherwise assume that
-    // we need to legalize Src0 to Src0RLo and Src0Hi. However, those go unused
-    // with helper calls, and such unused/redundant instructions will fail
-    // liveness analysis under -Om1 setting.
+    // These helper-call-involved instructions are lowered in this separate
+    // switch. This is because we would otherwise assume that we need to
+    // legalize Src0 to Src0RLo and Src0Hi. However, those go unused with
+    // helper calls, and such unused/redundant instructions will fail liveness
+    // analysis under -Om1 setting.
     switch (Inst->getOp()) {
     default:
       break;
@@ -1332,11 +1321,10 @@
     case InstArithmetic::Sdiv:
     case InstArithmetic::Urem:
     case InstArithmetic::Srem: {
-      // Check for divide by 0 (ARM normally doesn't trap, but we want it
-      // to trap for NaCl). Src1Lo and Src1Hi may have already been legalized
-      // to a register, which will hide a constant source operand.
-      // Instead, check the not-yet-legalized Src1 to optimize-out a divide
-      // by 0 check.
+      // Check for divide by 0 (ARM normally doesn't trap, but we want it to
+      // trap for NaCl). Src1Lo and Src1Hi may have already been legalized to a
+      // register, which will hide a constant source operand. Instead, check
+      // the not-yet-legalized Src1 to optimize-out a divide by 0 check.
       if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Src1)) {
         if (C64->getValue() == 0) {
           _trap();
@@ -1348,8 +1336,8 @@
         div0Check(IceType_i64, Src1Lo, Src1Hi);
       }
       // Technically, ARM has their own aeabi routines, but we can use the
-      // non-aeabi routine as well.  LLVM uses __aeabi_ldivmod for div,
-      // but uses the more standard __moddi3 for rem.
+      // non-aeabi routine as well. LLVM uses __aeabi_ldivmod for div, but uses
+      // the more standard __moddi3 for rem.
       const char *HelperName = "";
       switch (Inst->getOp()) {
       default:
@@ -1472,12 +1460,11 @@
       // lsl t_lo, b.lo, c.lo
       // a.lo = t_lo
       // a.hi = t_hi
-      // Can be strength-reduced for constant-shifts, but we don't do
-      // that for now.
-      // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative.
-      // On ARM, shifts only take the lower 8 bits of the shift register,
-      // and saturate to the range 0-32, so the negative value will
-      // saturate to 32.
+      // Can be strength-reduced for constant-shifts, but we don't do that for
+      // now.
+      // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative. On
+      // ARM, shifts only take the lower 8 bits of the shift register, and
+      // saturate to the range 0-32, so the negative value will saturate to 32.
       Variable *T_Hi = makeReg(IceType_i32);
       Variable *Src1RLo = legalizeToReg(Src1Lo);
       Constant *ThirtyTwo = Ctx->getConstantInt32(32);
@@ -1493,8 +1480,8 @@
       _mov(DestHi, T_Hi);
       Variable *T_Lo = makeReg(IceType_i32);
       // _mov seems to sometimes have better register preferencing than lsl.
-      // Otherwise mov w/ lsl shifted register is a pseudo-instruction
-      // that maps to lsl.
+      // Otherwise mov w/ lsl shifted register is a pseudo-instruction that
+      // maps to lsl.
       _mov(T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo,
                                              OperandARM32::LSL, Src1RLo));
       _mov(DestLo, T_Lo);
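
The i64 shift lowering above is easier to follow as plain C++. The sketch below models the GCC-style sequence the comments refer to (sub/lsl/orr, rsb/orr, lsl), relying on the ARM behavior described there: a register-specified shift reads only the low byte of the amount and yields 0 for amounts of 32 or more. Helper names are made up for illustration; this is not Subzero code.

    #include <cstdint>

    // Register-specified LSL/LSR as described in the comments: only the low
    // byte of the shift amount is used, and any amount >= 32 yields 0.
    static uint32_t lslReg(uint32_t V, int32_t Amt) {
      const uint32_t A = static_cast<uint32_t>(Amt) & 0xFF;
      return A >= 32 ? 0 : V << A;
    }
    static uint32_t lsrReg(uint32_t V, int32_t Amt) {
      const uint32_t A = static_cast<uint32_t>(Amt) & 0xFF;
      return A >= 32 ? 0 : V >> A;
    }

    // a = b << c, with the 64-bit a and b split into lo/hi halves.
    static void shl64(uint32_t BLo, uint32_t BHi, uint32_t CLo,
                      uint32_t &ALo, uint32_t &AHi) {
      const int32_t TC1 = static_cast<int32_t>(CLo) - 32;    // sub t_c1, c.lo, #32
      uint32_t THi = lslReg(BHi, static_cast<int32_t>(CLo)); // lsl t_hi, b.hi, c.lo
      THi |= lslReg(BLo, TC1);                    // orr t_hi, t_hi, b.lo, lsl t_c1
      const int32_t TC2 = 32 - static_cast<int32_t>(CLo);    // rsb t_c2, c.lo, #32
      THi |= lsrReg(BLo, TC2);                    // orr t_hi, t_hi, b.lo, lsr t_c2
      ALo = lslReg(BLo, static_cast<int32_t>(CLo));          // lsl t_lo, b.lo, c.lo
      AHi = THi;
    }

As the comment notes, constant shift amounts could be strength-reduced instead of going through this general sequence.
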
@@ -1513,9 +1500,9 @@
     // a.hi = t_hi
     case InstArithmetic::Ashr: {
       // a=b>>c (signed) ==> ...
-      // Ashr is similar, but the sub t_c2, c.lo, #32 should set flags,
-      // and the next orr should be conditioned on PLUS. The last two
-      // right shifts should also be arithmetic.
+      // Ashr is similar, but the sub t_c2, c.lo, #32 should set flags, and the
+      // next orr should be conditioned on PLUS. The last two right shifts
+      // should also be arithmetic.
       bool IsAshr = Inst->getOp() == InstArithmetic::Ashr;
       Variable *T_Lo = makeReg(IceType_i32);
       Variable *Src1RLo = legalizeToReg(Src1Lo);
@@ -1723,13 +1710,13 @@
     Operand *NewSrc;
     if (Dest->hasReg()) {
       // If Dest already has a physical register, then legalize the Src operand
-      // into a Variable with the same register assignment.  This especially
+      // into a Variable with the same register assignment. This especially
       // helps allow the use of Flex operands.
       NewSrc = legalize(Src0, Legal_Reg | Legal_Flex, Dest->getRegNum());
     } else {
-      // Dest could be a stack operand. Since we could potentially need
-      // to do a Store (and store can only have Register operands),
-      // legalize this to a register.
+      // Dest could be a stack operand. Since we could potentially need to do a
+      // Store (and store can only have Register operands), legalize this to a
+      // register.
       NewSrc = legalize(Src0, Legal_Reg);
     }
     if (isVectorType(Dest->getType())) {
@@ -1810,25 +1797,24 @@
     }
   }
 
-  // Adjust the parameter area so that the stack is aligned.  It is
-  // assumed that the stack is already aligned at the start of the
-  // calling sequence.
+  // Adjust the parameter area so that the stack is aligned. It is assumed that
+  // the stack is already aligned at the start of the calling sequence.
   ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);
 
-  // Subtract the appropriate amount for the argument area.  This also
-  // takes care of setting the stack adjustment during emission.
+  // Subtract the appropriate amount for the argument area. This also takes
+  // care of setting the stack adjustment during emission.
   //
-  // TODO: If for some reason the call instruction gets dead-code
-  // eliminated after lowering, we would need to ensure that the
-  // pre-call and the post-call esp adjustment get eliminated as well.
+  // TODO: If for some reason the call instruction gets dead-code eliminated
+  // after lowering, we would need to ensure that the pre-call and the
+  // post-call esp adjustment get eliminated as well.
   if (ParameterAreaSizeBytes) {
     Operand *SubAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes),
                                   Legal_Reg | Legal_Flex);
     _adjust_stack(ParameterAreaSizeBytes, SubAmount);
   }
 
-  // Copy arguments that are passed on the stack to the appropriate
-  // stack locations.
+  // Copy arguments that are passed on the stack to the appropriate stack
+  // locations.
   Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
   for (auto &StackArg : StackArgs) {
     ConstantInteger32 *Loc =
@@ -1850,9 +1836,9 @@
   // Copy arguments to be passed in registers to the appropriate registers.
   for (auto &GPRArg : GPRArgs) {
     Variable *Reg = legalizeToReg(GPRArg.first, GPRArg.second);
-    // Generate a FakeUse of register arguments so that they do not get
-    // dead code eliminated as a result of the FakeKill of scratch
-    // registers after the call.
+    // Generate a FakeUse of register arguments so that they do not get dead
+    // code eliminated as a result of the FakeKill of scratch registers after
+    // the call.
     Context.insert(InstFakeUse::create(Func, Reg));
   }
   for (auto &FPArg : FPArgs) {
@@ -1860,8 +1846,8 @@
     Context.insert(InstFakeUse::create(Func, Reg));
   }
 
-  // Generate the call instruction.  Assign its result to a temporary
-  // with high register allocation weight.
+  // Generate the call instruction. Assign its result to a temporary with high
+  // register allocation weight.
   Variable *Dest = Instr->getDest();
   // ReturnReg doubles as ReturnRegLo as necessary.
   Variable *ReturnReg = nullptr;
@@ -1901,12 +1887,12 @@
     }
   }
   Operand *CallTarget = Instr->getCallTarget();
-  // TODO(jvoung): Handle sandboxing.
-  // const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing();
+  // TODO(jvoung): Handle sandboxing.
+  // const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing();
 
-  // Allow ConstantRelocatable to be left alone as a direct call,
-  // but force other constants like ConstantInteger32 to be in
-  // a register and make it an indirect call.
+  // Allow ConstantRelocatable to be left alone as a direct call, but force
+  // other constants like ConstantInteger32 to be in a register and make it an
+  // indirect call.
   if (!llvm::isa<ConstantRelocatable>(CallTarget)) {
     CallTarget = legalize(CallTarget, Legal_Reg);
   }
@@ -1915,8 +1901,8 @@
   if (ReturnRegHi)
     Context.insert(InstFakeDef::create(Func, ReturnRegHi));
 
-  // Add the appropriate offset to SP.  The call instruction takes care
-  // of resetting the stack offset during emission.
+  // Add the appropriate offset to SP. The call instruction takes care of
+  // resetting the stack offset during emission.
   if (ParameterAreaSizeBytes) {
     Operand *AddAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes),
                                   Legal_Reg | Legal_Flex);
@@ -2024,8 +2010,8 @@
       Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
       Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
       Variable *T_Lo = makeReg(DestLo->getType());
-      // i32 and i1 can just take up the whole register.
-      // i32 doesn't need uxt, while i1 will have an and mask later anyway.
+      // i32 and i1 can just take up the whole register. i32 doesn't need uxt,
+      // while i1 will have an and mask later anyway.
       if (Src0->getType() == IceType_i32 || Src0->getType() == IceType_i1) {
         Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
         _mov(T_Lo, Src0RF);
@@ -2046,9 +2032,9 @@
       Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
       Constant *One = Ctx->getConstantInt32(1);
       Variable *T = makeReg(Dest->getType());
-      // Just use _mov instead of _uxt since all registers are 32-bit.
-      // _uxt requires the source to be a register so could have required
-      // a _mov from legalize anyway.
+      // Just use _mov instead of _uxt since all registers are 32-bit. _uxt
+      // requires the source to be a register so could have required a _mov
+      // from legalize anyway.
       _mov(T, Src0RF);
       _and(T, T, One);
       _mov(Dest, T);
@@ -2288,8 +2274,8 @@
   //   mov.<C2> t, #0              mov.<C2> t, #0
   //   mov      a, t               mov      a, t
   // where the "cmp.eq b.lo, c.lo" is used for unsigned and "sbcs t1, hi, hi"
-  // is used for signed compares. In some cases, b and c need to be swapped
-  // as well.
+  // is used for signed compares. In some cases, b and c need to be swapped as
+  // well.
   //
   // LLVM does:
   // for EQ and NE:
@@ -2299,13 +2285,13 @@
   //   mov.<C> t, #1
   //   mov  a, t
   //
-  // that's nice in that it's just as short but has fewer dependencies
-  // for better ILP at the cost of more registers.
+  // that's nice in that it's just as short but has fewer dependencies for
+  // better ILP at the cost of more registers.
   //
-  // Otherwise for signed/unsigned <, <=, etc. LLVM uses a sequence with
-  // two unconditional mov #0, two cmps, two conditional mov #1,
-  // and one conditonal reg mov. That has few dependencies for good ILP,
-  // but is a longer sequence.
+  // Otherwise for signed/unsigned <, <=, etc. LLVM uses a sequence with two
+  // unconditional mov #0, two cmps, two conditional mov #1, and one
+  // conditional reg mov. That has few dependencies for good ILP, but is a
+  // longer sequence.
   //
   // So, we are going with the GCC version since it's usually better (except
   // perhaps for eq/ne). We could revisit special-casing eq/ne later.
@@ -2333,8 +2319,8 @@
       Variable *ScratchReg = makeReg(IceType_i32);
       _cmp(Src0Lo, Src1LoRF);
       _sbcs(ScratchReg, Src0Hi, Src1HiRF);
-      // ScratchReg isn't going to be used, but we need the
-      // side-effect of setting flags from this operation.
+      // ScratchReg isn't going to be used, but we need the side-effect of
+      // setting flags from this operation.
       Context.insert(InstFakeUse::create(Func, ScratchReg));
     } else {
       _cmp(Src0Hi, Src1HiRF);
@@ -2354,8 +2340,8 @@
   //   mov.C1   t, #0
   //   mov.C2   t, #1
   //   mov      a, t
-  // where the unsigned/sign extension is not needed for 32-bit.
-  // They also have special cases for EQ and NE. E.g., for NE:
+  // where the unsigned/sign extension is not needed for 32-bit. They also have
+  // special cases for EQ and NE. E.g., for NE:
   //   <extend to tb, tc>
   //   subs     t, tb, tc
   //   movne    t, #1
@@ -2368,13 +2354,13 @@
   //   mov.<C> t, #1
   //   mov     a, t
   //
-  // the left shift is by 0, 16, or 24, which allows the comparison to focus
-  // on the digits that actually matter (for 16-bit or 8-bit signed/unsigned).
-  // For the unsigned case, for some reason it does similar to GCC and does
-  // a uxtb first. It's not clear to me why that special-casing is needed.
+  // the left shift is by 0, 16, or 24, which allows the comparison to focus on
+  // the digits that actually matter (for 16-bit or 8-bit signed/unsigned). For
+  // the unsigned case, for some reason it does something similar to GCC and
+  // does a uxtb first. It's not clear to me why that special-casing is needed.
   //
-  // We'll go with the LLVM way for now, since it's shorter and has just as
-  // few dependencies.
+  // We'll go with the LLVM way for now, since it's shorter and has just as few
+  // dependencies.
   int32_t ShiftAmt = 32 - getScalarIntBitWidth(Src0->getType());
   assert(ShiftAmt >= 0);
   Constant *ShiftConst = nullptr;
@@ -2417,9 +2403,9 @@
     UnimplementedError(Func->getContext()->getFlags());
     return;
   case Intrinsics::AtomicFenceAll:
-    // NOTE: FenceAll should prevent and load/store from being moved
-    // across the fence (both atomic and non-atomic). The InstARM32Mfence
-    // instruction is currently marked coarsely as "HasSideEffects".
+    // NOTE: FenceAll should prevent any load/store from being moved across the
+    // fence (both atomic and non-atomic). The InstARM32Mfence instruction is
+    // currently marked coarsely as "HasSideEffects".
     UnimplementedError(Func->getContext()->getFlags());
     return;
   case Intrinsics::AtomicIsLockFree: {
@@ -2477,10 +2463,10 @@
     Call->addArg(Val);
     lowerCall(Call);
     // The popcount helpers always return 32-bit values, while the intrinsic's
-    // signature matches some 64-bit platform's native instructions and
-    // expect to fill a 64-bit reg. Thus, clear the upper bits of the dest
-    // just in case the user doesn't do that in the IR or doesn't toss the bits
-    // via truncate.
+    // signature matches some 64-bit platform's native instructions and expects
+    // to fill a 64-bit reg. Thus, clear the upper bits of the dest just in
+    // case the user doesn't do that in the IR or doesn't toss the bits via
+    // truncate.
     if (Val->getType() == IceType_i64) {
       Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
       Constant *Zero = Ctx->getConstantZero(IceType_i32);
@@ -2491,8 +2477,8 @@
     return;
   }
   case Intrinsics::Ctlz: {
-    // The "is zero undef" parameter is ignored and we always return
-    // a well-defined value.
+    // The "is zero undef" parameter is ignored and we always return a
+    // well-defined value.
     Operand *Val = Instr->getArg(0);
     Variable *ValLoR;
     Variable *ValHiR = nullptr;
@@ -2639,9 +2625,9 @@
     Variable *T2 = makeReg(IceType_i32);
     _add(T2, T, ThirtyTwo);
     _clz(T2, ValHiR, CondARM32::NE);
-    // T2 is actually a source as well when the predicate is not AL
-    // (since it may leave T2 alone). We use set_dest_nonkillable to
-    // prolong the liveness of T2 as if it was used as a source.
+    // T2 is actually a source as well when the predicate is not AL (since it
+    // may leave T2 alone). We use set_dest_nonkillable to prolong the liveness
+    // of T2 as if it was used as a source.
     _set_dest_nonkillable();
     _mov(DestLo, T2);
     Variable *T3 = nullptr;
@@ -2654,15 +2640,14 @@
 }
 
 void TargetARM32::lowerLoad(const InstLoad *Load) {
-  // A Load instruction can be treated the same as an Assign
-  // instruction, after the source operand is transformed into an
-  // OperandARM32Mem operand.
+  // A Load instruction can be treated the same as an Assign instruction, after
+  // the source operand is transformed into an OperandARM32Mem operand.
   Type Ty = Load->getDest()->getType();
   Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty);
   Variable *DestLoad = Load->getDest();
 
-  // TODO(jvoung): handled folding opportunities. Sign and zero extension
-  // can be folded into a load.
+  // TODO(jvoung): handle folding opportunities. Sign and zero extension can
+  // be folded into a load.
   InstAssign *Assign = InstAssign::create(Func, DestLoad, Src0);
   lowerAssign(Assign);
 }
@@ -2708,17 +2693,15 @@
       _mov(Reg, Src0F, CondARM32::AL, RegARM32::Reg_r0);
     }
   }
-  // Add a ret instruction even if sandboxing is enabled, because
-  // addEpilog explicitly looks for a ret instruction as a marker for
-  // where to insert the frame removal instructions.
-  // addEpilog is responsible for restoring the "lr" register as needed
-  // prior to this ret instruction.
+  // Add a ret instruction even if sandboxing is enabled, because addEpilog
+  // explicitly looks for a ret instruction as a marker for where to insert the
+  // frame removal instructions. addEpilog is responsible for restoring the
+  // "lr" register as needed prior to this ret instruction.
   _ret(getPhysicalRegister(RegARM32::Reg_lr), Reg);
-  // Add a fake use of sp to make sure sp stays alive for the entire
-  // function.  Otherwise post-call sp adjustments get dead-code
-  // eliminated.  TODO: Are there more places where the fake use
-  // should be inserted?  E.g. "void f(int n){while(1) g(n);}" may not
-  // have a ret instruction.
+  // Add a fake use of sp to make sure sp stays alive for the entire function.
+  // Otherwise post-call sp adjustments get dead-code eliminated.
+  // TODO: Are there more places where the fake use should be inserted? E.g.
+  // "void f(int n){while(1) g(n);}" may not have a ret instruction.
   Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
   Context.insert(InstFakeUse::create(Func, SP));
 }
@@ -2852,8 +2835,8 @@
   if (isVectorType(Ty) || isFloatingType(Ty)) {
     _vmov(Reg, Src);
   } else {
-    // Mov's Src operand can really only be the flexible second operand type
-    // or a register. Users should guarantee that.
+    // Mov's Src operand can really only be the flexible second operand type or
+    // a register. Users should guarantee that.
     _mov(Reg, Src);
   }
   return Reg;
@@ -2862,18 +2845,17 @@
 Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed,
                                int32_t RegNum) {
   Type Ty = From->getType();
-  // Assert that a physical register is allowed.  To date, all calls
-  // to legalize() allow a physical register. Legal_Flex converts
-  // registers to the right type OperandARM32FlexReg as needed.
+  // Assert that a physical register is allowed. To date, all calls to
+  // legalize() allow a physical register. Legal_Flex converts registers to the
+  // right type OperandARM32FlexReg as needed.
   assert(Allowed & Legal_Reg);
-  // Go through the various types of operands:
-  // OperandARM32Mem, OperandARM32Flex, Constant, and Variable.
-  // Given the above assertion, if type of operand is not legal
-  // (e.g., OperandARM32Mem and !Legal_Mem), we can always copy
-  // to a register.
+  // Go through the various types of operands: OperandARM32Mem,
+  // OperandARM32Flex, Constant, and Variable. Given the above assertion, if
+  // type of operand is not legal (e.g., OperandARM32Mem and !Legal_Mem), we
+  // can always copy to a register.
   if (auto Mem = llvm::dyn_cast<OperandARM32Mem>(From)) {
-    // Before doing anything with a Mem operand, we need to ensure
-    // that the Base and Index components are in physical registers.
+    // Before doing anything with a Mem operand, we need to ensure that the
+    // Base and Index components are in physical registers.
     Variable *Base = Mem->getBase();
     Variable *Index = Mem->getIndex();
     Variable *RegBase = nullptr;
@@ -2918,8 +2900,8 @@
       if (auto FlexReg = llvm::dyn_cast<OperandARM32FlexReg>(Flex)) {
         if (FlexReg->getShiftOp() == OperandARM32::kNoShift) {
           From = FlexReg->getReg();
-          // Fall through and let From be checked as a Variable below,
-          // where it may or may not need a register.
+          // Fall through and let From be checked as a Variable below, where it
+          // may or may not need a register.
         } else {
           return copyToReg(Flex, RegNum);
         }
@@ -2944,10 +2926,10 @@
       uint32_t RotateAmt;
       uint32_t Immed_8;
       uint32_t Value = static_cast<uint32_t>(C32->getValue());
-      // Check if the immediate will fit in a Flexible second operand,
-      // if a Flexible second operand is allowed. We need to know the exact
-      // value, so that rules out relocatable constants.
-      // Also try the inverse and use MVN if possible.
+      // Check if the immediate will fit in a Flexible second operand, if a
+      // Flexible second operand is allowed. We need to know the exact value,
+      // so that rules out relocatable constants. Also try the inverse and use
+      // MVN if possible.
       if (CanBeFlex &&
           OperandARM32FlexImm::canHoldImm(Value, &RotateAmt, &Immed_8)) {
         return OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt);
@@ -2977,12 +2959,12 @@
     } else {
       assert(isScalarFloatingType(Ty));
       // Load floats/doubles from literal pool.
-      // TODO(jvoung): Allow certain immediates to be encoded directly in
-      // an operand. See Table A7-18 of the ARM manual:
-      // "Floating-point modified immediate constants".
-      // Or, for 32-bit floating point numbers, just encode the raw bits
-      // into a movw/movt pair to GPR, and vmov to an SREG, instead of using
-      // a movw/movt pair to get the const-pool address then loading to SREG.
+      // TODO(jvoung): Allow certain immediates to be encoded directly in an
+      // operand. See Table A7-18 of the ARM manual: "Floating-point modified
+      // immediate constants". Or, for 32-bit floating point numbers, just
+      // encode the raw bits into a movw/movt pair to GPR, and vmov to an SREG,
+      // instead of using a movw/movt pair to get the const-pool address then
+      // loading to SREG.
       std::string Buffer;
       llvm::raw_string_ostream StrBuf(Buffer);
       llvm::cast<Constant>(From)->emitPoolLabel(StrBuf);
@@ -2997,9 +2979,9 @@
   }
 
   if (auto Var = llvm::dyn_cast<Variable>(From)) {
-    // Check if the variable is guaranteed a physical register.  This
-    // can happen either when the variable is pre-colored or when it is
-    // assigned infinite weight.
+    // Check if the variable is guaranteed a physical register. This can happen
+    // either when the variable is pre-colored or when it is assigned infinite
+    // weight.
     bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg());
     // We need a new physical register for the operand if:
     //   Mem is not allowed and Var isn't guaranteed a physical
@@ -3025,17 +3007,16 @@
 Operand *TargetARM32::legalizeUndef(Operand *From, int32_t RegNum) {
   Type Ty = From->getType();
   if (llvm::isa<ConstantUndef>(From)) {
-    // Lower undefs to zero.  Another option is to lower undefs to an
-    // uninitialized register; however, using an uninitialized register
-    // results in less predictable code.
+    // Lower undefs to zero. Another option is to lower undefs to an
+    // uninitialized register; however, using an uninitialized register results
+    // in less predictable code.
     //
-    // If in the future the implementation is changed to lower undef
-    // values to uninitialized registers, a FakeDef will be needed:
-    //     Context.insert(InstFakeDef::create(Func, Reg));
-    // This is in order to ensure that the live range of Reg is not
-    // overestimated.  If the constant being lowered is a 64 bit value,
-    // then the result should be split and the lo and hi components will
-    // need to go in uninitialized registers.
+    // If in the future the implementation is changed to lower undef values to
+    // uninitialized registers, a FakeDef will be needed:
+    //     Context.insert(InstFakeDef::create(Func, Reg));
+    // This is in order to ensure that the live range of Reg is not
+    // overestimated. If the constant being lowered is a 64 bit value, then the
+    // result should be split and the lo and hi components will need to go in
+    // uninitialized registers.
     if (isVectorType(Ty))
       return makeVectorOfZeros(Ty, RegNum);
     return Ctx->getConstantZero(Ty);
@@ -3045,15 +3026,15 @@
 
 OperandARM32Mem *TargetARM32::formMemoryOperand(Operand *Operand, Type Ty) {
   OperandARM32Mem *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand);
-  // It may be the case that address mode optimization already creates
-  // an OperandARM32Mem, so in that case it wouldn't need another level
-  // of transformation.
+  // It may be the case that address mode optimization already creates an
+  // OperandARM32Mem, so in that case it wouldn't need another level of
+  // transformation.
   if (Mem) {
     return llvm::cast<OperandARM32Mem>(legalize(Mem));
   }
-  // If we didn't do address mode optimization, then we only
-  // have a base/offset to work with. ARM always requires a base
-  // register, so just use that to hold the operand.
+  // If we didn't do address mode optimization, then we only have a base/offset
+  // to work with. ARM always requires a base register, so just use that to
+  // hold the operand.
   Variable *Base = legalizeToReg(Operand);
   return OperandARM32Mem::create(
       Func, Ty, Base,
@@ -3076,9 +3057,9 @@
   uint32_t RotateAmt;
   uint32_t Immed_8;
   Operand *Mask;
-  // Use AND or BIC to mask off the bits, depending on which immediate fits
-  // (if it fits at all). Assume Align is usually small, in which case BIC
-  // works better. Thus, this rounds down to the alignment.
+  // Use AND or BIC to mask off the bits, depending on which immediate fits (if
+  // it fits at all). Assume Align is usually small, in which case BIC works
+  // better. Thus, this rounds down to the alignment.
   if (OperandARM32FlexImm::canHoldImm(Align - 1, &RotateAmt, &Immed_8)) {
     Mask = legalize(Ctx->getConstantInt32(Align - 1), Legal_Reg | Legal_Flex);
     _bic(Reg, Reg, Mask);
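
Both the constant legalization earlier in this file and the alignment mask above hinge on OperandARM32FlexImm::canHoldImm. As a rough standalone sketch of the encoding rule it presumably checks (the standard ARM Operand2 immediate: an 8-bit value rotated right by an even amount), not the Subzero implementation itself:

    #include <cstdint>

    // Returns true if Value can be encoded as an ARM flexible immediate, i.e.
    // as an 8-bit constant rotated right by an even amount in 0..30.
    static bool canEncodeFlexImm(uint32_t Value, uint32_t *RotateAmt,
                                 uint32_t *Immed8) {
      for (uint32_t Rot = 0; Rot < 32; Rot += 2) {
        // Rotating Value left by Rot undoes a right rotation by Rot; if the
        // result fits in 8 bits, then Value == Immed8 rotated right by Rot.
        const uint32_t Undone = (Value << Rot) | (Value >> ((32 - Rot) & 31));
        if (Undone <= 0xFF) {
          *RotateAmt = Rot;
          *Immed8 = Undone;
          return true;
        }
      }
      return false;
    }

The AND/BIC choice above, and the MVN fallback in legalize(), amount to running this check on the mask (or the immediate) and on its bitwise complement, then picking whichever form encodes.
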
@@ -3170,17 +3151,18 @@
   OstreamLocker L(Ctx);
   Ostream &Str = Ctx->getStrEmit();
   Str << ".syntax unified\n";
-  // Emit build attributes in format: .eabi_attribute TAG, VALUE.
-  // See Sec. 2 of "Addenda to, and Errata in the ABI for the ARM architecture"
-  // http://infocenter.arm.com/help/topic/com.arm.doc.ihi0045d/IHI0045D_ABI_addenda.pdf
+  // Emit build attributes in format: .eabi_attribute TAG, VALUE. See Sec. 2 of
+  // "Addenda to, and Errata in the ABI for the ARM architecture"
+  // http://infocenter.arm.com
+  //                  /help/topic/com.arm.doc.ihi0045d/IHI0045D_ABI_addenda.pdf
   //
-  // Tag_conformance should be be emitted first in a file-scope
-  // sub-subsection of the first public subsection of the attributes.
+  // Tag_conformance should be emitted first in a file-scope sub-subsection
+  // of the first public subsection of the attributes.
   Str << ".eabi_attribute 67, \"2.09\"      @ Tag_conformance\n";
-  // Chromebooks are at least A15, but do A9 for higher compat.
-  // For some reason, the LLVM ARM asm parser has the .cpu directive override
-  // the mattr specified on the commandline. So to test hwdiv, we need to set
-  // the .cpu directive higher (can't just rely on --mattr=...).
+  // Chromebooks are at least A15, but do A9 for higher compat. For some
+  // reason, the LLVM ARM asm parser has the .cpu directive override the mattr
+  // specified on the commandline. So to test hwdiv, we need to set the .cpu
+  // directive higher (can't just rely on --mattr=...).
   if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) {
     Str << ".cpu    cortex-a15\n";
   } else {
diff --git a/src/IceTargetLoweringARM32.h b/src/IceTargetLoweringARM32.h
index 57e0b5a..5578289 100644
--- a/src/IceTargetLoweringARM32.h
+++ b/src/IceTargetLoweringARM32.h
@@ -78,8 +78,8 @@
   SizeT getReservedTmpReg() const { return RegARM32::Reg_ip; }
 
   size_t typeWidthInBytesOnStack(Type Ty) const override {
-    // Round up to the next multiple of 4 bytes.  In particular, i1,
-    // i8, and i16 are rounded up to 4 bytes.
+    // Round up to the next multiple of 4 bytes. In particular, i1, i8, and i16
+    // are rounded up to 4 bytes.
     return (typeWidthInBytes(Ty) + 3) & ~3;
   }
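
For concreteness, the rounding above behaves like this minimal sketch (illustrative only):

    #include <cstddef>

    // (W + 3) & ~3 rounds a byte width up to the next multiple of 4, so the
    // 1- and 2-byte types (i1, i8, i16) all occupy 4 bytes on the stack while
    // i32 and i64 keep their natural 4 and 8 bytes.
    static size_t roundUpToWord(size_t W) {
      return (W + 3) & ~static_cast<size_t>(3);
    }
    // roundUpToWord(1) == 4, roundUpToWord(2) == 4, roundUpToWord(8) == 8.
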
 
@@ -101,9 +101,8 @@
   void addProlog(CfgNode *Node) override;
   void addEpilog(CfgNode *Node) override;
 
-  /// Ensure that a 64-bit Variable has been split into 2 32-bit
-  /// Variables, creating them if necessary.  This is needed for all
-  /// I64 operations.
+  /// Ensure that a 64-bit Variable has been split into 2 32-bit Variables,
+  /// creating them if necessary. This is needed for all I64 operations.
   void split64(Variable *Var);
   Operand *loOperand(Operand *Operand);
   Operand *hiOperand(Operand *Operand);
@@ -147,8 +146,8 @@
   enum OperandLegalization {
     Legal_None = 0,
     Legal_Reg = 1 << 0,  /// physical register, not stack location
-    Legal_Flex = 1 << 1, /// A flexible operand2, which can hold rotated
-                         /// small immediates, or shifted registers.
+    Legal_Flex = 1 << 1, /// A flexible operand2, which can hold rotated small
+                         /// immediates, or shifted registers.
     Legal_Mem = 1 << 2,  /// includes [r0, r1 lsl #2] as well as [sp, #12]
     Legal_All = ~Legal_None
   };
@@ -171,9 +170,8 @@
                                 const llvm::SmallBitVector &ExcludeRegisters,
                                 uint64_t Salt) const override;
 
-  // If a divide-by-zero check is needed, inserts a:
-  // test; branch .LSKIP; trap; .LSKIP: <continuation>.
-  // If no check is needed nothing is inserted.
+  // If a divide-by-zero check is needed, inserts a: test; branch .LSKIP; trap;
+  // .LSKIP: <continuation>. If no check is needed, nothing is inserted.
   void div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi);
   using ExtInstr = void (TargetARM32::*)(Variable *, Variable *,
                                          CondARM32::Cond);
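
Restating the div0Check contract as plain C++ (a sketch of the semantics, not of the emitted instructions): the divisor is tested and the trap is reached only when it is zero. For i64 the two halves are ORed together, which is why the lowering in the .cpp emits an orrs whose result is otherwise dead and keeps it alive with a FakeUse.

    #include <cstdint>

    // i32 divisor: test; branch .LSKIP; trap; .LSKIP: <continuation>
    static void div0Check32(uint32_t Src) {
      if (Src == 0)
        __builtin_trap(); // stands in for the trap the backend emits
    }

    // i64 divisor: lo and hi are combined purely for the zero test.
    static void div0Check64(uint32_t SrcLo, uint32_t SrcHi) {
      if ((SrcLo | SrcHi) == 0)
        __builtin_trap();
    }
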
@@ -185,9 +183,9 @@
 
   void lowerCLZ(Variable *Dest, Variable *ValLo, Variable *ValHi);
 
-  // The following are helpers that insert lowered ARM32 instructions
-  // with minimal syntactic overhead, so that the lowering code can
-  // look as close to assembly as practical.
+  // The following are helpers that insert lowered ARM32 instructions with
+  // minimal syntactic overhead, so that the lowering code can look as close to
+  // assembly as practical.
 
   void _add(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
@@ -265,9 +263,9 @@
             CondARM32::Cond Pred = CondARM32::AL) {
     Context.insert(InstARM32Mls::create(Func, Dest, Src0, Src1, Acc, Pred));
   }
-  /// If Dest=nullptr is passed in, then a new variable is created,
-  /// marked as infinite register allocation weight, and returned
-  /// through the in/out Dest argument.
+  /// If Dest=nullptr is passed in, then a new variable is created, marked as
+  /// infinite register allocation weight, and returned through the in/out Dest
+  /// argument.
   void _mov(Variable *&Dest, Operand *Src0,
             CondARM32::Cond Pred = CondARM32::AL,
             int32_t RegNum = Variable::NoRegister) {
@@ -281,8 +279,8 @@
     NewInst->setDestNonKillable();
     Context.insert(NewInst);
   }
-  /// The Operand can only be a 16-bit immediate or a ConstantRelocatable
-  /// (with an upper16 relocation).
+  /// The Operand can only be a 16-bit immediate or a ConstantRelocatable (with
+  /// an upper16 relocation).
   void _movt(Variable *Dest, Operand *Src0,
              CondARM32::Cond Pred = CondARM32::AL) {
     Context.insert(InstARM32Movt::create(Func, Dest, Src0, Pred));
@@ -378,8 +376,8 @@
               Variable *Src1, CondARM32::Cond Pred = CondARM32::AL) {
     Context.insert(
         InstARM32Umull::create(Func, DestLo, DestHi, Src0, Src1, Pred));
-    // Model the modification to the second dest as a fake def.
-    // Note that the def is not predicated.
+    // Model the modification to the second dest as a fake def. Note that the
+    // def is not predicated.
     Context.insert(InstFakeDef::create(Func, DestHi, DestLo));
   }
   void _uxt(Variable *Dest, Variable *Src0,
@@ -400,11 +398,10 @@
              CondARM32::Cond Pred = CondARM32::AL) {
     Context.insert(InstARM32Vldr::create(Func, Dest, Src, Pred));
   }
-  // There are a whole bunch of vmov variants, to transfer within
-  // S/D/Q registers, between core integer registers and S/D,
-  // and from small immediates into S/D.
-  // For integer -> S/D/Q there is a variant which takes two integer
-  // register to fill a D, or to fill two consecutive S registers.
+  // There are a whole bunch of vmov variants, to transfer within S/D/Q
+  // registers, between core integer registers and S/D, and from small
+  // immediates into S/D. For integer -> S/D/Q there is a variant which takes
+  // two integer register to fill a D, or to fill two consecutive S registers.
   // Vmov can also be used to insert-element. E.g.,
   //    "vmov.8 d0[1], r0"
   // but insert-element is a "two-address" operation where only part of the
@@ -440,8 +437,8 @@
   }
 
   /// Run a pass through stack variables and ensure that the offsets are legal.
-  /// If the offset is not legal, use a new base register that accounts for
-  /// the offset, such that the addressing mode offset bits are now legal.
+  /// If the offset is not legal, use a new base register that accounts for the
+  /// offset, such that the addressing mode offset bits are now legal.
   void legalizeStackSlots();
   /// Returns true if the given Offset can be represented in a stack ldr/str.
   bool isLegalVariableStackOffset(int32_t Offset) const;
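
For context on isLegalVariableStackOffset and the vldr TODO in the .cpp: a plain word-sized ldr/str takes a 12-bit immediate offset (with an add/subtract bit), while vldr/vstr take an 8-bit immediate scaled by 4. The sketch below captures those ranges as stated in the ARM ARM; OperandARM32Mem::canHoldOffset in the source remains the real authority, and other addressing modes have different limits.

    #include <cstdint>

    // Word-sized ldr/str: 12-bit immediate, added or subtracted from the base.
    static bool fitsLdrStrWordOffset(int32_t Offset) {
      return Offset > -4096 && Offset < 4096;
    }

    // vldr/vstr: 8-bit immediate scaled by 4, so a multiple of 4 up to +/-1020.
    static bool fitsVldrOffset(int32_t Offset) {
      return (Offset % 4) == 0 && Offset >= -1020 && Offset <= 1020;
    }
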
@@ -464,11 +461,11 @@
   /// Helper class that understands the Calling Convention and register
   /// assignments. The first few integer type parameters can use r0-r3,
   /// regardless of their position relative to the floating-point/vector
-  /// arguments in the argument list. Floating-point and vector arguments
-  /// can use q0-q3 (aka d0-d7, s0-s15). Technically, arguments that can
-  /// start with registers but extend beyond the available registers can be
-  /// split between the registers and the stack. However, this is typically
-  /// for passing GPR structs by value, and PNaCl transforms expand this out.
+  /// arguments in the argument list. Floating-point and vector arguments can
+  /// use q0-q3 (aka d0-d7, s0-s15). Technically, arguments that can start with
+  /// registers but extend beyond the available registers can be split between
+  /// the registers and the stack. However, this is typically for passing GPR
+  /// structs by value, and PNaCl transforms expand this out.
   ///
   /// Also, at the point before the call, the stack must be aligned.
   class CallingConv {
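
A simplified sketch of the assignment rule described in the comment above: integer arguments consume r0-r3 independently of the floating-point/vector arguments, which consume S/D registers. It deliberately ignores i64 register pairs, AAPCS back-filling, vector (Q) arguments, and by-value structs, and none of the names below are Subzero's.

    #include <cstdint>
    #include <string>
    #include <vector>

    struct ArgLoc {
      std::string Type;
      std::string Loc; // a register name, or "stack"
    };

    static std::vector<ArgLoc>
    assignArgRegs(const std::vector<std::string> &Args) {
      uint32_t NextGPR = 0;  // r0..r3
      uint32_t NextSReg = 0; // s0..s15; a double takes an even-aligned pair
      std::vector<ArgLoc> Out;
      for (const std::string &Ty : Args) {
        std::string Loc = "stack";
        if (Ty == "i32" && NextGPR < 4) {
          Loc = "r" + std::to_string(NextGPR++);
        } else if (Ty == "float" && NextSReg < 16) {
          Loc = "s" + std::to_string(NextSReg++);
        } else if (Ty == "double") {
          NextSReg = (NextSReg + 1) & ~1u; // round up to an even S register
          if (NextSReg < 16) {
            Loc = "d" + std::to_string(NextSReg / 2);
            NextSReg += 2;
          }
        }
        Out.push_back({Ty, Loc});
      }
      return Out;
    }

    // assignArgRegs({"float", "i32", "double", "i32"}) yields s0, r0, d1, r1:
    // the integer and floating-point sequences advance independently, as the
    // comment above describes.
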
diff --git a/src/IceTargetLoweringMIPS32.cpp b/src/IceTargetLoweringMIPS32.cpp
index 080e56b..b634306 100644
--- a/src/IceTargetLoweringMIPS32.cpp
+++ b/src/IceTargetLoweringMIPS32.cpp
@@ -43,9 +43,8 @@
 } // end of anonymous namespace
 
 TargetMIPS32::TargetMIPS32(Cfg *Func) : TargetLowering(Func) {
-  // TODO: Don't initialize IntegerRegisters and friends every time.
-  // Instead, initialize in some sort of static initializer for the
-  // class.
+  // TODO: Don't initialize IntegerRegisters and friends every time. Instead,
+  // initialize in some sort of static initializer for the class.
   llvm::SmallBitVector IntegerRegisters(RegMIPS32::Reg_NUM);
   llvm::SmallBitVector FloatRegisters(RegMIPS32::Reg_NUM);
   llvm::SmallBitVector VectorRegisters(RegMIPS32::Reg_NUM);
@@ -105,19 +104,18 @@
   // Argument lowering
   Func->doArgLowering();
 
-  // Target lowering.  This requires liveness analysis for some parts
-  // of the lowering decisions, such as compare/branch fusing.  If
-  // non-lightweight liveness analysis is used, the instructions need
-  // to be renumbered first.  TODO: This renumbering should only be
-  // necessary if we're actually calculating live intervals, which we
-  // only do for register allocation.
+  // Target lowering. This requires liveness analysis for some parts of the
+  // lowering decisions, such as compare/branch fusing. If non-lightweight
+  // liveness analysis is used, the instructions need to be renumbered first.
+  // TODO: This renumbering should only be necessary if we're actually
+  // calculating live intervals, which we only do for register allocation.
   Func->renumberInstructions();
   if (Func->hasError())
     return;
 
-  // TODO: It should be sufficient to use the fastest liveness
-  // calculation, i.e. livenessLightweight().  However, for some
-  // reason that slows down the rest of the translation.  Investigate.
+  // TODO: It should be sufficient to use the fastest liveness calculation,
+  // i.e. livenessLightweight(). However, for some reason that slows down the
+  // rest of the translation. Investigate.
   Func->liveness(Liveness_Basic);
   if (Func->hasError())
     return;
@@ -128,19 +126,19 @@
     return;
   Func->dump("After MIPS32 codegen");
 
-  // Register allocation.  This requires instruction renumbering and
-  // full liveness analysis.
+  // Register allocation. This requires instruction renumbering and full
+  // liveness analysis.
   Func->renumberInstructions();
   if (Func->hasError())
     return;
   Func->liveness(Liveness_Intervals);
   if (Func->hasError())
     return;
-  // Validate the live range computations.  The expensive validation
-  // call is deliberately only made when assertions are enabled.
+  // Validate the live range computations. The expensive validation call is
+  // deliberately only made when assertions are enabled.
   assert(Func->validateLiveness());
-  // The post-codegen dump is done here, after liveness analysis and
-  // associated cleanup, to make the dump cleaner and more useful.
+  // The post-codegen dump is done here, after liveness analysis and associated
+  // cleanup, to make the dump cleaner and more useful.
   Func->dump("After initial MIPS32 codegen");
   Func->getVMetadata()->init(VMK_All);
   regAlloc(RAK_Global);
@@ -162,11 +160,10 @@
   Func->contractEmptyNodes();
   Func->reorderNodes();
 
-  // Branch optimization.  This needs to be done just before code
-  // emission.  In particular, no transformations that insert or
-  // reorder CfgNodes should be done after branch optimization.  We go
-  // ahead and do it before nop insertion to reduce the amount of work
-  // needed for searching for opportunities.
+  // Branch optimization. This needs to be done just before code emission. In
+  // particular, no transformations that insert or reorder CfgNodes should be
+  // done after branch optimization. We go ahead and do it before nop insertion
+  // to reduce the amount of work needed for searching for opportunities.
   Func->doBranchOpt();
   Func->dump("After branch optimization");
 
@@ -246,8 +243,8 @@
     Reg = Func->makeVariable(Ty);
     Reg->setRegNum(RegNum);
     PhysicalRegisters[Ty][RegNum] = Reg;
-    // Specially mark SP as an "argument" so that it is considered
-    // live upon function entry.
+    // Specially mark SP as an "argument" so that it is considered live upon
+    // function entry.
     if (RegNum == RegMIPS32::Reg_SP || RegNum == RegMIPS32::Reg_RA) {
       Func->addImplicitArg(Reg);
       Reg->setIgnoreLiveness();
@@ -321,11 +318,11 @@
 
 void TargetMIPS32::lowerAlloca(const InstAlloca *Inst) {
   UsesFramePointer = true;
-  // Conservatively require the stack to be aligned.  Some stack
-  // adjustment operations implemented below assume that the stack is
-  // aligned before the alloca.  All the alloca code ensures that the
-  // stack alignment is preserved after the alloca.  The stack alignment
-  // restriction can be relaxed in some cases.
+  // Conservatively require the stack to be aligned. Some stack adjustment
+  // operations implemented below assume that the stack is aligned before the
+  // alloca. All the alloca code ensures that the stack alignment is preserved
+  // after the alloca. The stack alignment restriction can be relaxed in some
+  // cases.
   NeedsStackAlignment = true;
   (void)Inst;
   UnimplementedError(Func->getContext()->getFlags());
@@ -483,9 +480,9 @@
     UnimplementedError(Func->getContext()->getFlags());
     return;
   case Intrinsics::AtomicFenceAll:
-    // NOTE: FenceAll should prevent and load/store from being moved
-    // across the fence (both atomic and non-atomic). The InstMIPS32Mfence
-    // instruction is currently marked coarsely as "HasSideEffects".
+    // NOTE: FenceAll should prevent any load/store from being moved across the
+    // fence (both atomic and non-atomic). The InstMIPS32Mfence instruction is
+    // currently marked coarsely as "HasSideEffects".
     UnimplementedError(Func->getContext()->getFlags());
     return;
   case Intrinsics::AtomicIsLockFree: {
@@ -549,9 +546,8 @@
     return;
   }
   case Intrinsics::Memset: {
-    // The value operand needs to be extended to a stack slot size
-    // because the PNaCl ABI requires arguments to be at least 32 bits
-    // wide.
+    // The value operand needs to be extended to a stack slot size because the
+    // PNaCl ABI requires arguments to be at least 32 bits wide.
     Operand *ValOp = Instr->getArg(1);
     assert(ValOp->getType() == IceType_i8);
     Variable *ValExt = Func->makeVariable(stackSlotType());
@@ -651,10 +647,9 @@
   UnimplementedError(Func->getContext()->getFlags());
 }
 
-// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to
-// preserve integrity of liveness analysis.  Undef values are also
-// turned into zeroes, since loOperand() and hiOperand() don't expect
-// Undef input.
+// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to preserve
+// integrity of liveness analysis. Undef values are also turned into zeroes,
+// since loOperand() and hiOperand() don't expect Undef input.
 void TargetMIPS32::prelowerPhis() {
   UnimplementedError(Func->getContext()->getFlags());
 }
@@ -662,8 +657,8 @@
 void TargetMIPS32::postLower() {
   if (Ctx->getFlags().getOptLevel() == Opt_m1)
     return;
-  // Find two-address non-SSA instructions where Dest==Src0, and set
-  // the DestNonKillable flag to keep liveness analysis consistent.
+  // Find two-address non-SSA instructions where Dest==Src0, and set the
+  // DestNonKillable flag to keep liveness analysis consistent.
   UnimplementedError(Func->getContext()->getFlags());
 }
 
diff --git a/src/IceTargetLoweringMIPS32.h b/src/IceTargetLoweringMIPS32.h
index 52c09cf..1ae0b28 100644
--- a/src/IceTargetLoweringMIPS32.h
+++ b/src/IceTargetLoweringMIPS32.h
@@ -52,8 +52,8 @@
     return UsesFramePointer ? RegMIPS32::Reg_FP : RegMIPS32::Reg_SP;
   }
   size_t typeWidthInBytesOnStack(Type Ty) const override {
-    // Round up to the next multiple of 4 bytes.  In particular, i1,
-    // i8, and i16 are rounded up to 4 bytes.
+    // Round up to the next multiple of 4 bytes. In particular, i1, i8, and i16
+    // are rounded up to 4 bytes.
     return (typeWidthInBytes(Ty) + 3) & ~3;
   }
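
The expression above is the usual power-of-two round-up trick; a small standalone check (roundUpTo4 is a made-up name) reproduces the values the comment mentions:

#include <cassert>
#include <cstdint>

// Adding 3 and clearing the two low bits rounds a byte count up to the next
// multiple of 4, so i1/i8 (1 byte) and i16 (2 bytes) all take 4 stack bytes.
static uint32_t roundUpTo4(uint32_t Bytes) { return (Bytes + 3) & ~3u; }

int main() {
  assert(roundUpTo4(1) == 4); // i1 / i8
  assert(roundUpTo4(2) == 4); // i16
  assert(roundUpTo4(4) == 4); // i32 / f32
  assert(roundUpTo4(8) == 8); // i64 / f64
  return 0;
}
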
 
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index f06150f..47f6ae1 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -8,9 +8,8 @@
 //===----------------------------------------------------------------------===//
 ///
 /// \file
-/// This file implements the TargetLoweringX8632 class, which
-/// consists almost entirely of the lowering sequence for each
-/// high-level instruction.
+/// This file implements the TargetLoweringX8632 class, which consists almost
+/// entirely of the lowering sequence for each high-level instruction.
 ///
 //===----------------------------------------------------------------------===//
 
@@ -100,24 +99,21 @@
 void TargetX8632::lowerCall(const InstCall *Instr) {
   // x86-32 calling convention:
   //
-  // * At the point before the call, the stack must be aligned to 16
-  // bytes.
+  // * At the point before the call, the stack must be aligned to 16 bytes.
   //
-  // * The first four arguments of vector type, regardless of their
-  // position relative to the other arguments in the argument list, are
-  // placed in registers xmm0 - xmm3.
+  // * The first four arguments of vector type, regardless of their position
+  // relative to the other arguments in the argument list, are placed in
+  // registers xmm0 - xmm3.
   //
-  // * Other arguments are pushed onto the stack in right-to-left order,
-  // such that the left-most argument ends up on the top of the stack at
-  // the lowest memory address.
+  // * Other arguments are pushed onto the stack in right-to-left order, such
+  // that the left-most argument ends up on the top of the stack at the lowest
+  // memory address.
   //
-  // * Stack arguments of vector type are aligned to start at the next
-  // highest multiple of 16 bytes.  Other stack arguments are aligned to
-  // 4 bytes.
+  // * Stack arguments of vector type are aligned to start at the next highest
+  // multiple of 16 bytes. Other stack arguments are aligned to 4 bytes.
   //
-  // This intends to match the section "IA-32 Function Calling
-  // Convention" of the document "OS X ABI Function Call Guide" by
-  // Apple.
+  // This intends to match the section "IA-32 Function Calling Convention" of
+  // the document "OS X ABI Function Call Guide" by Apple.
   NeedsStackAlignment = true;
 
   using OperandList = std::vector<Operand *>;
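
A rough standalone model of the argument-area layout described in the comment above (Arg, alignUp, and parameterAreaSize are illustrative names, not Subzero's): the first four vector arguments are assumed to take xmm0-xmm3, the remaining arguments get stack slots with 16-byte alignment for vectors and 4-byte alignment otherwise, and the whole area is padded to the 16-byte stack alignment required at the call.

#include <cstdint>
#include <vector>

struct Arg { uint32_t SizeBytes; bool IsVector; };

// Round V up to the next multiple of the power-of-two A.
static uint32_t alignUp(uint32_t V, uint32_t A) { return (V + A - 1) & ~(A - 1); }

static uint32_t parameterAreaSize(const std::vector<Arg> &Args) {
  uint32_t NumXmmArgs = 0;
  uint32_t Offset = 0;
  for (const Arg &A : Args) {
    if (A.IsVector && NumXmmArgs < 4) {
      ++NumXmmArgs; // passed in xmm0-xmm3, no stack slot
      continue;
    }
    Offset = alignUp(Offset, A.IsVector ? 16 : 4); // per-slot alignment
    Offset += A.SizeBytes;
  }
  return alignUp(Offset, 16); // stack must be 16-byte aligned at the call
}
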
@@ -149,46 +145,44 @@
     }
   }
 
-  // Adjust the parameter area so that the stack is aligned.  It is
-  // assumed that the stack is already aligned at the start of the
-  // calling sequence.
+  // Adjust the parameter area so that the stack is aligned. It is assumed that
+  // the stack is already aligned at the start of the calling sequence.
   ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes);
 
-  // Subtract the appropriate amount for the argument area.  This also
-  // takes care of setting the stack adjustment during emission.
+  // Subtract the appropriate amount for the argument area. This also takes
+  // care of setting the stack adjustment during emission.
   //
-  // TODO: If for some reason the call instruction gets dead-code
-  // eliminated after lowering, we would need to ensure that the
-  // pre-call and the post-call esp adjustment get eliminated as well.
+  // TODO: If for some reason the call instruction gets dead-code eliminated
+  // after lowering, we would need to ensure that the pre-call and the
+  // post-call esp adjustment get eliminated as well.
   if (ParameterAreaSizeBytes) {
     _adjust_stack(ParameterAreaSizeBytes);
   }
 
-  // Copy arguments that are passed on the stack to the appropriate
-  // stack locations.
+  // Copy arguments that are passed on the stack to the appropriate stack
+  // locations.
   for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) {
     lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i]));
   }
 
-  // Copy arguments to be passed in registers to the appropriate
-  // registers.
-  // TODO: Investigate the impact of lowering arguments passed in
-  // registers after lowering stack arguments as opposed to the other
-  // way around.  Lowering register arguments after stack arguments may
-  // reduce register pressure.  On the other hand, lowering register
-  // arguments first (before stack arguments) may result in more compact
-  // code, as the memory operand displacements may end up being smaller
-  // before any stack adjustment is done.
+  // Copy arguments to be passed in registers to the appropriate registers.
+  // TODO: Investigate the impact of lowering arguments passed in registers
+  // after lowering stack arguments as opposed to the other way around.
+  // Lowering register arguments after stack arguments may reduce register
+  // pressure. On the other hand, lowering register arguments first (before
+  // stack arguments) may result in more compact code, as the memory operand
+  // displacements may end up being smaller before any stack adjustment is
+  // done.
   for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) {
     Variable *Reg =
         legalizeToReg(XmmArgs[i], Traits::RegisterSet::Reg_xmm0 + i);
-    // Generate a FakeUse of register arguments so that they do not get
-    // dead code eliminated as a result of the FakeKill of scratch
-    // registers after the call.
+    // Generate a FakeUse of register arguments so that they do not get dead
+    // code eliminated as a result of the FakeKill of scratch registers after
+    // the call.
     Context.insert(InstFakeUse::create(Func, Reg));
   }
-  // Generate the call instruction.  Assign its result to a temporary
-  // with high register allocation weight.
+  // Generate the call instruction. Assign its result to a temporary with high
+  // register allocation weight.
   Variable *Dest = Instr->getDest();
   // ReturnReg doubles as ReturnRegLo as necessary.
   Variable *ReturnReg = nullptr;
@@ -211,8 +205,8 @@
       break;
     case IceType_f32:
     case IceType_f64:
-      // Leave ReturnReg==ReturnRegHi==nullptr, and capture the result with
-      // the fstp instruction.
+      // Leave ReturnReg==ReturnRegHi==nullptr, and capture the result with the
+      // fstp instruction.
       break;
     case IceType_v4i1:
     case IceType_v8i1:
@@ -247,8 +241,8 @@
   if (ReturnRegHi)
     Context.insert(InstFakeDef::create(Func, ReturnRegHi));
 
-  // Add the appropriate offset to esp.  The call instruction takes care
-  // of resetting the stack offset during emission.
+  // Add the appropriate offset to esp. The call instruction takes care of
+  // resetting the stack offset during emission.
   if (ParameterAreaSizeBytes) {
     Variable *esp =
         Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
@@ -287,22 +281,21 @@
       }
     }
   } else if (isScalarFloatingType(Dest->getType())) {
-    // Special treatment for an FP function which returns its result in
-    // st(0).
-    // If Dest ends up being a physical xmm register, the fstp emit code
-    // will route st(0) through a temporary stack slot.
+    // Special treatment for an FP function which returns its result in st(0).
+    // If Dest ends up being a physical xmm register, the fstp emit code will
+    // route st(0) through a temporary stack slot.
     _fstp(Dest);
-    // Create a fake use of Dest in case it actually isn't used,
-    // because st(0) still needs to be popped.
+    // Create a fake use of Dest in case it actually isn't used, because st(0)
+    // still needs to be popped.
     Context.insert(InstFakeUse::create(Func, Dest));
   }
 }
 
 void TargetX8632::lowerArguments() {
   VarList &Args = Func->getArgs();
-  // The first four arguments of vector type, regardless of their
-  // position relative to the other arguments in the argument list, are
-  // passed in registers xmm0 - xmm3.
+  // The first four arguments of vector type, regardless of their position
+  // relative to the other arguments in the argument list, are passed in
+  // registers xmm0 - xmm3.
   unsigned NumXmmArgs = 0;
 
   Context.init(Func->getEntryNode());
@@ -314,9 +307,9 @@
     Type Ty = Arg->getType();
     if (!isVectorType(Ty))
       continue;
-    // Replace Arg in the argument list with the home register.  Then
-    // generate an instruction in the prolog to copy the home register
-    // to the assigned location of Arg.
+    // Replace Arg in the argument list with the home register. Then generate
+    // an instruction in the prolog to copy the home register to the assigned
+    // location of Arg.
     int32_t RegNum = Traits::RegisterSet::Reg_xmm0 + NumXmmArgs;
     ++NumXmmArgs;
     Variable *RegisterArg = Func->makeVariable(Ty);
@@ -351,15 +344,14 @@
       _mov(Reg, Src0, Traits::RegisterSet::Reg_eax);
     }
   }
-  // Add a ret instruction even if sandboxing is enabled, because
-  // addEpilog explicitly looks for a ret instruction as a marker for
-  // where to insert the frame removal instructions.
+  // Add a ret instruction even if sandboxing is enabled, because addEpilog
+  // explicitly looks for a ret instruction as a marker for where to insert the
+  // frame removal instructions.
   _ret(Reg);
   // Add a fake use of esp to make sure esp stays alive for the entire
-  // function.  Otherwise post-call esp adjustments get dead-code
-  // eliminated.  TODO: Are there more places where the fake use
-  // should be inserted?  E.g. "void f(int n){while(1) g(n);}" may not
-  // have a ret instruction.
+  // function. Otherwise post-call esp adjustments get dead-code eliminated.
+  // TODO: Are there more places where the fake use should be inserted? E.g.
+  // "void f(int n){while(1) g(n);}" may not have a ret instruction.
   Variable *esp =
       Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
   Context.insert(InstFakeUse::create(Func, esp));
@@ -395,16 +387,15 @@
   //  * LocalsSpillAreaSize:    area 6
   //  * SpillAreaSizeBytes:     areas 3 - 7
 
-  // Determine stack frame offsets for each Variable without a
-  // register assignment.  This can be done as one variable per stack
-  // slot.  Or, do coalescing by running the register allocator again
-  // with an infinite set of registers (as a side effect, this gives
-  // variables a second chance at physical register assignment).
+  // Determine stack frame offsets for each Variable without a register
+  // assignment. This can be done as one variable per stack slot. Or, do
+  // coalescing by running the register allocator again with an infinite set of
+  // registers (as a side effect, this gives variables a second chance at
+  // physical register assignment).
   //
-  // A middle ground approach is to leverage sparsity and allocate one
-  // block of space on the frame for globals (variables with
-  // multi-block lifetime), and one block to share for locals
-  // (single-block lifetime).
+  // A middle ground approach is to leverage sparsity and allocate one block of
+  // space on the frame for globals (variables with multi-block lifetime), and
+  // one block to share for locals (single-block lifetime).
 
   Context.init(Node);
   Context.setInsertPoint(Context.getCur());
@@ -414,17 +405,16 @@
   RegsUsed = llvm::SmallBitVector(CalleeSaves.size());
   VarList SortedSpilledVariables, VariablesLinkedToSpillSlots;
   size_t GlobalsSize = 0;
-  // If there is a separate locals area, this represents that area.
-  // Otherwise it counts any variable not counted by GlobalsSize.
+  // If there is a separate locals area, this represents that area. Otherwise
+  // it counts any variable not counted by GlobalsSize.
   SpillAreaSizeBytes = 0;
-  // If there is a separate locals area, this specifies the alignment
-  // for it.
+  // If there is a separate locals area, this specifies the alignment for it.
   uint32_t LocalsSlotsAlignmentBytes = 0;
-  // The entire spill locations area gets aligned to largest natural
-  // alignment of the variables that have a spill slot.
+  // The entire spill locations area gets aligned to the largest natural
+  // alignment of the variables that have a spill slot.
   uint32_t SpillAreaAlignmentBytes = 0;
-  // A spill slot linked to a variable with a stack slot should reuse
-  // that stack slot.
+  // A spill slot linked to a variable with a stack slot should reuse that
+  // stack slot.
   std::function<bool(Variable *)> TargetVarHook =
       [&VariablesLinkedToSpillSlots](Variable *Var) {
         if (auto *SpillVar =
@@ -466,15 +456,14 @@
     Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
     _push(ebp);
     _mov(ebp, esp);
-    // Keep ebp live for late-stage liveness analysis
-    // (e.g. asm-verbose mode).
+    // Keep ebp live for late-stage liveness analysis (e.g. asm-verbose mode).
     Context.insert(InstFakeUse::create(Func, ebp));
   }
 
-  // Align the variables area. SpillAreaPaddingBytes is the size of
-  // the region after the preserved registers and before the spill areas.
-  // LocalsSlotsPaddingBytes is the amount of padding between the globals
-  // and locals area if they are separate.
+  // Align the variables area. SpillAreaPaddingBytes is the size of the region
+  // after the preserved registers and before the spill areas.
+  // LocalsSlotsPaddingBytes is the amount of padding between the globals and
+  // locals area if they are separate.
   assert(SpillAreaAlignmentBytes <= Traits::X86_STACK_ALIGNMENT_BYTES);
   assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
   uint32_t SpillAreaPaddingBytes = 0;
@@ -504,9 +493,9 @@
 
   resetStackAdjustment();
 
-  // Fill in stack offsets for stack args, and copy args into registers
-  // for those that were register-allocated.  Args are pushed right to
-  // left, so Arg[0] is closest to the stack/frame pointer.
+  // Fill in stack offsets for stack args, and copy args into registers for
+  // those that were register-allocated. Args are pushed right to left, so
+  // Arg[0] is closest to the stack/frame pointer.
   Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
   size_t BasicFrameOffset =
       PreservedRegsSizeBytes + Traits::X86_RET_IP_SIZE_BYTES;
@@ -576,8 +565,8 @@
   if (RI == E)
     return;
 
-  // Convert the reverse_iterator position into its corresponding
-  // (forward) iterator position.
+  // Convert the reverse_iterator position into its corresponding (forward)
+  // iterator position.
   InstList::iterator InsertPoint = RI.base();
   --InsertPoint;
   Context.init(Node);
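
The base()-then-decrement dance above follows from how std::reverse_iterator is defined; a tiny self-contained check:

#include <cassert>
#include <list>

// reverse_iterator::base() points one element past the element the reverse
// iterator refers to, so decrementing the forward iterator lands on the same
// element.
int main() {
  std::list<int> L = {1, 2, 3};
  auto RI = L.rbegin(); // refers to 3
  auto It = RI.base();  // == L.end()
  --It;                 // now also refers to 3
  assert(*It == *RI);
  return 0;
}
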
@@ -586,9 +575,9 @@
   Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
   if (IsEbpBasedFrame) {
     Variable *ebp = getPhysicalRegister(Traits::RegisterSet::Reg_ebp);
-    // For late-stage liveness analysis (e.g. asm-verbose mode),
-    // adding a fake use of esp before the assignment of esp=ebp keeps
-    // previous esp adjustments from being dead-code eliminated.
+    // For late-stage liveness analysis (e.g. asm-verbose mode), adding a fake
+    // use of esp before the assignment of esp=ebp keeps previous esp
+    // adjustments from being dead-code eliminated.
     Context.insert(InstFakeUse::create(Func, esp));
     _mov(esp, ebp);
     _pop(ebp);
@@ -747,8 +736,8 @@
       continue;
     typename T::IceType *Const = llvm::cast<typename T::IceType>(C);
     typename T::IceType::PrimType Value = Const->getValue();
-    // Use memcpy() to copy bits from Value into RawValue in a way
-    // that avoids breaking strict-aliasing rules.
+    // Use memcpy() to copy bits from Value into RawValue in a way that avoids
+    // breaking strict-aliasing rules.
     typename T::PrimitiveIntType RawValue;
     memcpy(&RawValue, &Value, sizeof(Value));
     char buf[30];
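
The same idiom in isolation (bitsOf is a made-up name; the expected constant assumes an IEEE-754 double):

#include <cstdint>
#include <cstring>

// Copy the object representation of a double into an integer of equal size;
// unlike a reinterpret_cast pointer pun, memcpy() does not break
// strict-aliasing rules.
static uint64_t bitsOf(double Value) {
  uint64_t RawValue;
  static_assert(sizeof(RawValue) == sizeof(Value), "size mismatch");
  std::memcpy(&RawValue, &Value, sizeof(Value));
  return RawValue;
}

int main() { return bitsOf(1.0) == 0x3FF0000000000000ull ? 0 : 1; }
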
@@ -766,8 +755,8 @@
 void TargetDataX8632::lowerConstants() {
   if (Ctx->getFlags().getDisableTranslation())
     return;
-  // No need to emit constants from the int pool since (for x86) they
-  // are embedded as immediates in the instructions, just emit float/double.
+  // No need to emit constants from the int pool since (for x86) they are
+  // embedded as immediates in the instructions; just emit float/double.
   switch (Ctx->getFlags().getOutFileType()) {
   case FT_Elf: {
     ELFObjectWriter *Writer = Ctx->getObjectWriter();
@@ -846,19 +835,17 @@
 TargetHeaderX8632::TargetHeaderX8632(GlobalContext *Ctx)
     : TargetHeaderLowering(Ctx) {}
 
-// In some cases, there are x-macros tables for both high-level and
-// low-level instructions/operands that use the same enum key value.
-// The tables are kept separate to maintain a proper separation
-// between abstraction layers.  There is a risk that the tables could
-// get out of sync if enum values are reordered or if entries are
-// added or deleted.  The following dummy namespaces use
+// In some cases, there are x-macros tables for both high-level and low-level
+// instructions/operands that use the same enum key value. The tables are kept
+// separate to maintain a proper separation between abstraction layers. There
+// is a risk that the tables could get out of sync if enum values are reordered
+// or if entries are added or deleted. The following dummy namespaces use
 // static_asserts to ensure everything is kept in sync.
 
 namespace {
 // Validate the enum values in FCMPX8632_TABLE.
 namespace dummy1 {
-// Define a temporary set of enum values based on low-level table
-// entries.
+// Define a temporary set of enum values based on low-level table entries.
 enum _tmp_enum {
 #define X(val, dflt, swapS, C1, C2, swapV, pred) _tmp_##val,
   FCMPX8632_TABLE
@@ -869,8 +856,8 @@
 #define X(tag, str) static const int _table1_##tag = InstFcmp::tag;
 ICEINSTFCMP_TABLE
 #undef X
-// Define a set of constants based on low-level table entries, and
-// ensure the table entry keys are consistent.
+// Define a set of constants based on low-level table entries, and ensure the
+// table entry keys are consistent.
 #define X(val, dflt, swapS, C1, C2, swapV, pred)                               \
   static const int _table2_##val = _tmp_##val;                                 \
   static_assert(                                                               \
@@ -878,8 +865,8 @@
       "Inconsistency between FCMPX8632_TABLE and ICEINSTFCMP_TABLE");
 FCMPX8632_TABLE
 #undef X
-// Repeat the static asserts with respect to the high-level table
-// entries in case the high-level table has extra entries.
+// Repeat the static asserts with respect to the high-level table entries in
+// case the high-level table has extra entries.
 #define X(tag, str)                                                            \
   static_assert(                                                               \
       _table1_##tag == _table2_##tag,                                          \
@@ -890,8 +877,7 @@
 
 // Validate the enum values in ICMPX8632_TABLE.
 namespace dummy2 {
-// Define a temporary set of enum values based on low-level table
-// entries.
+// Define a temporary set of enum values based on low-level table entries.
 enum _tmp_enum {
 #define X(val, C_32, C1_64, C2_64, C3_64) _tmp_##val,
   ICMPX8632_TABLE
@@ -902,8 +888,8 @@
 #define X(tag, str) static const int _table1_##tag = InstIcmp::tag;
 ICEINSTICMP_TABLE
 #undef X
-// Define a set of constants based on low-level table entries, and
-// ensure the table entry keys are consistent.
+// Define a set of constants based on low-level table entries, and ensure the
+// table entry keys are consistent.
 #define X(val, C_32, C1_64, C2_64, C3_64)                                      \
   static const int _table2_##val = _tmp_##val;                                 \
   static_assert(                                                               \
@@ -911,8 +897,8 @@
       "Inconsistency between ICMPX8632_TABLE and ICEINSTICMP_TABLE");
 ICMPX8632_TABLE
 #undef X
-// Repeat the static asserts with respect to the high-level table
-// entries in case the high-level table has extra entries.
+// Repeat the static asserts with respect to the high-level table entries in
+// case the high-level table has extra entries.
 #define X(tag, str)                                                            \
   static_assert(                                                               \
       _table1_##tag == _table2_##tag,                                          \
@@ -923,8 +909,7 @@
 
 // Validate the enum values in ICETYPEX8632_TABLE.
 namespace dummy3 {
-// Define a temporary set of enum values based on low-level table
-// entries.
+// Define a temporary set of enum values based on low-level table entries.
 enum _tmp_enum {
 #define X(tag, elementty, cvt, sdss, pack, width, fld) _tmp_##tag,
   ICETYPEX8632_TABLE
@@ -936,16 +921,16 @@
   static const int _table1_##tag = tag;
 ICETYPE_TABLE
 #undef X
-// Define a set of constants based on low-level table entries, and
-// ensure the table entry keys are consistent.
+// Define a set of constants based on low-level table entries, and ensure the
+// table entry keys are consistent.
 #define X(tag, elementty, cvt, sdss, pack, width, fld)                         \
   static const int _table2_##tag = _tmp_##tag;                                 \
   static_assert(_table1_##tag == _table2_##tag,                                \
                 "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE");
 ICETYPEX8632_TABLE
 #undef X
-// Repeat the static asserts with respect to the high-level table
-// entries in case the high-level table has extra entries.
+// Repeat the static asserts with respect to the high-level table entries in
+// case the high-level table has extra entries.
 #define X(tag, sizeLog2, align, elts, elty, str)                               \
   static_assert(_table1_##tag == _table2_##tag,                                \
                 "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE");
diff --git a/src/IceTargetLoweringX8632.h b/src/IceTargetLoweringX8632.h
index 6187809..b1d74f5 100644
--- a/src/IceTargetLoweringX8632.h
+++ b/src/IceTargetLoweringX8632.h
@@ -8,9 +8,8 @@
 //===----------------------------------------------------------------------===//
 ///
 /// \file
-/// This file declares the TargetLoweringX8632 class, which
-/// implements the TargetLowering interface for the x86-32
-/// architecture.
+/// This file declares the TargetLoweringX8632 class, which implements the
+/// TargetLowering interface for the x86-32 architecture.
 ///
 //===----------------------------------------------------------------------===//
 
diff --git a/src/IceTargetLoweringX8632Traits.h b/src/IceTargetLoweringX8632Traits.h
index 3bee361..918a585 100644
--- a/src/IceTargetLoweringX8632Traits.h
+++ b/src/IceTargetLoweringX8632Traits.h
@@ -390,10 +390,10 @@
                                 const llvm::SmallBitVector &ExcludeRegisters,
                                 uint64_t Salt) {
     // TODO(stichnot): Declaring Permutation this way loses type/size
-    // information.  Fix this in conjunction with the caller-side TODO.
+    // information. Fix this in conjunction with the caller-side TODO.
     assert(Permutation.size() >= RegisterSet::Reg_NUM);
     // Expected upper bound on the number of registers in a single equivalence
-    // class.  For x86-32, this would comprise the 8 XMM registers.  This is for
+    // class. For x86-32, this would comprise the 8 XMM registers. This is for
     // performance, not correctness.
     static const unsigned MaxEquivalenceClassSize = 8;
     using RegisterList = llvm::SmallVector<int32_t, MaxEquivalenceClassSize>;
@@ -477,8 +477,8 @@
   static constexpr uint32_t MEMSET_UNROLL_LIMIT = 16;
   /// @}
 
-  /// Value is in bytes. Return Value adjusted to the next highest multiple
-  /// of the stack alignment.
+  /// Value is in bytes. Return Value adjusted to the next highest multiple of
+  /// the stack alignment.
   static uint32_t applyStackAlignment(uint32_t Value) {
     return Utils::applyAlignment(Value, X86_STACK_ALIGNMENT_BYTES);
   }
@@ -500,17 +500,17 @@
   /// instruction. There is one table entry for each of the 16 conditions.
   ///
   /// The first four columns describe the case when the operands are floating
-  /// point scalar values.  A comment in lowerFcmp() describes the lowering
-  /// template.  In the most general case, there is a compare followed by two
+  /// point scalar values. A comment in lowerFcmp() describes the lowering
+  /// template. In the most general case, there is a compare followed by two
   /// conditional branches, because some fcmp conditions don't map to a single
-  /// x86 conditional branch.  However, in many cases it is possible to swap the
-  /// operands in the comparison and have a single conditional branch.  Since
+  /// x86 conditional branch. However, in many cases it is possible to swap the
+  /// operands in the comparison and have a single conditional branch. Since
   /// it's quite tedious to validate the table by hand, good execution tests are
   /// helpful.
   ///
   /// The last two columns describe the case when the operands are vectors of
-  /// floating point values.  For most fcmp conditions, there is a clear mapping
-  /// to a single x86 cmpps instruction variant.  Some fcmp conditions require
+  /// floating point values. For most fcmp conditions, there is a clear mapping
+  /// to a single x86 cmpps instruction variant. Some fcmp conditions require
   /// special code to handle and these are marked in the table with a
   /// Cmpps_Invalid predicate.
   /// {@
@@ -525,7 +525,7 @@
   /// @}
 
   /// The following table summarizes the logic for lowering the icmp instruction
-  /// for i32 and narrower types.  Each icmp condition has a clear mapping to an
+  /// for i32 and narrower types. Each icmp condition has a clear mapping to an
   /// x86 conditional branch instruction.
   /// {@
   static const struct TableIcmp32Type { Cond::BrCond Mapping; } TableIcmp32[];
@@ -533,8 +533,8 @@
   /// @}
 
   /// The following table summarizes the logic for lowering the icmp instruction
-  /// for the i64 type.  For Eq and Ne, two separate 32-bit comparisons and
-  /// conditional branches are needed.  For the other conditions, three separate
+  /// for the i64 type. For Eq and Ne, two separate 32-bit comparisons and
+  /// conditional branches are needed. For the other conditions, three separate
   /// conditional branches are needed.
   /// {@
   static const struct TableIcmp64Type {
@@ -567,8 +567,8 @@
   using TargetLowering = ::Ice::X86Internal::TargetX86Base<TargetX8632>;
   using Assembler = X8632::AssemblerX8632;
 
-  /// X86Operand extends the Operand hierarchy.  Its subclasses are
-  /// X86OperandMem and VariableSplit.
+  /// X86Operand extends the Operand hierarchy. Its subclasses are X86OperandMem
+  /// and VariableSplit.
   class X86Operand : public ::Ice::Operand {
     X86Operand() = delete;
     X86Operand(const X86Operand &) = delete;
@@ -644,8 +644,8 @@
   };
 
   /// VariableSplit is a way to treat an f64 memory location as a pair of i32
-  /// locations (Low and High).  This is needed for some cases of the Bitcast
-  /// instruction.  Since it's not possible for integer registers to access the
+  /// locations (Low and High). This is needed for some cases of the Bitcast
+  /// instruction. Since it's not possible for integer registers to access the
   /// XMM registers and vice versa, the lowering forces the f64 to be spilled to
   /// the stack and then accesses through the VariableSplit.
   // TODO(jpp): remove references to VariableSplit from IceInstX86Base as 64bit
@@ -685,11 +685,11 @@
     Portion Part;
   };
 
-  /// SpillVariable decorates a Variable by linking it to another Variable.
-  /// When stack frame offsets are computed, the SpillVariable is given a
-  /// distinct stack slot only if its linked Variable has a register.  If the
-  /// linked Variable has a stack slot, then the Variable and SpillVariable
-  /// share that slot.
+  /// SpillVariable decorates a Variable by linking it to another Variable. When
+  /// stack frame offsets are computed, the SpillVariable is given a distinct
+  /// stack slot only if its linked Variable has a register. If the linked
+  /// Variable has a stack slot, then the Variable and SpillVariable share that
+  /// slot.
   class SpillVariable : public Variable {
     SpillVariable() = delete;
     SpillVariable(const SpillVariable &) = delete;
diff --git a/src/IceTargetLoweringX8664.cpp b/src/IceTargetLoweringX8664.cpp
index 83a5fa5..8c77baa 100644
--- a/src/IceTargetLoweringX8664.cpp
+++ b/src/IceTargetLoweringX8664.cpp
@@ -8,9 +8,8 @@
 //===----------------------------------------------------------------------===//
 ///
 /// \file
-/// This file implements the TargetLoweringX8664 class, which
-/// consists almost entirely of the lowering sequence for each
-/// high-level instruction.
+/// This file implements the TargetLoweringX8664 class, which consists almost
+/// entirely of the lowering sequence for each high-level instruction.
 ///
 //===----------------------------------------------------------------------===//
 
@@ -131,24 +130,22 @@
 void TargetX8664::lowerCall(const InstCall *Instr) {
   // x86-64 calling convention:
   //
-  // * At the point before the call, the stack must be aligned to 16
-  // bytes.
+  // * At the point before the call, the stack must be aligned to 16 bytes.
   //
   // * The first eight arguments of vector/fp type, regardless of their
-  // position relative to the other arguments in the argument list, are
-  // placed in registers %xmm0 - %xmm7.
+  // position relative to the other arguments in the argument list, are placed
+  // in registers %xmm0 - %xmm7.
   //
-  // * The first six arguments of integer types, regardless of their
-  // position relative to the other arguments in the argument list, are
-  // placed in registers %rdi, %rsi, %rdx, %rcx, %r8, and %r9.
+  // * The first six arguments of integer types, regardless of their position
+  // relative to the other arguments in the argument list, are placed in
+  // registers %rdi, %rsi, %rdx, %rcx, %r8, and %r9.
   //
-  // * Other arguments are pushed onto the stack in right-to-left order,
-  // such that the left-most argument ends up on the top of the stack at
-  // the lowest memory address.
+  // * Other arguments are pushed onto the stack in right-to-left order, such
+  // that the left-most argument ends up on the top of the stack at the lowest
+  // memory address.
   //
-  // * Stack arguments of vector type are aligned to start at the next
-  // highest multiple of 16 bytes.  Other stack arguments are aligned to
-  // 8 bytes.
+  // * Stack arguments of vector type are aligned to start at the next highest
+  // multiple of 16 bytes. Other stack arguments are aligned to 8 bytes.
   //
   // This intends to match the section "Function Calling Sequence" of the
   // document "System V Application Binary Interface."
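
A rough standalone sketch of the register assignment the bullets above describe (ArgKind and assignRegisters are illustrative names, not TargetX8664::lowerCall): the first eight vector/fp arguments take %xmm0-%xmm7 and the first six integer arguments take %rdi, %rsi, %rdx, %rcx, %r8, %r9, independent of their position among the other arguments; everything else is passed on the stack, modeled here as std::nullopt.

#include <array>
#include <cstddef>
#include <optional>
#include <string>
#include <vector>

enum class ArgKind { Int, VectorOrFp };

static std::vector<std::optional<std::string>>
assignRegisters(const std::vector<ArgKind> &Args) {
  static constexpr std::array<const char *, 6> GprRegs = {"rdi", "rsi", "rdx",
                                                          "rcx", "r8",  "r9"};
  std::vector<std::optional<std::string>> Assignment;
  std::size_t NumXmm = 0, NumGpr = 0;
  for (ArgKind Kind : Args) {
    if (Kind == ArgKind::VectorOrFp && NumXmm < 8)
      Assignment.emplace_back("xmm" + std::to_string(NumXmm++));
    else if (Kind == ArgKind::Int && NumGpr < 6)
      Assignment.emplace_back(GprRegs[NumGpr++]);
    else
      Assignment.emplace_back(std::nullopt); // stack argument
  }
  return Assignment;
}
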
@@ -191,41 +188,39 @@
     }
   }
 
-  // Adjust the parameter area so that the stack is aligned.  It is
-  // assumed that the stack is already aligned at the start of the
-  // calling sequence.
+  // Adjust the parameter area so that the stack is aligned. It is assumed that
+  // the stack is already aligned at the start of the calling sequence.
   ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes);
 
-  // Subtract the appropriate amount for the argument area.  This also
-  // takes care of setting the stack adjustment during emission.
+  // Subtract the appropriate amount for the argument area. This also takes
+  // care of setting the stack adjustment during emission.
   //
-  // TODO: If for some reason the call instruction gets dead-code
-  // eliminated after lowering, we would need to ensure that the
-  // pre-call and the post-call esp adjustment get eliminated as well.
+  // TODO: If for some reason the call instruction gets dead-code eliminated
+  // after lowering, we would need to ensure that the pre-call and the
+  // post-call esp adjustment get eliminated as well.
   if (ParameterAreaSizeBytes) {
     _adjust_stack(ParameterAreaSizeBytes);
   }
 
-  // Copy arguments that are passed on the stack to the appropriate
-  // stack locations.
+  // Copy arguments that are passed on the stack to the appropriate stack
+  // locations.
   for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) {
     lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i]));
   }
 
-  // Copy arguments to be passed in registers to the appropriate
-  // registers.
-  // TODO: Investigate the impact of lowering arguments passed in
-  // registers after lowering stack arguments as opposed to the other
-  // way around.  Lowering register arguments after stack arguments may
-  // reduce register pressure.  On the other hand, lowering register
-  // arguments first (before stack arguments) may result in more compact
-  // code, as the memory operand displacements may end up being smaller
-  // before any stack adjustment is done.
+  // Copy arguments to be passed in registers to the appropriate registers.
+  // TODO: Investigate the impact of lowering arguments passed in registers
+  // after lowering stack arguments as opposed to the other way around.
+  // Lowering register arguments after stack arguments may reduce register
+  // pressure. On the other hand, lowering register arguments first (before
+  // stack arguments) may result in more compact code, as the memory operand
+  // displacements may end up being smaller before any stack adjustment is
+  // done.
   for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) {
     Variable *Reg = legalizeToReg(XmmArgs[i], getRegisterForXmmArgNum(i));
-    // Generate a FakeUse of register arguments so that they do not get
-    // dead code eliminated as a result of the FakeKill of scratch
-    // registers after the call.
+    // Generate a FakeUse of register arguments so that they do not get dead
+    // code eliminated as a result of the FakeKill of scratch registers after
+    // the call.
     Context.insert(InstFakeUse::create(Func, Reg));
   }
 
@@ -234,8 +229,8 @@
     Context.insert(InstFakeUse::create(Func, Reg));
   }
 
-  // Generate the call instruction.  Assign its result to a temporary
-  // with high register allocation weight.
+  // Generate the call instruction. Assign its result to a temporary with high
+  // register allocation weight.
   Variable *Dest = Instr->getDest();
   // ReturnReg doubles as ReturnRegLo as necessary.
   Variable *ReturnReg = nullptr;
@@ -277,8 +272,8 @@
     llvm_unreachable("X86-64 Sandboxing codegen not implemented.");
   }
 
-  // Add the appropriate offset to esp.  The call instruction takes care
-  // of resetting the stack offset during emission.
+  // Add the appropriate offset to esp. The call instruction takes care of
+  // resetting the stack offset during emission.
   if (ParameterAreaSizeBytes) {
     Variable *Esp =
         Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
@@ -310,11 +305,12 @@
 
 void TargetX8664::lowerArguments() {
   VarList &Args = Func->getArgs();
-  // The first eight vetcor typed arguments (as well as fp arguments) are passed
-  // in %xmm0 through %xmm7 regardless of their position in the argument list.
+  // The first eight vector typed arguments (as well as fp arguments) are
+  // passed in %xmm0 through %xmm7 regardless of their position in the argument
+  // list.
   unsigned NumXmmArgs = 0;
-  // The first six integer typed arguments are passed in %rdi, %rsi, %rdx, %rcx,
-  // %r8, and %r9 regardless of their position in the argument list.
+  // The first six integer typed arguments are passed in %rdi, %rsi, %rdx,
+  // %rcx, %r8, and %r9 regardless of their position in the argument list.
   unsigned NumGprArgs = 0;
 
   Context.init(Func->getEntryNode());
@@ -345,9 +341,9 @@
     }
     assert(RegNum != Variable::NoRegister);
     assert(RegisterArg != nullptr);
-    // Replace Arg in the argument list with the home register.  Then
-    // generate an instruction in the prolog to copy the home register
-    // to the assigned location of Arg.
+    // Replace Arg in the argument list with the home register. Then generate
+    // an instruction in the prolog to copy the home register to the assigned
+    // location of Arg.
     if (BuildDefs::dump())
       RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
     RegisterArg->setRegNum(RegNum);
@@ -371,15 +367,14 @@
       _mov(Reg, Src0, Traits::RegisterSet::Reg_eax);
     }
   }
-  // Add a ret instruction even if sandboxing is enabled, because
-  // addEpilog explicitly looks for a ret instruction as a marker for
-  // where to insert the frame removal instructions.
+  // Add a ret instruction even if sandboxing is enabled, because addEpilog
+  // explicitly looks for a ret instruction as a marker for where to insert the
+  // frame removal instructions.
   _ret(Reg);
   // Add a fake use of esp to make sure esp stays alive for the entire
-  // function.  Otherwise post-call esp adjustments get dead-code
-  // eliminated.  TODO: Are there more places where the fake use
-  // should be inserted?  E.g. "void f(int n){while(1) g(n);}" may not
-  // have a ret instruction.
+  // function. Otherwise post-call esp adjustments get dead-code eliminated.
+  // TODO: Are there more places where the fake use should be inserted? E.g.
+  // "void f(int n){while(1) g(n);}" may not have a ret instruction.
   Variable *esp =
       Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
   Context.insert(InstFakeUse::create(Func, esp));
@@ -415,16 +410,15 @@
   //  * LocalsSpillAreaSize:    area 6
   //  * SpillAreaSizeBytes:     areas 3 - 7
 
-  // Determine stack frame offsets for each Variable without a
-  // register assignment.  This can be done as one variable per stack
-  // slot.  Or, do coalescing by running the register allocator again
-  // with an infinite set of registers (as a side effect, this gives
-  // variables a second chance at physical register assignment).
+  // Determine stack frame offsets for each Variable without a register
+  // assignment. This can be done as one variable per stack slot. Or, do
+  // coalescing by running the register allocator again with an infinite set of
+  // registers (as a side effect, this gives variables a second chance at
+  // physical register assignment).
   //
-  // A middle ground approach is to leverage sparsity and allocate one
-  // block of space on the frame for globals (variables with
-  // multi-block lifetime), and one block to share for locals
-  // (single-block lifetime).
+  // A middle ground approach is to leverage sparsity and allocate one block of
+  // space on the frame for globals (variables with multi-block lifetime), and
+  // one block to share for locals (single-block lifetime).
 
   Context.init(Node);
   Context.setInsertPoint(Context.getCur());
@@ -434,17 +428,16 @@
   RegsUsed = llvm::SmallBitVector(CalleeSaves.size());
   VarList SortedSpilledVariables, VariablesLinkedToSpillSlots;
   size_t GlobalsSize = 0;
-  // If there is a separate locals area, this represents that area.
-  // Otherwise it counts any variable not counted by GlobalsSize.
+  // If there is a separate locals area, this represents that area. Otherwise
+  // it counts any variable not counted by GlobalsSize.
   SpillAreaSizeBytes = 0;
-  // If there is a separate locals area, this specifies the alignment
-  // for it.
+  // If there is a separate locals area, this specifies the alignment for it.
   uint32_t LocalsSlotsAlignmentBytes = 0;
-  // The entire spill locations area gets aligned to largest natural
-  // alignment of the variables that have a spill slot.
+  // The entire spill locations area gets aligned to the largest natural
+  // alignment of the variables that have a spill slot.
   uint32_t SpillAreaAlignmentBytes = 0;
-  // A spill slot linked to a variable with a stack slot should reuse
-  // that stack slot.
+  // A spill slot linked to a variable with a stack slot should reuse that
+  // stack slot.
   std::function<bool(Variable *)> TargetVarHook =
       [&VariablesLinkedToSpillSlots](Variable *Var) {
         if (auto *SpillVar =
@@ -486,15 +479,14 @@
     Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
     _push(ebp);
     _mov(ebp, esp);
-    // Keep ebp live for late-stage liveness analysis
-    // (e.g. asm-verbose mode).
+    // Keep ebp live for late-stage liveness analysis (e.g. asm-verbose mode).
     Context.insert(InstFakeUse::create(Func, ebp));
   }
 
-  // Align the variables area. SpillAreaPaddingBytes is the size of
-  // the region after the preserved registers and before the spill areas.
-  // LocalsSlotsPaddingBytes is the amount of padding between the globals
-  // and locals area if they are separate.
+  // Align the variables area. SpillAreaPaddingBytes is the size of the region
+  // after the preserved registers and before the spill areas.
+  // LocalsSlotsPaddingBytes is the amount of padding between the globals and
+  // locals area if they are separate.
   assert(SpillAreaAlignmentBytes <= Traits::X86_STACK_ALIGNMENT_BYTES);
   assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
   uint32_t SpillAreaPaddingBytes = 0;
@@ -524,9 +516,9 @@
 
   resetStackAdjustment();
 
-  // Fill in stack offsets for stack args, and copy args into registers
-  // for those that were register-allocated.  Args are pushed right to
-  // left, so Arg[0] is closest to the stack/frame pointer.
+  // Fill in stack offsets for stack args, and copy args into registers for
+  // those that were register-allocated. Args are pushed right to left, so
+  // Arg[0] is closest to the stack/frame pointer.
   Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
   size_t BasicFrameOffset =
       PreservedRegsSizeBytes + Traits::X86_RET_IP_SIZE_BYTES;
@@ -605,8 +597,8 @@
   if (RI == E)
     return;
 
-  // Convert the reverse_iterator position into its corresponding
-  // (forward) iterator position.
+  // Convert the reverse_iterator position into its corresponding (forward)
+  // iterator position.
   InstList::iterator InsertPoint = RI.base();
   --InsertPoint;
   Context.init(Node);
@@ -615,9 +607,9 @@
   Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
   if (IsEbpBasedFrame) {
     Variable *ebp = getPhysicalRegister(Traits::RegisterSet::Reg_ebp);
-    // For late-stage liveness analysis (e.g. asm-verbose mode),
-    // adding a fake use of esp before the assignment of esp=ebp keeps
-    // previous esp adjustments from being dead-code eliminated.
+    // For late-stage liveness analysis (e.g. asm-verbose mode), adding a fake
+    // use of esp before the assignment of esp=ebp keeps previous esp
+    // adjustments from being dead-code eliminated.
     Context.insert(InstFakeUse::create(Func, esp));
     _mov(esp, ebp);
     _pop(ebp);
@@ -758,8 +750,8 @@
       continue;
     typename T::IceType *Const = llvm::cast<typename T::IceType>(C);
     typename T::IceType::PrimType Value = Const->getValue();
-    // Use memcpy() to copy bits from Value into RawValue in a way
-    // that avoids breaking strict-aliasing rules.
+    // Use memcpy() to copy bits from Value into RawValue in a way that avoids
+    // breaking strict-aliasing rules.
     typename T::PrimitiveIntType RawValue;
     memcpy(&RawValue, &Value, sizeof(Value));
     char buf[30];
@@ -777,8 +769,8 @@
 void TargetDataX8664::lowerConstants() {
   if (Ctx->getFlags().getDisableTranslation())
     return;
-  // No need to emit constants from the int pool since (for x86) they
-  // are embedded as immediates in the instructions, just emit float/double.
+  // No need to emit constants from the int pool since (for x86) they are
+  // embedded as immediates in the instructions; just emit float/double.
   switch (Ctx->getFlags().getOutFileType()) {
   case FT_Elf: {
     ELFObjectWriter *Writer = Ctx->getObjectWriter();
@@ -854,19 +846,17 @@
   }
 }
 
-// In some cases, there are x-macros tables for both high-level and
-// low-level instructions/operands that use the same enum key value.
-// The tables are kept separate to maintain a proper separation
-// between abstraction layers.  There is a risk that the tables could
-// get out of sync if enum values are reordered or if entries are
-// added or deleted.  The following dummy namespaces use
+// In some cases, there are x-macros tables for both high-level and low-level
+// instructions/operands that use the same enum key value. The tables are kept
+// separate to maintain a proper separation between abstraction layers. There
+// is a risk that the tables could get out of sync if enum values are reordered
+// or if entries are added or deleted. The following dummy namespaces use
 // static_asserts to ensure everything is kept in sync.
 
 namespace {
 // Validate the enum values in FCMPX8664_TABLE.
 namespace dummy1 {
-// Define a temporary set of enum values based on low-level table
-// entries.
+// Define a temporary set of enum values based on low-level table entries.
 enum _tmp_enum {
 #define X(val, dflt, swapS, C1, C2, swapV, pred) _tmp_##val,
   FCMPX8664_TABLE
@@ -877,8 +867,8 @@
 #define X(tag, str) static const int _table1_##tag = InstFcmp::tag;
 ICEINSTFCMP_TABLE
 #undef X
-// Define a set of constants based on low-level table entries, and
-// ensure the table entry keys are consistent.
+// Define a set of constants based on low-level table entries, and ensure the
+// table entry keys are consistent.
 #define X(val, dflt, swapS, C1, C2, swapV, pred)                               \
   static const int _table2_##val = _tmp_##val;                                 \
   static_assert(                                                               \
@@ -886,8 +876,8 @@
       "Inconsistency between FCMPX8664_TABLE and ICEINSTFCMP_TABLE");
 FCMPX8664_TABLE
 #undef X
-// Repeat the static asserts with respect to the high-level table
-// entries in case the high-level table has extra entries.
+// Repeat the static asserts with respect to the high-level table entries in
+// case the high-level table has extra entries.
 #define X(tag, str)                                                            \
   static_assert(                                                               \
       _table1_##tag == _table2_##tag,                                          \
@@ -898,8 +888,7 @@
 
 // Validate the enum values in ICMPX8664_TABLE.
 namespace dummy2 {
-// Define a temporary set of enum values based on low-level table
-// entries.
+// Define a temporary set of enum values based on low-level table entries.
 enum _tmp_enum {
 #define X(val, C_32, C1_64, C2_64, C3_64) _tmp_##val,
   ICMPX8664_TABLE
@@ -910,8 +899,8 @@
 #define X(tag, str) static const int _table1_##tag = InstIcmp::tag;
 ICEINSTICMP_TABLE
 #undef X
-// Define a set of constants based on low-level table entries, and
-// ensure the table entry keys are consistent.
+// Define a set of constants based on low-level table entries, and ensure the
+// table entry keys are consistent.
 #define X(val, C_32, C1_64, C2_64, C3_64)                                      \
   static const int _table2_##val = _tmp_##val;                                 \
   static_assert(                                                               \
@@ -919,8 +908,8 @@
       "Inconsistency between ICMPX8664_TABLE and ICEINSTICMP_TABLE");
 ICMPX8664_TABLE
 #undef X
-// Repeat the static asserts with respect to the high-level table
-// entries in case the high-level table has extra entries.
+// Repeat the static asserts with respect to the high-level table entries in
+// case the high-level table has extra entries.
 #define X(tag, str)                                                            \
   static_assert(                                                               \
       _table1_##tag == _table2_##tag,                                          \
@@ -931,8 +920,7 @@
 
 // Validate the enum values in ICETYPEX8664_TABLE.
 namespace dummy3 {
-// Define a temporary set of enum values based on low-level table
-// entries.
+// Define a temporary set of enum values based on low-level table entries.
 enum _tmp_enum {
 #define X(tag, elementty, cvt, sdss, pack, width, fld) _tmp_##tag,
   ICETYPEX8664_TABLE
@@ -944,16 +932,16 @@
   static const int _table1_##tag = tag;
 ICETYPE_TABLE
 #undef X
-// Define a set of constants based on low-level table entries, and
-// ensure the table entry keys are consistent.
+// Define a set of constants based on low-level table entries, and ensure the
+// table entry keys are consistent.
 #define X(tag, elementty, cvt, sdss, pack, width, fld)                         \
   static const int _table2_##tag = _tmp_##tag;                                 \
   static_assert(_table1_##tag == _table2_##tag,                                \
                 "Inconsistency between ICETYPEX8664_TABLE and ICETYPE_TABLE");
 ICETYPEX8664_TABLE
 #undef X
-// Repeat the static asserts with respect to the high-level table
-// entries in case the high-level table has extra entries.
+// Repeat the static asserts with respect to the high-level table entries in
+// case the high-level table has extra entries.
 #define X(tag, sizeLog2, align, elts, elty, str)                               \
   static_assert(_table1_##tag == _table2_##tag,                                \
                 "Inconsistency between ICETYPEX8664_TABLE and ICETYPE_TABLE");
diff --git a/src/IceTargetLoweringX8664Traits.h b/src/IceTargetLoweringX8664Traits.h
index 454b6cb..0ed40a8 100644
--- a/src/IceTargetLoweringX8664Traits.h
+++ b/src/IceTargetLoweringX8664Traits.h
@@ -404,10 +404,10 @@
                                 const llvm::SmallBitVector &ExcludeRegisters,
                                 uint64_t Salt) {
     // TODO(stichnot): Declaring Permutation this way loses type/size
-    // information.  Fix this in conjunction with the caller-side TODO.
+    // information. Fix this in conjunction with the caller-side TODO.
     assert(Permutation.size() >= RegisterSet::Reg_NUM);
     // Expected upper bound on the number of registers in a single equivalence
-    // class.  For x86-64, this would comprise the 16 XMM registers.  This is
+    // class. For x86-64, this would comprise the 16 XMM registers. This is
     // for performance, not correctness.
     static const unsigned MaxEquivalenceClassSize = 8;
     using RegisterList = llvm::SmallVector<int32_t, MaxEquivalenceClassSize>;
@@ -493,8 +493,8 @@
   static constexpr uint32_t MEMSET_UNROLL_LIMIT = 16;
   /// @}
 
-  /// Value is in bytes. Return Value adjusted to the next highest multiple
-  /// of the stack alignment.
+  /// Value is in bytes. Return Value adjusted to the next highest multiple of
+  /// the stack alignment.
   static uint32_t applyStackAlignment(uint32_t Value) {
     return Utils::applyAlignment(Value, X86_STACK_ALIGNMENT_BYTES);
   }
@@ -516,17 +516,17 @@
   /// instruction. There is one table entry for each of the 16 conditions.
   ///
   /// The first four columns describe the case when the operands are floating
-  /// point scalar values.  A comment in lowerFcmp() describes the lowering
-  /// template.  In the most general case, there is a compare followed by two
+  /// point scalar values. A comment in lowerFcmp() describes the lowering
+  /// template. In the most general case, there is a compare followed by two
   /// conditional branches, because some fcmp conditions don't map to a single
-  /// x86 conditional branch.  However, in many cases it is possible to swap the
-  /// operands in the comparison and have a single conditional branch.  Since
+  /// x86 conditional branch. However, in many cases it is possible to swap the
+  /// operands in the comparison and have a single conditional branch. Since
   /// it's quite tedious to validate the table by hand, good execution tests are
   /// helpful.
   ///
   /// The last two columns describe the case when the operands are vectors of
-  /// floating point values.  For most fcmp conditions, there is a clear mapping
-  /// to a single x86 cmpps instruction variant.  Some fcmp conditions require
+  /// floating point values. For most fcmp conditions, there is a clear mapping
+  /// to a single x86 cmpps instruction variant. Some fcmp conditions require
   /// special code to handle and these are marked in the table with a
   /// Cmpps_Invalid predicate.
   /// {@
@@ -541,7 +541,7 @@
   /// @}
 
   /// The following table summarizes the logic for lowering the icmp instruction
-  /// for i32 and narrower types.  Each icmp condition has a clear mapping to an
+  /// for i32 and narrower types. Each icmp condition has a clear mapping to an
   /// x86 conditional branch instruction.
   /// {@
   static const struct TableIcmp32Type { Cond::BrCond Mapping; } TableIcmp32[];
@@ -549,8 +549,8 @@
   /// @}
 
   /// The following table summarizes the logic for lowering the icmp instruction
-  /// for the i64 type.  For Eq and Ne, two separate 32-bit comparisons and
-  /// conditional branches are needed.  For the other conditions, three separate
+  /// for the i64 type. For Eq and Ne, two separate 32-bit comparisons and
+  /// conditional branches are needed. For the other conditions, three separate
   /// conditional branches are needed.
   /// {@
   static const struct TableIcmp64Type {
@@ -583,8 +583,8 @@
   using TargetLowering = ::Ice::X86Internal::TargetX86Base<TargetX8664>;
   using Assembler = X8664::AssemblerX8664;
 
-  /// X86Operand extends the Operand hierarchy.  Its subclasses are
-  /// X86OperandMem and VariableSplit.
+  /// X86Operand extends the Operand hierarchy. Its subclasses are X86OperandMem
+  /// and VariableSplit.
   class X86Operand : public ::Ice::Operand {
     X86Operand() = delete;
     X86Operand(const X86Operand &) = delete;
@@ -655,8 +655,8 @@
   };
 
   /// VariableSplit is a way to treat an f64 memory location as a pair of i32
-  /// locations (Low and High).  This is needed for some cases of the Bitcast
-  /// instruction.  Since it's not possible for integer registers to access the
+  /// locations (Low and High). This is needed for some cases of the Bitcast
+  /// instruction. Since it's not possible for integer registers to access the
   /// XMM registers and vice versa, the lowering forces the f64 to be spilled to
   /// the stack and then accesses through the VariableSplit.
   // TODO(jpp): remove references to VariableSplit from IceInstX86Base as 64bit
@@ -696,11 +696,11 @@
     Portion Part;
   };
 
-  /// SpillVariable decorates a Variable by linking it to another Variable.
-  /// When stack frame offsets are computed, the SpillVariable is given a
-  /// distinct stack slot only if its linked Variable has a register.  If the
-  /// linked Variable has a stack slot, then the Variable and SpillVariable
-  /// share that slot.
+  /// SpillVariable decorates a Variable by linking it to another Variable. When
+  /// stack frame offsets are computed, the SpillVariable is given a distinct
+  /// stack slot only if its linked Variable has a register. If the linked
+  /// Variable has a stack slot, then the Variable and SpillVariable share that
+  /// slot.
   class SpillVariable : public Variable {
     SpillVariable() = delete;
     SpillVariable(const SpillVariable &) = delete;
diff --git a/src/IceTargetLoweringX86Base.h b/src/IceTargetLoweringX86Base.h
index e032ce9..32c3e3b 100644
--- a/src/IceTargetLoweringX86Base.h
+++ b/src/IceTargetLoweringX86Base.h
@@ -8,9 +8,8 @@
 //===----------------------------------------------------------------------===//
 ///
 /// \file
-/// This file declares the TargetLoweringX86 template class, which
-/// implements the TargetLowering base interface for the x86
-/// architecture.
+/// This file declares the TargetLoweringX86 template class, which implements
+/// the TargetLowering base interface for the x86 architecture.
 ///
 //===----------------------------------------------------------------------===//
 
@@ -44,7 +43,7 @@
 ///
 /// Note: Ideally, we should be able to
 ///
-///   static_assert(std::is_base_of<TargetX86Base<Machine>, Machine>::value);
+///   static_assert(std::is_base_of<TargetX86Base<Machine>, Machine>::value);
 ///
 /// but that does not work: the compiler does not know that Machine inherits
 /// from TargetX86Base at this point in translation.
@@ -106,13 +105,13 @@
 
   void initNodeForLowering(CfgNode *Node) override;
   /// x86-32: Ensure that a 64-bit Variable has been split into 2 32-bit
-  /// Variables, creating them if necessary.  This is needed for all
-  /// I64 operations, and it is needed for pushing F64 arguments for
-  /// function calls using the 32-bit push instruction (though the
-  /// latter could be done by directly writing to the stack).
+  /// Variables, creating them if necessary. This is needed for all I64
+  /// operations, and it is needed for pushing F64 arguments for function calls
+  /// using the 32-bit push instruction (though the latter could be done by
+  /// directly writing to the stack).
   ///
-  /// x86-64: Complains loudly if invoked because the cpu can handle
-  /// 64-bit types natively.
+  /// x86-64: Complains loudly if invoked because the cpu can handle 64-bit
+  /// types natively.
   template <typename T = Traits>
   typename std::enable_if<!T::Is64Bit, void>::type split64(Variable *Var);
   template <typename T = Traits>
@@ -239,13 +238,12 @@
   void scalarizeArithmetic(InstArithmetic::OpKind K, Variable *Dest,
                            Operand *Src0, Operand *Src1);
 
-  /// Operand legalization helpers.  To deal with address mode
-  /// constraints, the helpers will create a new Operand and emit
-  /// instructions that guarantee that the Operand kind is one of those
-  /// indicated by the LegalMask (a bitmask of allowed kinds).  If the
-  /// input Operand is known to already meet the constraints, it may be
-  /// simply returned as the result, without creating any new
-  /// instructions or operands.
+  /// Operand legalization helpers. To deal with address mode constraints, the
+  /// helpers will create a new Operand and emit instructions that guarantee
+  /// that the Operand kind is one of those indicated by the LegalMask (a
+  /// bitmask of allowed kinds). If the input Operand is known to already meet
+  /// the constraints, it may be simply returned as the result, without creating
+  /// any new instructions or operands.
   enum OperandLegalization {
     Legal_None = 0,
     Legal_Reg = 1 << 0, // physical register, not stack location
@@ -259,9 +257,9 @@
   Variable *legalizeToReg(Operand *From, int32_t RegNum = Variable::NoRegister);
   /// Legalize the first source operand for use in the cmp instruction.
   Operand *legalizeSrc0ForCmp(Operand *Src0, Operand *Src1);
-  /// Turn a pointer operand into a memory operand that can be
-  /// used by a real load/store operation. Legalizes the operand as well.
-  /// This is a nop if the operand is already a legal memory operand.
+  /// Turn a pointer operand into a memory operand that can be used by a real
+  /// load/store operation. Legalizes the operand as well. This is a nop if the
+  /// operand is already a legal memory operand.
   typename Traits::X86OperandMem *formMemoryOperand(Operand *Ptr, Type Ty,
                                                     bool DoLegalize = true);
 
@@ -271,8 +269,8 @@
   static constexpr uint32_t NoSizeLimit = 0;
   static const Type TypeForSize[];
   /// Returns the largest type which is equal to or larger than Size bytes. The
-  /// type is suitable for copying memory i.e. a load and store will be a
-  /// single instruction (for example x86 will get f64 not i64).
+  /// type is suitable for copying memory i.e. a load and store will be a single
+  /// instruction (for example x86 will get f64 not i64).
   static Type largestTypeInSize(uint32_t Size, uint32_t MaxSize = NoSizeLimit);
   /// Returns the smallest type which is equal to or larger than Size bytes. If
   /// one doesn't exist then the largest type smaller than Size bytes is
@@ -304,9 +302,9 @@
                                 const llvm::SmallBitVector &ExcludeRegisters,
                                 uint64_t Salt) const override;
 
-  /// The following are helpers that insert lowered x86 instructions
-  /// with minimal syntactic overhead, so that the lowering code can
-  /// look as close to assembly as practical.
+  /// The following are helpers that insert lowered x86 instructions with
+  /// minimal syntactic overhead, so that the lowering code can look as close to
+  /// assembly as practical.
   void _adc(Variable *Dest, Operand *Src0) {
     Context.insert(Traits::Insts::Adc::create(Func, Dest, Src0));
   }
@@ -450,9 +448,9 @@
     Context.insert(Traits::Insts::Lea::create(Func, Dest, Src0));
   }
   void _mfence() { Context.insert(Traits::Insts::Mfence::create(Func)); }
-  /// If Dest=nullptr is passed in, then a new variable is created,
-  /// marked as infinite register allocation weight, and returned
-  /// through the in/out Dest argument.
+  /// If Dest=nullptr is passed in, then a new variable is created, marked as
+  /// infinite register allocation weight, and returned through the in/out Dest
+  /// argument.
   void _mov(Variable *&Dest, Operand *Src0,
             int32_t RegNum = Variable::NoRegister) {
     if (Dest == nullptr)
@@ -626,8 +624,8 @@
   void _ud2() { Context.insert(Traits::Insts::UD2::create(Func)); }
   void _xadd(Operand *Dest, Variable *Src, bool Locked) {
     Context.insert(Traits::Insts::Xadd::create(Func, Dest, Src, Locked));
-    // The xadd exchanges Dest and Src (modifying Src).
-    // Model that update with a FakeDef followed by a FakeUse.
+    // The xadd exchanges Dest and Src (modifying Src). Model that update with
+    // a FakeDef followed by a FakeUse.
     Context.insert(
         InstFakeDef::create(Func, Src, llvm::dyn_cast<Variable>(Dest)));
     _set_dest_nonkillable();
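
As context for the xadd comment above: the instruction adds Src into Dest and
hands the old Dest value back in Src, which is what the FakeDef/FakeUse pair
models. A minimal standalone sketch of that exchange using std::atomic (the
function name is mine, not Subzero's):

  #include <atomic>
  #include <cassert>

  // Mirrors what lock xadd does architecturally: *Dest += Src, and Src
  // receives the previous value of *Dest.
  int xaddLike(std::atomic<int> &Dest, int &Src) {
    int Old = Dest.fetch_add(Src); // atomically Dest += Src, returns old Dest
    Src = Old;                     // the exchange half modeled by FakeDef/FakeUse
    return Old;
  }

  int main() {
    std::atomic<int> Dest{40};
    int Src = 2;
    xaddLike(Dest, Src);
    assert(Dest.load() == 42 && Src == 40);
    return 0;
  }
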
diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h
index c8bf29f..a63f470 100644
--- a/src/IceTargetLoweringX86BaseImpl.h
+++ b/src/IceTargetLoweringX86BaseImpl.h
@@ -8,9 +8,8 @@
 //===----------------------------------------------------------------------===//
 ///
 /// \file
-/// This file implements the TargetLoweringX86Base class, which
-/// consists almost entirely of the lowering sequence for each
-/// high-level instruction.
+/// This file implements the TargetLoweringX86Base class, which consists almost
+/// entirely of the lowering sequence for each high-level instruction.
 ///
 //===----------------------------------------------------------------------===//
 
@@ -63,13 +62,13 @@
   /// IsComplex is the cached result of BoolFolding::hasComplexLowering(Instr).
   bool IsComplex = false;
   /// IsLiveOut is initialized conservatively to true, and is set to false when
-  /// we encounter an instruction that ends Var's live range.  We disable the
-  /// folding optimization when Var is live beyond this basic block.  Note that
+  /// we encounter an instruction that ends Var's live range. We disable the
+  /// folding optimization when Var is live beyond this basic block. Note that
   /// if liveness analysis is not performed (e.g. in Om1 mode), IsLiveOut will
   /// always be true and the folding optimization will never be performed.
   bool IsLiveOut = true;
   // NumUses counts the number of times Var is used as a source operand in the
-  // basic block.  If IsComplex is true and there is more than one use of Var,
+  // basic block. If IsComplex is true and there is more than one use of Var,
   // then the folding optimization is disabled for Var.
   uint32_t NumUses = 0;
 };
@@ -166,7 +165,7 @@
 /// Returns true if the producing instruction has a "complex" lowering sequence.
 /// This generally means that its lowering sequence requires more than one
 /// conditional branch, namely 64-bit integer compares and some floating-point
-/// compares.  When this is true, and there is more than one consumer, we prefer
+/// compares. When this is true, and there is more than one consumer, we prefer
 /// to disable the folding optimization because it minimizes branches.
 template <class MachineTraits>
 bool BoolFolding<MachineTraits>::hasComplexLowering(const Inst *Instr) {
@@ -222,9 +221,9 @@
       setInvalid(I.first);
       continue;
     }
-    // Mark as "dead" rather than outright deleting.  This is so that other
+    // Mark as "dead" rather than outright deleting. This is so that other
     // peephole style optimizations during or before lowering have access to
-    // this instruction in undeleted form.  See for example
+    // this instruction in undeleted form. See for example
     // tryOptimizedCmpxchgCmpBr().
     I.second.Instr->setDead();
   }
@@ -303,8 +302,9 @@
 
   // Run this early so it can be used to focus optimizations on potentially hot
   // code.
-  // TODO(stichnot,ascull): currently only used for regalloc not expensive high
-  // level optimizations which could be focused on potentially hot code.
+  // TODO(stichnot,ascull): currently only used for regalloc not
+  // expensive high level optimizations which could be focused on potentially
+  // hot code.
   Func->computeLoopNestDepth();
   Func->dump("After loop nest depth analysis");
 
@@ -312,7 +312,7 @@
   Func->getVMetadata()->init(VMK_SingleDefs);
   Func->doAddressOpt();
 
-  // Find read-modify-write opportunities.  Do this after address mode
+  // Find read-modify-write opportunities. Do this after address mode
   // optimization so that doAddressOpt() doesn't need to be applied to RMW
   // instructions as well.
   findRMW();
@@ -321,8 +321,8 @@
   // Argument lowering
   Func->doArgLowering();
 
-  // Target lowering.  This requires liveness analysis for some parts of the
-  // lowering decisions, such as compare/branch fusing.  If non-lightweight
+  // Target lowering. This requires liveness analysis for some parts of the
+  // lowering decisions, such as compare/branch fusing. If non-lightweight
   // liveness analysis is used, the instructions need to be renumbered first
   // TODO: This renumbering should only be necessary if we're actually
   // calculating live intervals, which we only do for register allocation.
@@ -330,9 +330,9 @@
   if (Func->hasError())
     return;
 
-  // TODO: It should be sufficient to use the fastest liveness calculation, i.e.
-  // livenessLightweight().  However, for some reason that slows down the rest
-  // of the translation.  Investigate.
+  // TODO: It should be sufficient to use the fastest liveness calculation,
+  // i.e. livenessLightweight(). However, for some reason that slows down the
+  // rest of the translation. Investigate.
   Func->liveness(Liveness_Basic);
   if (Func->hasError())
     return;
@@ -357,7 +357,7 @@
   Func->liveness(Liveness_Intervals);
   if (Func->hasError())
     return;
-  // Validate the live range computations.  The expensive validation call is
+  // Validate the live range computations. The expensive validation call is
   // deliberately only made when assertions are enabled.
   assert(Func->validateLiveness());
   // The post-codegen dump is done here, after liveness analysis and associated
@@ -386,9 +386,9 @@
   // Shuffle basic block order if -reorder-basic-blocks is enabled.
   Func->shuffleNodes();
 
-  // Branch optimization.  This needs to be done just before code emission.  In
+  // Branch optimization.  This needs to be done just before code emission. In
   // particular, no transformations that insert or reorder CfgNodes should be
-  // done after branch optimization.  We go ahead and do it before nop insertion
+  // done after branch optimization. We go ahead and do it before nop insertion
   // to reduce the amount of work needed for searching for opportunities.
   Func->doBranchOpt();
   Func->dump("After branch optimization");
@@ -495,10 +495,10 @@
   Ostream &Str = Func->getContext()->getStrDump();
   for (CfgNode *Node : Func->getNodes()) {
     // Walk through the instructions, considering each sequence of 3
-    // instructions, and look for the particular RMW pattern.  Note that this
-    // search can be "broken" (false negatives) if there are intervening deleted
-    // instructions, or intervening instructions that could be safely moved out
-    // of the way to reveal an RMW pattern.
+    // instructions, and look for the particular RMW pattern. Note that this
+    // search can be "broken" (false negatives) if there are intervening
+    // deleted instructions, or intervening instructions that could be safely
+    // moved out of the way to reveal an RMW pattern.
     auto E = Node->getInsts().end();
     auto I1 = E, I2 = E, I3 = Node->getInsts().begin();
     for (; I3 != E; I1 = I2, I2 = I3, ++I3) {
@@ -528,21 +528,21 @@
             // problems later.
             //
             // With this transformation, the Store instruction acquires a Dest
-            // variable and is now subject to dead code elimination if there are
-            // no more uses of "b".  Variable "x" is a beacon for determining
-            // whether the Store instruction gets dead-code eliminated.  If the
-            // Store instruction is eliminated, then it must be the case that
-            // the RMW instruction ends x's live range, and therefore the RMW
-            // instruction will be retained and later lowered.  On the other
-            // hand, if the RMW instruction does not end x's live range, then
-            // the Store instruction must still be present, and therefore the
-            // RMW instruction is ignored during lowering because it is
-            // redundant with the Store instruction.
+            // variable and is now subject to dead code elimination if there
+            // are no more uses of "b".  Variable "x" is a beacon for
+            // determining whether the Store instruction gets dead-code
+            // eliminated.  If the Store instruction is eliminated, then it
+            // must be the case that the RMW instruction ends x's live range,
+            // and therefore the RMW instruction will be retained and later
+            // lowered.  On the other hand, if the RMW instruction does not end
+            // x's live range, then the Store instruction must still be
+            // present, and therefore the RMW instruction is ignored during
+            // lowering because it is redundant with the Store instruction.
             //
             // Note that if "a" has further uses, the RMW transformation may
             // still trigger, resulting in two loads and one store, which is
-            // worse than the original one load and one store.  However, this is
-            // probably rare, and caching probably keeps it just as fast.
+            // worse than the original one load and one store.  However, this
+            // is probably rare, and caching probably keeps it just as fast.
             if (!isSameMemAddressOperand<Machine>(Load->getSourceAddress(),
                                                   Store->getAddr()))
               continue;
@@ -589,11 +589,10 @@
   return Intrinsics::MemoryOrderInvalid;
 }
 
-/// Determines whether the dest of a Load instruction can be folded
-/// into one of the src operands of a 2-operand instruction.  This is
-/// true as long as the load dest matches exactly one of the binary
-/// instruction's src operands.  Replaces Src0 or Src1 with LoadSrc if
-/// the answer is true.
+/// Determines whether the dest of a Load instruction can be folded into one of
+/// the src operands of a 2-operand instruction. This is true as long as the
+/// load dest matches exactly one of the binary instruction's src operands.
+/// Replaces Src0 or Src1 with LoadSrc if the answer is true.
 inline bool canFoldLoadIntoBinaryInst(Operand *LoadSrc, Variable *LoadDest,
                                       Operand *&Src0, Operand *&Src1) {
   if (Src0 == LoadDest && Src1 != LoadDest) {
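
The folding rule restated by the comment above (fold only when the load's dest
matches exactly one of the two source operands) can be illustrated with a small
self-contained helper; this is a sketch of the matching logic only, not the
Subzero routine:

  #include <cassert>

  // Replace whichever of A or B equals Match with Repl, but only if exactly
  // one of them matches; with zero or two matches the "fold" is rejected.
  bool replaceExactlyOne(const int *Match, const int *Repl, const int *&A,
                         const int *&B) {
    if (A == Match && B != Match) {
      A = Repl;
      return true;
    }
    if (B == Match && A != Match) {
      B = Repl;
      return true;
    }
    return false;
  }

  int main() {
    int X = 0, Y = 1, Z = 2;
    const int *Src0 = &X, *Src1 = &Y;
    assert(replaceExactlyOne(&X, &Z, Src0, Src1) && Src0 == &Z); // one match
    Src0 = &X;
    Src1 = &X;
    assert(!replaceExactlyOne(&X, &Z, Src0, Src1)); // both match: not folded
    return 0;
  }
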
@@ -615,8 +614,8 @@
       Operand *LoadSrc = nullptr;
       Inst *CurInst = Context.getCur();
       Inst *Next = Context.getNextInst();
-      // Determine whether the current instruction is a Load
-      // instruction or equivalent.
+      // Determine whether the current instruction is a Load instruction or
+      // equivalent.
       if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) {
         // An InstLoad always qualifies.
         LoadDest = Load->getDest();
@@ -624,9 +623,9 @@
         LoadSrc = formMemoryOperand(Load->getSourceAddress(),
                                     LoadDest->getType(), DoLegalize);
       } else if (auto *Intrin = llvm::dyn_cast<InstIntrinsicCall>(CurInst)) {
-        // An AtomicLoad intrinsic qualifies as long as it has a valid
-        // memory ordering, and can be implemented in a single
-        // instruction (i.e., not i64 on x86-32).
+        // An AtomicLoad intrinsic qualifies as long as it has a valid memory
+        // ordering, and can be implemented in a single instruction (i.e., not
+        // i64 on x86-32).
         Intrinsics::IntrinsicID ID = Intrin->getIntrinsicInfo().ID;
         if (ID == Intrinsics::AtomicLoad &&
             (Traits::Is64Bit || Intrin->getDest()->getType() != IceType_i64) &&
@@ -638,9 +637,9 @@
                                       DoLegalize);
         }
       }
-      // A Load instruction can be folded into the following
-      // instruction only if the following instruction ends the Load's
-      // Dest variable's live range.
+      // A Load instruction can be folded into the following instruction only
+      // if the following instruction ends the Load's Dest variable's live
+      // range.
       if (LoadDest && Next && Next->isLastUse(LoadDest)) {
         assert(LoadSrc);
         Inst *NewInst = nullptr;
@@ -673,8 +672,7 @@
                                          Select->getCondition(), Src0, Src1);
           }
         } else if (auto *Cast = llvm::dyn_cast<InstCast>(Next)) {
-          // The load dest can always be folded into a Cast
-          // instruction.
+          // The load dest can always be folded into a Cast instruction.
           Variable *Src0 = llvm::dyn_cast<Variable>(Cast->getSrc(0));
           if (Src0 == LoadDest) {
             NewInst = InstCast::create(Func, Cast->getCastKind(),
@@ -685,8 +683,8 @@
           CurInst->setDeleted();
           Next->setDeleted();
           Context.insert(NewInst);
-          // Update NewInst->LiveRangesEnded so that target lowering
-          // may benefit.  Also update NewInst->HasSideEffects.
+          // Update NewInst->LiveRangesEnded so that target lowering may
+          // benefit. Also update NewInst->HasSideEffects.
           NewInst->spliceLivenessInfo(Next, CurInst);
         }
       }
@@ -721,8 +719,8 @@
     Reg = Func->makeVariable(Ty);
     Reg->setRegNum(RegNum);
     PhysicalRegisters[Ty][RegNum] = Reg;
-    // Specially mark esp as an "argument" so that it is considered
-    // live upon function entry.
+    // Specially mark esp as an "argument" so that it is considered live upon
+    // function entry.
     if (RegNum == Traits::RegisterSet::Reg_esp) {
       Func->addImplicitArg(Reg);
       Reg->setIgnoreLiveness();
@@ -782,13 +780,12 @@
 
 /// Helper function for addProlog().
 ///
-/// This assumes Arg is an argument passed on the stack.  This sets the
-/// frame offset for Arg and updates InArgsSizeBytes according to Arg's
-/// width.  For an I64 arg that has been split into Lo and Hi components,
-/// it calls itself recursively on the components, taking care to handle
-/// Lo first because of the little-endian architecture.  Lastly, this
-/// function generates an instruction to copy Arg into its assigned
-/// register if applicable.
+/// This assumes Arg is an argument passed on the stack. This sets the frame
+/// offset for Arg and updates InArgsSizeBytes according to Arg's width. For an
+/// I64 arg that has been split into Lo and Hi components, it calls itself
+/// recursively on the components, taking care to handle Lo first because of the
+/// little-endian architecture. Lastly, this function generates an instruction
+/// to copy Arg into its assigned register if applicable.
 template <class Machine>
 void TargetX86Base<Machine>::finishArgumentLowering(Variable *Arg,
                                                     Variable *FramePtr,
@@ -819,8 +816,8 @@
       _mov(Arg, Mem);
     }
     // This argument-copying instruction uses an explicit Traits::X86OperandMem
-    // operand instead of a Variable, so its fill-from-stack operation has to be
-    // tracked separately for statistics.
+    // operand instead of a Variable, so its fill-from-stack operation has to
+    // be tracked separately for statistics.
     Ctx->statsUpdateFills();
   }
 }
@@ -837,9 +834,8 @@
   default:
     return;
   case IceType_i64:
-  // TODO: Only consider F64 if we need to push each half when
-  // passing as an argument to a function call.  Note that each half
-  // is still typed as I32.
+  // TODO: Only consider F64 if we need to push each half when passing as an
+  // argument to a function call. Note that each half is still typed as I32.
   case IceType_f64:
     break;
   }
@@ -946,11 +942,11 @@
 template <class Machine>
 void TargetX86Base<Machine>::lowerAlloca(const InstAlloca *Inst) {
   IsEbpBasedFrame = true;
-  // Conservatively require the stack to be aligned.  Some stack
-  // adjustment operations implemented below assume that the stack is
-  // aligned before the alloca.  All the alloca code ensures that the
-  // stack alignment is preserved after the alloca.  The stack alignment
-  // restriction can be relaxed in some cases.
+  // Conservatively require the stack to be aligned. Some stack adjustment
+  // operations implemented below assume that the stack is aligned before the
+  // alloca. All the alloca code ensures that the stack alignment is preserved
+  // after the alloca. The stack alignment restriction can be relaxed in some
+  // cases.
   NeedsStackAlignment = true;
 
   // TODO(stichnot): minimize the number of adjustments of esp, etc.
@@ -977,8 +973,8 @@
     Value = Utils::applyAlignment(Value, Alignment);
     _sub(esp, Ctx->getConstantInt32(Value));
   } else {
-    // Non-constant sizes need to be adjusted to the next highest
-    // multiple of the required alignment at runtime.
+    // Non-constant sizes need to be adjusted to the next highest multiple of
+    // the required alignment at runtime.
     Variable *T = makeReg(IceType_i32);
     _mov(T, TotalSize);
     _add(T, Ctx->getConstantInt32(Alignment - 1));
@@ -988,17 +984,16 @@
   _mov(Dest, esp);
 }
 
-/// Strength-reduce scalar integer multiplication by a constant (for
-/// i32 or narrower) for certain constants.  The lea instruction can be
-/// used to multiply by 3, 5, or 9, and the lsh instruction can be used
-/// to multiply by powers of 2.  These can be combined such that
-/// e.g. multiplying by 100 can be done as 2 lea-based multiplies by 5,
-/// combined with left-shifting by 2.
+/// Strength-reduce scalar integer multiplication by a constant (for i32 or
+/// narrower) for certain constants. The lea instruction can be used to multiply
+/// by 3, 5, or 9, and the lsh instruction can be used to multiply by powers of
+/// 2. These can be combined such that e.g. multiplying by 100 can be done as 2
+/// lea-based multiplies by 5, combined with left-shifting by 2.
 template <class Machine>
 bool TargetX86Base<Machine>::optimizeScalarMul(Variable *Dest, Operand *Src0,
                                                int32_t Src1) {
-  // Disable this optimization for Om1 and O0, just to keep things
-  // simple there.
+  // Disable this optimization for Om1 and O0, just to keep things simple
+  // there.
   if (Ctx->getFlags().getOptLevel() < Opt_1)
     return false;
   Type Ty = Dest->getType();
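
The decomposition described above can be sanity-checked with plain integer
arithmetic: an lea-style multiply by 5 is x + 4*x, and 100 = 5 * 5 * 4. A small
sketch (function names are illustrative, not from the patch):

  #include <cassert>
  #include <cstdint>

  // lea-style multiplies: x*3, x*5, x*9 are x + x*2, x + x*4, x + x*8.
  static uint32_t leaTimes5(uint32_t X) { return X + (X << 2); }

  // 100 == 5 * 5 * 4: two lea-by-5 steps followed by a left shift by 2.
  static uint32_t times100(uint32_t X) { return leaTimes5(leaTimes5(X)) << 2; }

  int main() {
    for (uint32_t X = 0; X < 1000; ++X)
      assert(times100(X) == X * 100);
    return 0;
  }
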
@@ -1054,8 +1049,8 @@
   // Lea optimization only works for i16 and i32 types, not i8.
   if (Ty != IceType_i16 && Ty != IceType_i32 && (Count3 || Count5 || Count9))
     return false;
-  // Limit the number of lea/shl operations for a single multiply, to
-  // a somewhat arbitrary choice of 3.
+  // Limit the number of lea/shl operations for a single multiply, to a
+  // somewhat arbitrary choice of 3.
   const uint32_t MaxOpsForOptimizedMul = 3;
   if (CountOps > MaxOpsForOptimizedMul)
     return false;
@@ -1101,11 +1096,11 @@
   }
   if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
     // These x86-32 helper-call-involved instructions are lowered in this
-    // separate switch. This is because loOperand() and hiOperand()
-    // may insert redundant instructions for constant blinding and
-    // pooling. Such redundant instructions will fail liveness analysis
-    // under -Om1 setting. And, actually these arguments do not need
-    // to be processed with loOperand() and hiOperand() to be used.
+    // separate switch. This is because loOperand() and hiOperand() may insert
+    // redundant instructions for constant blinding and pooling. Such redundant
+    // instructions will fail liveness analysis under -Om1 setting. And,
+    // actually these arguments do not need to be processed with loOperand()
+    // and hiOperand() to be used.
     switch (Inst->getOp()) {
     case InstArithmetic::Udiv: {
       const SizeT MaxSrcs = 2;
@@ -1216,8 +1211,8 @@
       _imul(T_2, Src0Lo);
       _mov(T_3, Src0Lo, Traits::RegisterSet::Reg_eax);
       _mul(T_4Lo, T_3, Src1Lo);
-      // The mul instruction produces two dest variables, edx:eax.  We
-      // create a fake definition of edx to account for this.
+      // The mul instruction produces two dest variables, edx:eax. We create a
+      // fake definition of edx to account for this.
       Context.insert(InstFakeDef::create(Func, T_4Hi, T_4Lo));
       _mov(DestLo, T_4Lo);
       _add(T_4Hi, T_1);
@@ -1253,9 +1248,9 @@
       _shl(T_2, T_1);
       _test(T_1, BitTest);
       _br(Traits::Cond::Br_e, Label);
-      // T_2 and T_3 are being assigned again because of the
-      // intra-block control flow, so we need the _mov_nonkillable
-      // variant to avoid liveness problems.
+      // T_2 and T_3 are being assigned again because of the intra-block
+      // control flow, so we need the _mov_nonkillable variant to avoid
+      // liveness problems.
       _mov_nonkillable(T_3, T_2);
       _mov_nonkillable(T_2, Zero);
       Context.insert(Label);
@@ -1289,9 +1284,9 @@
       _shr(T_3, T_1);
       _test(T_1, BitTest);
       _br(Traits::Cond::Br_e, Label);
-      // T_2 and T_3 are being assigned again because of the
-      // intra-block control flow, so we need the _mov_nonkillable
-      // variant to avoid liveness problems.
+      // T_2 and T_3 are being assigned again because of the intra-block
+      // control flow, so we need the _mov_nonkillable variant to avoid
+      // liveness problems.
       _mov_nonkillable(T_2, T_3);
       _mov_nonkillable(T_3, Zero);
       Context.insert(Label);
@@ -1325,10 +1320,10 @@
       _sar(T_3, T_1);
       _test(T_1, BitTest);
       _br(Traits::Cond::Br_e, Label);
-      // T_2 and T_3 are being assigned again because of the
-      // intra-block control flow, so T_2 needs the _mov_nonkillable
-      // variant to avoid liveness problems.  T_3 doesn't need special
-      // treatment because it is reassigned via _sar instead of _mov.
+      // T_2 and T_3 are being assigned again because of the intra-block
+      // control flow, so T_2 needs the _mov_nonkillable variant to avoid
+      // liveness problems. T_3 doesn't need special treatment because it is
+      // reassigned via _sar instead of _mov.
       _mov_nonkillable(T_2, T_3);
       _sar(T_3, SignExtend);
       Context.insert(Label);
@@ -1353,8 +1348,8 @@
     return;
   }
   if (isVectorType(Dest->getType())) {
-    // TODO: Trap on integer divide and integer modulo by zero.
-    // See: https://code.google.com/p/nativeclient/issues/detail?id=3899
+    // TODO: Trap on integer divide and integer modulo by zero. See:
+    // https://code.google.com/p/nativeclient/issues/detail?id=3899
     if (llvm::isa<typename Traits::X86OperandMem>(Src1))
       Src1 = legalizeToReg(Src1);
     switch (Inst->getOp()) {
@@ -1519,8 +1514,8 @@
       if (optimizeScalarMul(Dest, Src0, C->getValue()))
         return;
     }
-    // The 8-bit version of imul only allows the form "imul r/m8"
-    // where T must be in eax.
+    // The 8-bit version of imul only allows the form "imul r/m8" where T must
+    // be in eax.
     if (isByteSizedArithType(Dest->getType())) {
       _mov(T, Src0, Traits::RegisterSet::Reg_eax);
       Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
@@ -1580,11 +1575,11 @@
     }
     break;
   case InstArithmetic::Sdiv:
-    // TODO(stichnot): Enable this after doing better performance
-    // and cross testing.
+    // TODO(stichnot): Enable this after doing better performance and cross
+    // testing.
     if (false && Ctx->getFlags().getOptLevel() >= Opt_1) {
-      // Optimize division by constant power of 2, but not for Om1
-      // or O0, just to keep things simple there.
+      // Optimize division by constant power of 2, but not for Om1 or O0, just
+      // to keep things simple there.
       if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
         int32_t Divisor = C->getValue();
         uint32_t UDivisor = static_cast<uint32_t>(Divisor);
@@ -1600,8 +1595,8 @@
           //   dest=t
           uint32_t TypeWidth = Traits::X86_CHAR_BIT * typeWidthInBytes(Ty);
           _mov(T, Src0);
-          // If for some reason we are dividing by 1, just treat it
-          // like an assignment.
+          // If for some reason we are dividing by 1, just treat it like an
+          // assignment.
           if (LogDiv > 0) {
             // The initial sar is unnecessary when dividing by 2.
             if (LogDiv > 1)
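
For reference, the usual shift-only sequence for signed division by a power of
two, which the surrounding lowering emits with sar/shr/add, looks like this in
scalar form. A hedged sketch that assumes arithmetic right shifts for signed
values (true on the targets of interest); the helper name is mine:

  #include <cassert>
  #include <cstdint>

  // Signed division by 2^Log with rounding toward zero: bias negative
  // dividends by (2^Log - 1) before the arithmetic shift.
  static int32_t sdivPow2(int32_t Src, unsigned Log) {
    if (Log == 0)
      return Src; // dividing by 1 is just an assignment
    int32_t T = Src >> 31; // -1 if Src is negative, 0 otherwise
    T = static_cast<int32_t>(static_cast<uint32_t>(T) >> (32 - Log)); // 2^Log-1 or 0
    return (Src + T) >> Log;
  }

  int main() {
    assert(sdivPow2(7, 1) == 3 && sdivPow2(-7, 1) == -3);
    assert(sdivPow2(-1, 3) == 0 && sdivPow2(-8, 3) == -1);
    return 0;
  }
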
@@ -1656,11 +1651,11 @@
     }
     break;
   case InstArithmetic::Srem:
-    // TODO(stichnot): Enable this after doing better performance
-    // and cross testing.
+    // TODO(stichnot): Enable this after doing better performance and cross
+    // testing.
     if (false && Ctx->getFlags().getOptLevel() >= Opt_1) {
-      // Optimize mod by constant power of 2, but not for Om1 or O0,
-      // just to keep things simple there.
+      // Optimize mod by constant power of 2, but not for Om1 or O0, just to
+      // keep things simple there.
       if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
         int32_t Divisor = C->getValue();
         uint32_t UDivisor = static_cast<uint32_t>(Divisor);
@@ -1777,8 +1772,8 @@
       // memory.
       Src0Legal = legalize(Src0);
     } else {
-      // If Dest could be a stack operand, then RI must be a physical
-      // register or a scalar integer immediate.
+      // If Dest could be a stack operand, then RI must be a physical register
+      // or a scalar integer immediate.
       Src0Legal = legalize(Src0, Legal_Reg | Legal_Imm);
     }
     if (isVectorType(Dest->getType()))
@@ -1803,8 +1798,8 @@
     default:
       break;
     case BoolFolding::PK_Icmp32: {
-      // TODO(stichnot): Refactor similarities between this block and
-      // the corresponding code in lowerIcmp().
+      // TODO(stichnot): Refactor similarities between this block and the
+      // corresponding code in lowerIcmp().
       auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer);
       Operand *Src0 = Producer->getSrc(0);
       Operand *Src1 = legalize(Producer->getSrc(1));
@@ -1835,10 +1830,10 @@
   case InstCast::Sext: {
     // Src0RM is the source operand legalized to physical register or memory,
     // but not immediate, since the relevant x86 native instructions don't
-    // allow an immediate operand.  If the operand is an immediate, we could
-    // consider computing the strength-reduced result at translation time,
-    // but we're unlikely to see something like that in the bitcode that
-    // the optimizer wouldn't have already taken care of.
+    // allow an immediate operand. If the operand is an immediate, we could
+    // consider computing the strength-reduced result at translation time, but
+    // we're unlikely to see something like that in the bitcode that the
+    // optimizer wouldn't have already taken care of.
     Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
     if (isVectorType(Dest->getType())) {
       Type DestTy = Dest->getType();
@@ -1898,8 +1893,8 @@
           typeWidthInBytes(Src0RM->getType())) {
         _mov(T, Src0RM);
       } else {
-        // Widen the source using movsx or movzx.  (It doesn't matter
-        // which one, since the following shl/sar overwrite the bits.)
+        // Widen the source using movsx or movzx. (It doesn't matter which one,
+        // since the following shl/sar overwrite the bits.)
         _movzx(T, Src0RM);
       }
       _shl(T, ShiftAmount);
@@ -2010,12 +2005,11 @@
       _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq);
       _movp(Dest, T);
     } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
-      // Use a helper for converting floating-point values to 64-bit
-      // integers.  SSE2 appears to have no way to convert from xmm
-      // registers to something like the edx:eax register pair, and
-      // gcc and clang both want to use x87 instructions complete with
-      // temporary manipulation of the status word.  This helper is
-      // not needed for x86-64.
+      // Use a helper for converting floating-point values to 64-bit integers.
+      // SSE2 appears to have no way to convert from xmm registers to something
+      // like the edx:eax register pair, and gcc and clang both want to use x87
+      // instructions complete with temporary manipulation of the status word.
+      // This helper is not needed for x86-64.
       split64(Dest);
       const SizeT MaxSrcs = 1;
       Type SrcType = Inst->getSrc(0)->getType();
@@ -2150,8 +2144,8 @@
       lowerCall(Call);
     } else if (Src0->getType() == IceType_i64 ||
                (!Traits::Is64Bit && Src0->getType() == IceType_i32)) {
-      // Use a helper for x86-32 and x86-64.  Also use a helper for
-      // i32 on x86-32.
+      // Use a helper for x86-32 and x86-64. Also use a helper for i32 on
+      // x86-32.
       const SizeT MaxSrcs = 1;
       Type DestType = Dest->getType();
       IceString TargetString;
@@ -2285,8 +2279,8 @@
       if (Traits::Is64Bit) {
         Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
         Variable *T = makeReg(IceType_f64);
-        // Movd requires its fp argument (in this case, the bitcast destination)
-        // to be an xmm register.
+        // Movd requires its fp argument (in this case, the bitcast
+        // destination) to be an xmm register.
         T->setMustHaveReg();
         _movd(T, Src0RM);
         _mov(Dest, T);
@@ -2318,8 +2312,8 @@
             Func, Spill, Traits::VariableSplit::High);
         _mov(T_Lo, loOperand(Src0));
         // Technically, the Spill is defined after the _store happens, but
-        // SpillLo is considered a "use" of Spill so define Spill before it
-        // is used.
+        // SpillLo is considered a "use" of Spill so define Spill before it is
+        // used.
         Context.insert(InstFakeDef::create(Func, Spill));
         _store(T_Lo, SpillLo);
         _mov(T_Hi, hiOperand(Src0));
@@ -2384,8 +2378,8 @@
     // Use pshufd and movd/movss.
     Variable *T = nullptr;
     if (Index) {
-      // The shuffle only needs to occur if the element to be extracted
-      // is not at the lowest index.
+      // The shuffle only needs to occur if the element to be extracted is not
+      // at the lowest index.
       Constant *Mask = Ctx->getConstantInt32(Index);
       T = makeReg(Ty);
       _pshufd(T, legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem), Mask);
@@ -2396,11 +2390,11 @@
     if (InVectorElementTy == IceType_i32) {
       _movd(ExtractedElementR, T);
     } else { // Ty == IceType_f32
-      // TODO(wala): _movss is only used here because _mov does not
-      // allow a vector source and a scalar destination.  _mov should be
-      // able to be used here.
-      // _movss is a binary instruction, so the FakeDef is needed to
-      // keep the live range analysis consistent.
+      // TODO(wala): _movss is only used here because _mov does not allow a
+      // vector source and a scalar destination.  _mov should be able to be
+      // used here.
+      // _movss is a binary instruction, so the FakeDef is needed to keep the
+      // live range analysis consistent.
       Context.insert(InstFakeDef::create(Func, ExtractedElementR));
       _movss(ExtractedElementR, T);
     }
@@ -2408,8 +2402,8 @@
     assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);
     // Spill the value to a stack slot and do the extraction in memory.
     //
-    // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when
-    // support for legalizing to mem is implemented.
+    // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when support
+    // for legalizing to mem is implemented.
     Variable *Slot = Func->makeVariable(Ty);
     Slot->setMustNotHaveReg();
     _movp(Slot, legalizeToReg(SourceVectNotLegalized));
@@ -2589,9 +2583,9 @@
     Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
     Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
 
-    // SSE2 only has signed comparison operations.  Transform unsigned
-    // inputs in a manner that allows for the use of signed comparison
-    // operations by flipping the high order bits.
+    // SSE2 only has signed comparison operations. Transform unsigned inputs in
+    // a manner that allows for the use of signed comparison operations by
+    // flipping the high order bits.
     if (Condition == InstIcmp::Ugt || Condition == InstIcmp::Uge ||
         Condition == InstIcmp::Ult || Condition == InstIcmp::Ule) {
       Variable *T0 = makeReg(Ty);
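
The sign-bit flip mentioned above turns an unsigned comparison into an
equivalent signed one; the SSE2 lowering applies it per lane before a signed
pcmpgt. A scalar demonstration (the function name is mine, and the cast back to
int32_t assumes the usual wrap-around conversion):

  #include <cassert>
  #include <cstdint>

  // Unsigned A < B computed with a signed comparison after flipping the high
  // order bit of both inputs.
  static bool ultViaSigned(uint32_t A, uint32_t B) {
    const uint32_t SignBit = 0x80000000u;
    return static_cast<int32_t>(A ^ SignBit) < static_cast<int32_t>(B ^ SignBit);
  }

  int main() {
    const uint32_t Vals[] = {0u, 1u, 0x7fffffffu, 0x80000000u, 0xffffffffu};
    for (uint32_t A : Vals)
      for (uint32_t B : Vals)
        assert(ultViaSigned(A, B) == (A < B));
    return 0;
  }
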
@@ -2726,8 +2720,8 @@
   Type InVectorElementTy = Traits::getInVectorElementType(Ty);
 
   if (ElementTy == IceType_i1) {
-    // Expand the element to the appropriate size for it to be inserted
-    // in the vector.
+    // Expand the element to the appropriate size for it to be inserted in the
+    // vector.
     Variable *Expanded = Func->makeVariable(InVectorElementTy);
     InstCast *Cast = InstCast::create(Func, InstCast::Zext, Expanded,
                                       ElementToInsertNotLegalized);
@@ -2773,14 +2767,13 @@
       return;
     }
 
-    // shufps treats the source and desination operands as vectors of
-    // four doublewords.  The destination's two high doublewords are
-    // selected from the source operand and the two low doublewords are
-    // selected from the (original value of) the destination operand.
-    // An insertelement operation can be effected with a sequence of two
-    // shufps operations with appropriate masks.  In all cases below,
-    // Element[0] is being inserted into SourceVectOperand.  Indices are
-    // ordered from left to right.
+    // shufps treats the source and destination operands as vectors of four
+    // doublewords. The destination's two high doublewords are selected from
+    // the source operand and the two low doublewords are selected from the
+    // (original value of) the destination operand. An insertelement operation
+    // can be effected with a sequence of two shufps operations with
+    // appropriate masks. In all cases below, Element[0] is being inserted into
+    // SourceVectOperand. Indices are ordered from left to right.
     //
     // insertelement into index 1 (result is stored in ElementR):
     //   ElementR := ElementR[0, 0] SourceVectRM[0, 0]
@@ -2814,11 +2807,10 @@
     }
   } else {
     assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);
-    // Spill the value to a stack slot and perform the insertion in
-    // memory.
+    // Spill the value to a stack slot and perform the insertion in memory.
     //
-    // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when
-    // support for legalizing to mem is implemented.
+    // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when support
+    // for legalizing to mem is implemented.
     Variable *Slot = Func->makeVariable(Ty);
     Slot->setMustNotHaveReg();
     _movp(Slot, legalizeToReg(SourceVectNotLegalized));
@@ -2864,25 +2856,25 @@
     _mfence();
     return;
   case Intrinsics::AtomicFenceAll:
-    // NOTE: FenceAll should prevent and load/store from being moved
-    // across the fence (both atomic and non-atomic). The InstX8632Mfence
-    // instruction is currently marked coarsely as "HasSideEffects".
+    // NOTE: FenceAll should prevent any load/store from being moved across the
+    // fence (both atomic and non-atomic). The InstX8632Mfence instruction is
+    // currently marked coarsely as "HasSideEffects".
     _mfence();
     return;
   case Intrinsics::AtomicIsLockFree: {
     // X86 is always lock free for 8/16/32/64 bit accesses.
-    // TODO(jvoung): Since the result is constant when given a constant
-    // byte size, this opens up DCE opportunities.
+    // TODO(jvoung): Since the result is constant when given a constant byte
+    // size, this opens up DCE opportunities.
     Operand *ByteSize = Instr->getArg(0);
     Variable *Dest = Instr->getDest();
     if (ConstantInteger32 *CI = llvm::dyn_cast<ConstantInteger32>(ByteSize)) {
       Constant *Result;
       switch (CI->getValue()) {
       default:
-        // Some x86-64 processors support the cmpxchg16b intruction, which
-        // can make 16-byte operations lock free (when used with the LOCK
-        // prefix). However, that's not supported in 32-bit mode, so just
-        // return 0 even for large sizes.
+        // Some x86-64 processors support the cmpxchg16b instruction, which can
+        // make 16-byte operations lock free (when used with the LOCK prefix).
+        // However, that's not supported in 32-bit mode, so just return 0 even
+        // for large sizes.
         Result = Ctx->getConstantZero(IceType_i32);
         break;
       case 1:
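
As the comment notes, the answer is a constant for constant byte sizes: 1, 2,
4, and 8 bytes are lock free and everything else reports 0. An equivalent
standalone predicate, purely illustrative:

  #include <cassert>
  #include <cstdint>

  // x86 guarantees lock-free atomics for 1/2/4/8-byte naturally aligned
  // accesses; larger sizes report false, matching the lowering above.
  static bool isLockFreeByteSize(uint64_t Bytes) {
    switch (Bytes) {
    case 1:
    case 2:
    case 4:
    case 8:
      return true;
    default:
      return false;
    }
  }

  int main() {
    assert(isLockFreeByteSize(4) && !isLockFreeByteSize(16));
    return 0;
  }
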
@@ -2900,8 +2892,8 @@
     return;
   }
   case Intrinsics::AtomicLoad: {
-    // We require the memory address to be naturally aligned.
-    // Given that is the case, then normal loads are atomic.
+    // We require the memory address to be naturally aligned. Given that is the
+    // case, then normal loads are atomic.
     if (!Intrinsics::isMemoryOrderValid(
             ID, getConstantMemoryOrder(Instr->getArg(1)))) {
       Func->setError("Unexpected memory ordering for AtomicLoad");
@@ -2910,10 +2902,10 @@
     Variable *Dest = Instr->getDest();
     if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
       // Follow what GCC does and use a movq instead of what lowerLoad()
-      // normally does (split the load into two).
-      // Thus, this skips load/arithmetic op folding. Load/arithmetic folding
-      // can't happen anyway, since this is x86-32 and integer arithmetic only
-      // happens on 32-bit quantities.
+      // normally does (split the load into two). Thus, this skips
+      // load/arithmetic op folding. Load/arithmetic folding can't happen
+      // anyway, since this is x86-32 and integer arithmetic only happens on
+      // 32-bit quantities.
       Variable *T = makeReg(IceType_f64);
       typename Traits::X86OperandMem *Addr =
           formMemoryOperand(Instr->getArg(0), IceType_f64);
@@ -2929,8 +2921,8 @@
     InstLoad *Load = InstLoad::create(Func, Dest, Instr->getArg(0));
     lowerLoad(Load);
     // Make sure the atomic load isn't elided when unused, by adding a FakeUse.
-    // Since lowerLoad may fuse the load w/ an arithmetic instruction,
-    // insert the FakeUse on the last-inserted instruction's dest.
+    // Since lowerLoad may fuse the load w/ an arithmetic instruction, insert
+    // the FakeUse on the last-inserted instruction's dest.
     Context.insert(
         InstFakeUse::create(Func, Context.getLastInserted()->getDest()));
     return;
@@ -2953,15 +2945,15 @@
       Func->setError("Unexpected memory ordering for AtomicStore");
       return;
     }
-    // We require the memory address to be naturally aligned.
-    // Given that is the case, then normal stores are atomic.
-    // Add a fence after the store to make it visible.
+    // We require the memory address to be naturally aligned. Given that is the
+    // case, then normal stores are atomic. Add a fence after the store to make
+    // it visible.
     Operand *Value = Instr->getArg(0);
     Operand *Ptr = Instr->getArg(1);
     if (!Traits::Is64Bit && Value->getType() == IceType_i64) {
-      // Use a movq instead of what lowerStore() normally does
-      // (split the store into two), following what GCC does.
-      // Cast the bits from int -> to an xmm register first.
+      // Use a movq instead of what lowerStore() normally does (split the store
+      // into two), following what GCC does. Cast the bits from int to an xmm
+      // register first.
       Variable *T = makeReg(IceType_f64);
       InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, T, Value);
       lowerCast(Cast);
@@ -2980,8 +2972,8 @@
   case Intrinsics::Bswap: {
     Variable *Dest = Instr->getDest();
     Operand *Val = Instr->getArg(0);
-    // In 32-bit mode, bswap only works on 32-bit arguments, and the
-    // argument must be a register. Use rotate left for 16-bit bswap.
+    // In 32-bit mode, bswap only works on 32-bit arguments, and the argument
+    // must be a register. Use rotate left for 16-bit bswap.
     if (!Traits::Is64Bit && Val->getType() == IceType_i64) {
       Val = legalizeUndef(Val);
       Variable *T_Lo = legalizeToReg(loOperand(Val));
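
The 16-bit fallback mentioned above relies on the identity that rotating a
16-bit value by 8 swaps its two bytes. A quick scalar check of that identity
(not the emitter code; the helper name is mine):

  #include <cassert>
  #include <cstdint>

  // For a 16-bit value, a rotate by 8 swaps the two bytes, which is what the
  // lowering uses in place of bswap (bswap needs a 32/64-bit register).
  static uint16_t bswap16ViaRotate(uint16_t V) {
    return static_cast<uint16_t>((V << 8) | (V >> 8));
  }

  int main() {
    assert(bswap16ViaRotate(0x1234) == 0x3412);
    assert(bswap16ViaRotate(0x00ff) == 0xff00);
    return 0;
  }
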
@@ -3070,8 +3062,8 @@
     return;
   }
   case Intrinsics::Ctlz: {
-    // The "is zero undef" parameter is ignored and we always return
-    // a well-defined value.
+    // The "is zero undef" parameter is ignored and we always return a
+    // well-defined value.
     Operand *Val = legalize(Instr->getArg(0));
     Operand *FirstVal;
     Operand *SecondVal = nullptr;
@@ -3087,8 +3079,8 @@
     return;
   }
   case Intrinsics::Cttz: {
-    // The "is zero undef" parameter is ignored and we always return
-    // a well-defined value.
+    // The "is zero undef" parameter is ignored and we always return a
+    // well-defined value.
     Operand *Val = legalize(Instr->getArg(0));
     Operand *FirstVal;
     Operand *SecondVal = nullptr;
@@ -3108,8 +3100,8 @@
     Type Ty = Src->getType();
     Variable *Dest = Instr->getDest();
     Variable *T = makeVectorOfFabsMask(Ty);
-    // The pand instruction operates on an m128 memory operand, so if
-    // Src is an f32 or f64, we need to make sure it's in a register.
+    // The pand instruction operates on an m128 memory operand, so if Src is an
+    // f32 or f64, we need to make sure it's in a register.
     if (isVectorType(Ty)) {
       if (llvm::isa<typename Traits::X86OperandMem>(Src))
         Src = legalizeToReg(Src);
@@ -3694,8 +3686,8 @@
     Variable *Reg;
 
     // Copy the data into registers as the source and destination could overlap
-    // so make sure not to clobber the memory. This also means overlapping moves
-    // can be used as we are taking a safe snapshot of the memory.
+    // so make sure not to clobber the memory. This also means overlapping
+    // moves can be used as we are taking a safe snapshot of the memory.
     Type Ty = largestTypeInSize(CountValue);
     uint32_t TyWidth = typeWidthInBytes(Ty);
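
The snapshot idea described in the comment above, reduced to scalars: perform
every load before any store so that an overlapping destination cannot clobber
data that still needs to be read. A sketch for a fixed 8-byte copy (names are
mine, not Subzero's):

  #include <cassert>
  #include <cstring>

  // Copy 8 bytes through a temporary: all loads happen before any store, so an
  // overlapping destination cannot clobber bytes we still need to read.
  static void copy8Snapshot(char *Dest, const char *Src) {
    char Tmp[8];
    std::memcpy(Tmp, Src, 8);  // take the snapshot ("loads")
    std::memcpy(Dest, Tmp, 8); // then write it out ("stores")
  }

  int main() {
    char Buf[10] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9'};
    copy8Snapshot(Buf + 1, Buf); // overlapping regions
    assert(Buf[1] == '0' && Buf[8] == '7');
    return 0;
  }
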
 
@@ -3896,8 +3888,7 @@
 
 inline bool matchTransitiveAssign(const VariablesMetadata *VMetadata,
                                   Variable *&Var, const Inst *&Reason) {
-  // Var originates from Var=SrcVar ==>
-  //   set Var:=SrcVar
+  // Var originates from Var=SrcVar ==> set Var:=SrcVar
   if (Var == nullptr)
     return false;
   if (const Inst *VarAssign = VMetadata->getSingleDefinition(Var)) {
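
A toy version of the transitive-assignment chase sketched above: while a
variable is known to originate from a single plain copy, substitute its source.
Purely illustrative, assuming no copy cycles; the real routine walks
VariablesMetadata single definitions rather than a map:

  #include <cassert>
  #include <map>
  #include <string>

  // Follow chains like c = b; b = a; so that a query for "c" resolves to "a".
  static std::string
  chaseCopies(std::string Var,
              const std::map<std::string, std::string> &CopyOf) {
    for (auto It = CopyOf.find(Var); It != CopyOf.end(); It = CopyOf.find(Var))
      Var = It->second; // Var originates from Var=SrcVar ==> set Var:=SrcVar
    return Var;
  }

  int main() {
    std::map<std::string, std::string> CopyOf = {{"c", "b"}, {"b", "a"}};
    assert(chaseCopies("c", CopyOf) == "a");
    assert(chaseCopies("a", CopyOf) == "a");
    return 0;
  }
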
@@ -4059,10 +4050,10 @@
   (void)Offset; // TODO: pattern-match for non-zero offsets.
   if (Base == nullptr)
     return;
-  // If the Base has more than one use or is live across multiple
-  // blocks, then don't go further.  Alternatively (?), never consider
-  // a transformation that would change a variable that is currently
-  // *not* live across basic block boundaries into one that *is*.
+  // If the Base has more than one use or is live across multiple blocks, then
+  // don't go further. Alternatively (?), never consider a transformation that
+  // would change a variable that is currently *not* live across basic block
+  // boundaries into one that *is*.
   if (Func->getVMetadata()->isMultiBlock(Base) /* || Base->getUseCount() > 1*/)
     return;
 
@@ -4232,8 +4223,8 @@
     Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem);
     Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem);
     if (InstructionSet >= Traits::SSE4_1) {
-      // TODO(wala): If the condition operand is a constant, use blendps
-      // or pblendw.
+      // TODO(wala): If the condition operand is a constant, use blendps or
+      // pblendw.
       //
       // Use blendvps or pblendvb to implement select.
       if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 ||
@@ -4310,8 +4301,8 @@
 
   _cmp(CmpOpnd0, CmpOpnd1);
   if (typeWidthInBytes(DestTy) == 1 || isFloatingType(DestTy)) {
-    // The cmov instruction doesn't allow 8-bit or FP operands, so
-    // we need explicit control flow.
+    // The cmov instruction doesn't allow 8-bit or FP operands, so we need
+    // explicit control flow.
     // d=cmp e,f; a=d?b:c ==> cmp e,f; a=b; jne L1; a=c; L1:
     typename Traits::Insts::Label *Label =
         Traits::Insts::Label::create(Func, this);
@@ -4324,8 +4315,8 @@
     return;
   }
   // mov t, SrcF; cmov_cond t, SrcT; mov dest, t
-  // But if SrcT is immediate, we might be able to do better, as
-  // the cmov instruction doesn't allow an immediate operand:
+  // But if SrcT is immediate, we might be able to do better, as the cmov
+  // instruction doesn't allow an immediate operand:
   // mov t, SrcT; cmov_!cond t, SrcF; mov dest, t
   if (llvm::isa<Constant>(SrcT) && !llvm::isa<Constant>(SrcF)) {
     std::swap(SrcT, SrcF);
@@ -4686,8 +4677,8 @@
 ///   %cmp.ext = sext <n x i1> %cmp to <n x ty>
 ///
 /// We can eliminate the sext operation by copying the result of pcmpeqd,
-/// pcmpgtd, or cmpps (which produce sign extended results) to the result
-/// of the sext operation.
+/// pcmpgtd, or cmpps (which produce sign extended results) to the result of the
+/// sext operation.
 template <class Machine>
 void TargetX86Base<Machine>::eliminateNextVectorSextInstruction(
     Variable *SignExtendedResult) {
@@ -4712,13 +4703,12 @@
 template <class Machine>
 void TargetX86Base<Machine>::lowerRMW(
     const typename Traits::Insts::FakeRMW *RMW) {
-  // If the beacon variable's live range does not end in this
-  // instruction, then it must end in the modified Store instruction
-  // that follows.  This means that the original Store instruction is
-  // still there, either because the value being stored is used beyond
-  // the Store instruction, or because dead code elimination did not
-  // happen.  In either case, we cancel RMW lowering (and the caller
-  // deletes the RMW instruction).
+  // If the beacon variable's live range does not end in this instruction, then
+  // it must end in the modified Store instruction that follows. This means
+  // that the original Store instruction is still there, either because the
+  // value being stored is used beyond the Store instruction, or because dead
+  // code elimination did not happen. In either case, we cancel RMW lowering
+  // (and the caller deletes the RMW instruction).
   if (!RMW->isLastUse(RMW->getBeacon()))
     return;
   Operand *Src = RMW->getData();
@@ -4800,10 +4790,9 @@
   }
 }
 
-/// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to
-/// preserve integrity of liveness analysis.  Undef values are also
-/// turned into zeroes, since loOperand() and hiOperand() don't expect
-/// Undef input.
+/// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to preserve
+/// integrity of liveness analysis. Undef values are also turned into zeroes,
+/// since loOperand() and hiOperand() don't expect Undef input.
 template <class Machine> void TargetX86Base<Machine>::prelowerPhis() {
   if (Traits::Is64Bit) {
     // On x86-64 we don't need to prelower phis -- the architecture can handle
@@ -4811,25 +4800,25 @@
     return;
   }
 
-  // Pause constant blinding or pooling, blinding or pooling will be done
-  // later during phi lowering assignments
+  // Pause constant blinding or pooling; blinding or pooling will be done later
+  // during phi lowering assignments.
   BoolFlagSaver B(RandomizationPoolingPaused, true);
   PhiLowering::prelowerPhis32Bit<TargetX86Base<Machine>>(
       this, Context.getNode(), Func);
 }
 
-// There is no support for loading or emitting vector constants, so the
-// vector values returned from makeVectorOfZeros, makeVectorOfOnes,
-// etc. are initialized with register operations.
+// There is no support for loading or emitting vector constants, so the vector
+// values returned from makeVectorOfZeros, makeVectorOfOnes, etc. are
+// initialized with register operations.
 //
-// TODO(wala): Add limited support for vector constants so that
-// complex initialization in registers is unnecessary.
+// TODO(wala): Add limited support for vector constants so that complex
+// initialization in registers is unnecessary.
 
 template <class Machine>
 Variable *TargetX86Base<Machine>::makeVectorOfZeros(Type Ty, int32_t RegNum) {
   Variable *Reg = makeReg(Ty, RegNum);
-  // Insert a FakeDef, since otherwise the live range of Reg might
-  // be overestimated.
+  // Insert a FakeDef, since otherwise the live range of Reg might be
+  // overestimated.
   Context.insert(InstFakeDef::create(Func, Reg));
   _pxor(Reg, Reg);
   return Reg;
@@ -4875,12 +4864,12 @@
   }
 }
 
-/// Construct a mask in a register that can be and'ed with a
-/// floating-point value to mask off its sign bit.  The value will be
-/// <4 x 0x7fffffff> for f32 and v4f32, and <2 x 0x7fffffffffffffff>
-/// for f64.  Construct it as vector of ones logically right shifted
-/// one bit.  TODO(stichnot): Fix the wala TODO above, to represent
-/// vector constants in memory.
+/// Construct a mask in a register that can be and'ed with a floating-point
+/// value to mask off its sign bit. The value will be <4 x 0x7fffffff> for f32
+/// and v4f32, and <2 x 0x7fffffffffffffff> for f64. Construct it as vector of
+/// ones logically right shifted one bit.
+/// TODO(stichnot): Fix the wala TODO above, to represent vector constants in
+/// memory.
 template <class Machine>
 Variable *TargetX86Base<Machine>::makeVectorOfFabsMask(Type Ty,
                                                        int32_t RegNum) {
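
The mask built here is <N x 0x7fffffff>, i.e. all ones logically shifted right
by one; and'ing it with a floating-point value clears the sign bit. A scalar
demonstration using a bit copy in place of pand (the helper name is mine):

  #include <cassert>
  #include <cstdint>
  #include <cstring>

  // And'ing a float's bits with 0x7fffffff (all ones shifted right by one)
  // masks off the sign bit, i.e. computes fabs.
  static float fabsViaMask(float X) {
    uint32_t Bits;
    std::memcpy(&Bits, &X, sizeof(Bits));
    Bits &= ~0u >> 1; // 0x7fffffff
    std::memcpy(&X, &Bits, sizeof(Bits));
    return X;
  }

  int main() {
    assert(fabsViaMask(-1.5f) == 1.5f);
    assert(fabsViaMask(2.25f) == 2.25f);
    return 0;
  }
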
@@ -4897,9 +4886,9 @@
   assert(Slot->mustNotHaveReg());
   assert(Slot->getRegNum() == Variable::NoRegister);
   // Compute the location of Loc in memory.
-  // TODO(wala,stichnot): lea should not be required.  The address of
-  // the stack slot is known at compile time (although not until after
-  // addProlog()).
+  // TODO(wala,stichnot): lea should not be required. The address of the
+  // stack slot is known at compile time (although not until after
+  // addProlog()).
   const Type PointerType = IceType_i32;
   Variable *Loc = makeReg(PointerType);
   _lea(Loc, Slot);
@@ -4925,20 +4914,19 @@
 Operand *TargetX86Base<Machine>::legalize(Operand *From, LegalMask Allowed,
                                           int32_t RegNum) {
   Type Ty = From->getType();
-  // Assert that a physical register is allowed.  To date, all calls
-  // to legalize() allow a physical register.  If a physical register
-  // needs to be explicitly disallowed, then new code will need to be
-  // written to force a spill.
+  // Assert that a physical register is allowed. To date, all calls to
+  // legalize() allow a physical register. If a physical register needs to be
+  // explicitly disallowed, then new code will need to be written to force a
+  // spill.
   assert(Allowed & Legal_Reg);
-  // If we're asking for a specific physical register, make sure we're
-  // not allowing any other operand kinds.  (This could be future
-  // work, e.g. allow the shl shift amount to be either an immediate
-  // or in ecx.)
+  // If we're asking for a specific physical register, make sure we're not
+  // allowing any other operand kinds. (This could be future work, e.g. allow
+  // the shl shift amount to be either an immediate or in ecx.)
   assert(RegNum == Variable::NoRegister || Allowed == Legal_Reg);
 
   if (auto Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(From)) {
-    // Before doing anything with a Mem operand, we need to ensure
-    // that the Base and Index components are in physical registers.
+    // Before doing anything with a Mem operand, we need to ensure that the
+    // Base and Index components are in physical registers.
     Variable *Base = Mem->getBase();
     Variable *Index = Mem->getIndex();
     Variable *RegBase = nullptr;
@@ -4983,8 +4971,8 @@
       }
     }
 
-    // If the operand is an 32 bit constant integer, we should check
-    // whether we need to randomize it or pool it.
+    // If the operand is a 32 bit constant integer, we should check whether we
+    // need to randomize it or pool it.
     if (ConstantInteger32 *C = llvm::dyn_cast<ConstantInteger32>(Const)) {
       Operand *NewConst = randomizeOrPoolImmediate(C, RegNum);
       if (NewConst != Const) {
@@ -4992,8 +4980,8 @@
       }
     }
 
-    // Convert a scalar floating point constant into an explicit
-    // memory operand.
+    // Convert a scalar floating point constant into an explicit memory
+    // operand.
     if (isScalarFloatingType(Ty)) {
       Variable *Base = nullptr;
       std::string Buffer;
@@ -5016,9 +5004,9 @@
     return From;
   }
   if (auto Var = llvm::dyn_cast<Variable>(From)) {
-    // Check if the variable is guaranteed a physical register.  This
-    // can happen either when the variable is pre-colored or when it is
-    // assigned infinite weight.
+    // Check if the variable is guaranteed a physical register. This can happen
+    // either when the variable is pre-colored or when it is assigned infinite
+    // weight.
     bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg());
     // We need a new physical register for the operand if:
     //   Mem is not allowed and Var isn't guaranteed a physical
@@ -5046,16 +5034,16 @@
   Type Ty = From->getType();
   if (llvm::isa<ConstantUndef>(From)) {
     // Lower undefs to zero.  Another option is to lower undefs to an
-    // uninitialized register; however, using an uninitialized register
-    // results in less predictable code.
+    // uninitialized register; however, using an uninitialized register results
+    // in less predictable code.
     //
-    // If in the future the implementation is changed to lower undef
-    // values to uninitialized registers, a FakeDef will be needed:
+    // If in the future the implementation is changed to lower undef values to
+    // uninitialized registers, a FakeDef will be needed:
     //     Context.insert(InstFakeDef::create(Func, Reg));
     // This is in order to ensure that the live range of Reg is not
-    // overestimated.  If the constant being lowered is a 64 bit value,
-    // then the result should be split and the lo and hi components will
-    // need to go in uninitialized registers.
+    // overestimated. If the constant being lowered is a 64 bit value, then
+    // the result should be split and the lo and hi components will need to go
+    // in uninitialized registers.
     if (isVectorType(Ty))
       return makeVectorOfZeros(Ty, RegNum);
     return Ctx->getConstantZero(Ty);
@@ -5063,12 +5051,11 @@
   return From;
 }
 
-/// For the cmp instruction, if Src1 is an immediate, or known to be a
-/// physical register, we can allow Src0 to be a memory operand.
-/// Otherwise, Src0 must be copied into a physical register.
-/// (Actually, either Src0 or Src1 can be chosen for the physical
-/// register, but unfortunately we have to commit to one or the other
-/// before register allocation.)
+/// For the cmp instruction, if Src1 is an immediate, or known to be a physical
+/// register, we can allow Src0 to be a memory operand. Otherwise, Src0 must be
+/// copied into a physical register. (Actually, either Src0 or Src1 can be
+/// chosen for the physical register, but unfortunately we have to commit to one
+/// or the other before register allocation.)
 template <class Machine>
 Operand *TargetX86Base<Machine>::legalizeSrc0ForCmp(Operand *Src0,
                                                     Operand *Src1) {
@@ -5095,11 +5082,10 @@
     Constant *Offset = llvm::dyn_cast<Constant>(Opnd);
     assert(Base || Offset);
     if (Offset) {
-      // During memory operand building, we do not blind or pool
-      // the constant offset, we will work on the whole memory
-      // operand later as one entity later, this save one instruction.
-      // By turning blinding and pooling off, we guarantee
-      // legalize(Offset) will return a Constant*.
+      // During memory operand building, we do not blind or pool the constant
+      // offset; we will work on the whole memory operand as one entity later,
+      // which saves one instruction. By turning blinding and pooling off, we
+      // guarantee legalize(Offset) will return a Constant*.
       {
         BoolFlagSaver B(RandomizationPoolingPaused, true);
 
@@ -5111,8 +5097,8 @@
     }
     Mem = Traits::X86OperandMem::create(Func, Ty, Base, Offset);
   }
-  // Do legalization, which contains randomization/pooling
-  // or do randomization/pooling.
+  // Either do legalization, which includes randomization/pooling, or do only
+  // randomization/pooling.
   return llvm::cast<typename Traits::X86OperandMem>(
       DoLegalize ? legalize(Mem) : randomizeOrPoolImmediate(Mem));
 }
@@ -5235,11 +5221,10 @@
       //  insert: lea -cookie[Reg], Reg
       //  => Reg
       // If we have already assigned a phy register, we must come from
-      // andvancedPhiLowering()=>lowerAssign(). In this case we should reuse
-      // the assigned register as this assignment is that start of its use-def
-      // chain. So we add RegNum argument here.
-      // Note we use 'lea' instruction instead of 'xor' to avoid affecting
-      // the flags.
+      // advancedPhiLowering()=>lowerAssign(). In this case we should reuse the
+      // assigned register, as this assignment is the start of its use-def
+      // chain. So we add the RegNum argument here. Note we use the 'lea'
+      // instruction instead of 'xor' to avoid affecting the flags.
       Variable *Reg = makeReg(IceType_i32, RegNum);
       ConstantInteger32 *Integer = llvm::cast<ConstantInteger32>(Immediate);
       uint32_t Value = Integer->getValue();
@@ -5268,8 +5253,8 @@
       assert(Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool);
       Immediate->setShouldBePooled(true);
       // if we have already assigned a phy register, we must come from
-      // andvancedPhiLowering()=>lowerAssign(). In this case we should reuse
-      // the assigned register as this assignment is that start of its use-def
+      // advancedPhiLowering()=>lowerAssign(). In this case we should reuse the
+      // assigned register, as this assignment is the start of its use-def
       // chain. So we add RegNum argument here.
       Variable *Reg = makeReg(Immediate->getType(), RegNum);
       IceString Label;
@@ -5302,8 +5287,8 @@
     return MemOperand;
   }
 
-  // If this memory operand is already a randommized one, we do
-  // not randomize it again.
+  // If this memory operand is already a randomized one, we do not randomize it
+  // again.
   if (MemOperand->getRandomized())
     return MemOperand;
 
@@ -5338,9 +5323,8 @@
         Variable *RegTemp = makeReg(MemOperand->getOffset()->getType(), RegNum);
         _lea(RegTemp, TempMemOperand);
         // As source operand doesn't use the dstreg, we don't need to add
-        // _set_dest_nonkillable().
-        // But if we use the same Dest Reg, that is, with RegNum
-        // assigned, we should add this _set_dest_nonkillable()
+        // _set_dest_nonkillable(). But if we use the same Dest Reg, that is,
+        // with RegNum assigned, we should add this _set_dest_nonkillable().
         if (RegNum != Variable::NoRegister)
           _set_dest_nonkillable();
 
@@ -5366,12 +5350,11 @@
         //  =>[RegTemp, index, shift]
         assert(Ctx->getFlags().getRandomizeAndPoolImmediatesOption() ==
                RPI_Pool);
-        // Memory operand should never exist as source operands in phi
-        // lowering assignments, so there is no need to reuse any registers
-        // here. For phi lowering, we should not ask for new physical
-        // registers in general.
-        // However, if we do meet Memory Operand during phi lowering, we
-        // should not blind or pool the immediates for now.
+        // Memory operands should never exist as source operands in phi lowering
+        // assignments, so there is no need to reuse any registers here. For
+        // phi lowering, we should not ask for new physical registers in
+        // general. However, if we do meet Memory Operand during phi lowering,
+        // we should not blind or pool the immediates for now.
         if (RegNum != Variable::NoRegister)
           return MemOperand;
         Variable *RegTemp = makeReg(IceType_i32);
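
The fabs-mask comment above describes building <4 x 0x7fffffff> (or
<2 x 0x7fffffffffffffff>) by logically shifting a vector of all ones right by
one bit. A minimal scalar sketch of that bit trick, not part of this patch; the
helper name and the use of memcpy are illustrative only:

    #include <cstdint>
    #include <cstring>

    static float fabsViaMask(float Val) {
      // "All ones" logically shifted right one bit leaves every bit set except
      // the sign bit: 0x7fffffff for a 32-bit lane.
      const uint32_t Mask = ~UINT32_C(0) >> 1;
      uint32_t Bits;
      std::memcpy(&Bits, &Val, sizeof(Bits)); // copy the float's bit pattern
      Bits &= Mask;                           // clear the sign bit
      float Result;
      std::memcpy(&Result, &Bits, sizeof(Result));
      return Result; // the absolute value of Val
    }
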
diff --git a/src/IceThreading.h b/src/IceThreading.h
index f59f46e..b0bcc01 100644
--- a/src/IceThreading.h
+++ b/src/IceThreading.h
@@ -22,31 +22,29 @@
 
 namespace Ice {
 
-/// BoundedProducerConsumerQueue is a work queue that allows multiple
-/// producers and multiple consumers.  A producer adds entries using
-/// blockingPush(), and may block if the queue is "full".  A producer
-/// uses notifyEnd() to indicate that no more entries will be added.  A
-/// consumer removes an item using blockingPop(), which will return
-/// nullptr if notifyEnd() has been called and the queue is empty (it
-/// never returns nullptr if the queue contained any items).
+/// BoundedProducerConsumerQueue is a work queue that allows multiple producers
+/// and multiple consumers. A producer adds entries using blockingPush(), and
+/// may block if the queue is "full". A producer uses notifyEnd() to indicate
+/// that no more entries will be added. A consumer removes an item using
+/// blockingPop(), which will return nullptr if notifyEnd() has been called and
+/// the queue is empty (it never returns nullptr if the queue contained any
+/// items).
 ///
-/// The MaxSize ctor arg controls the maximum size the queue can grow
-/// to (subject to a hard limit of MaxStaticSize-1).  The Sequential
-/// arg indicates purely sequential execution in which the single
-/// thread should never wait().
+/// The MaxSize ctor arg controls the maximum size the queue can grow to
+/// (subject to a hard limit of MaxStaticSize-1). The Sequential arg indicates
+/// purely sequential execution in which the single thread should never wait().
 ///
-/// Two condition variables are used in the implementation.
-/// GrewOrEnded signals a waiting worker that a producer has changed
-/// the state of the queue.  Shrunk signals a blocked producer that a
-/// consumer has changed the state of the queue.
+/// Two condition variables are used in the implementation. GrewOrEnded signals
+/// a waiting worker that a producer has changed the state of the queue. Shrunk
+/// signals a blocked producer that a consumer has changed the state of the
+/// queue.
 ///
-/// The methods begin with Sequential-specific code to be most clear.
-/// The lock and condition variables are not used in the Sequential
-/// case.
+/// The methods begin with Sequential-specific code to be most clear. The lock
+/// and condition variables are not used in the Sequential case.
 ///
 /// Internally, the queue is implemented as a circular array of size
-/// MaxStaticSize, where the queue boundaries are denoted by the Front
-/// and Back fields.  Front==Back indicates an empty queue.
+/// MaxStaticSize, where the queue boundaries are denoted by the Front and Back
+/// fields. Front==Back indicates an empty queue.
 template <typename T, size_t MaxStaticSize = 128>
 class BoundedProducerConsumerQueue {
   BoundedProducerConsumerQueue() = delete;
@@ -60,8 +58,8 @@
   void blockingPush(T *Item) {
     {
       std::unique_lock<GlobalLockType> L(Lock);
-      // If the work queue is already "full", wait for a consumer to
-      // grab an element and shrink the queue.
+      // If the work queue is already "full", wait for a consumer to grab an
+      // element and shrink the queue.
       Shrunk.wait(L, [this] { return size() < MaxSize || Sequential; });
       push(Item);
     }
@@ -103,27 +101,23 @@
   GlobalLockType Lock;
 
   ICE_CACHELINE_BOUNDARY;
-  /// GrewOrEnded is written by the producers and read by the
-  /// consumers.  It is notified (by the producer) when something is
-  /// added to the queue, in case consumers are waiting for a non-empty
-  /// queue.
+  /// GrewOrEnded is written by the producers and read by the consumers. It is
+  /// notified (by the producer) when something is added to the queue, in case
+  /// consumers are waiting for a non-empty queue.
   std::condition_variable GrewOrEnded;
-  /// Back is the index into WorkItems[] of where the next element will
-  /// be pushed.  (More precisely, Back&MaxStaticSize is the index.)
-  /// It is written by the producers, and read by all via size() and
-  /// empty().
+  /// Back is the index into WorkItems[] of where the next element will be
+  /// pushed. (More precisely, Back&MaxStaticSize is the index.) It is written
+  /// by the producers, and read by all via size() and empty().
   size_t Back = 0;
 
   ICE_CACHELINE_BOUNDARY;
-  /// Shrunk is notified (by the consumer) when something is removed
-  /// from the queue, in case a producer is waiting for the queue to
-  /// drop below maximum capacity.  It is written by the consumers and
-  /// read by the producers.
+  /// Shrunk is notified (by the consumer) when something is removed from the
+  /// queue, in case a producer is waiting for the queue to drop below maximum
+  /// capacity. It is written by the consumers and read by the producers.
   std::condition_variable Shrunk;
-  /// Front is the index into WorkItems[] of the oldest element,
-  /// i.e. the next to be popped.  (More precisely Front&MaxStaticSize
-  /// is the index.)  It is written by the consumers, and read by all
-  /// via size() and empty().
+  /// Front is the index into WorkItems[] of the oldest element, i.e. the next
+  /// to be popped. (More precisely Front&MaxStaticSize is the index.) It is
+  /// written by the consumers, and read by all via size() and empty().
   size_t Front = 0;
 
   ICE_CACHELINE_BOUNDARY;
@@ -131,8 +125,7 @@
   /// MaxSize and Sequential are read by all and written by none.
   const size_t MaxSize;
   const bool Sequential;
-  /// IsEnded is read by the consumers, and only written once by the
-  /// producer.
+  /// IsEnded is read by the consumers, and only written once by the producer.
   bool IsEnded = false;
 
   /// The lock must be held when the following methods are called.
@@ -148,15 +141,14 @@
   }
 };
 
-/// EmitterWorkItem is a simple wrapper around a pointer that
-/// represents a work item to be emitted, i.e. a function or a set of
-/// global declarations and initializers, and it includes a sequence
-/// number so that work items can be emitted in a particular order for
-/// deterministic output.  It acts like an interface class, but instead
-/// of making the classes of interest inherit from EmitterWorkItem, it
-/// wraps pointers to these classes.  Some space is wasted compared to
-/// storing the pointers in a union, but not too much due to the work
-/// granularity.
+/// EmitterWorkItem is a simple wrapper around a pointer that represents a work
+/// item to be emitted, i.e. a function or a set of global declarations and
+/// initializers, and it includes a sequence number so that work items can be
+/// emitted in a particular order for deterministic output. It acts like an
+/// interface class, but instead of making the classes of interest inherit from
+/// EmitterWorkItem, it wraps pointers to these classes. Some space is wasted
+/// compared to storing the pointers in a union, but not too much due to the
+/// work granularity.
 class EmitterWorkItem {
   EmitterWorkItem() = delete;
   EmitterWorkItem(const EmitterWorkItem &) = delete;
@@ -165,20 +157,19 @@
 public:
   /// ItemKind can be one of the following:
   ///
-  /// WI_Nop: No actual work.  This is a placeholder to maintain
-  /// sequence numbers in case there is a translation error.
+  /// WI_Nop: No actual work. This is a placeholder to maintain sequence numbers
+  /// in case there is a translation error.
   ///
   /// WI_GlobalInits: A list of global declarations and initializers.
   ///
-  /// WI_Asm: A function that has already had emitIAS() called on it.
-  /// The work is transferred via the Assembler buffer, and the
-  /// originating Cfg has been deleted (to recover lots of memory).
+  /// WI_Asm: A function that has already had emitIAS() called on it. The work
+  /// is transferred via the Assembler buffer, and the originating Cfg has been
+  /// deleted (to recover lots of memory).
   ///
-  /// WI_Cfg: A Cfg that has not yet had emit() or emitIAS() called on
-  /// it.  This is only used as a debugging configuration when we want
-  /// to emit "readable" assembly code, possibly annotated with
-  /// liveness and other information only available in the Cfg and not
-  /// in the Assembler buffer.
+  /// WI_Cfg: A Cfg that has not yet had emit() or emitIAS() called on it. This
+  /// is only used as a debugging configuration when we want to emit "readable"
+  /// assembly code, possibly annotated with liveness and other information only
+  /// available in the Cfg and not in the Assembler buffer.
   enum ItemKind { WI_Nop, WI_GlobalInits, WI_Asm, WI_Cfg };
   /// Constructor for a WI_Nop work item.
   explicit EmitterWorkItem(uint32_t Seq);
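
For orientation, a hedged usage sketch of the queue contract described above:
blockingPush()/notifyEnd() on the producer side, and blockingPop() returning
nullptr on the consumer side once the queue has ended and drained. The element
type WorkItem and the handleItem() callback are placeholders, not APIs from
this patch, and construction of the queue is omitted because the constructor
signature is not shown here:

    #include <vector>
    #include "IceThreading.h"

    struct WorkItem;                 // placeholder element type
    void handleItem(WorkItem *Item); // placeholder consumer callback

    void produce(Ice::BoundedProducerConsumerQueue<WorkItem> &Queue,
                 const std::vector<WorkItem *> &Items) {
      for (WorkItem *Item : Items)
        Queue.blockingPush(Item); // may block while the queue is "full"
      Queue.notifyEnd();          // signal that no more entries will be added
    }

    void consume(Ice::BoundedProducerConsumerQueue<WorkItem> &Queue) {
      // blockingPop() returns nullptr only once notifyEnd() has been called
      // and the queue is empty.
      while (WorkItem *Item = Queue.blockingPop())
        handleItem(Item);
    }
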
diff --git a/src/IceTimerTree.cpp b/src/IceTimerTree.cpp
index dc4622d..133cd41 100644
--- a/src/IceTimerTree.cpp
+++ b/src/IceTimerTree.cpp
@@ -8,8 +8,8 @@
 //===----------------------------------------------------------------------===//
 ///
 /// \file
-/// This file defines the TimerTree class, which tracks flat and
-/// cumulative execution time collection of call chains.
+/// This file defines the TimerTree class, which tracks flat and cumulative
+/// execution time collection of call chains.
 ///
 //===----------------------------------------------------------------------===//
 
@@ -41,8 +41,7 @@
 #undef STR
 }
 
-// Returns the unique timer ID for the given Name, creating a new ID
-// if needed.
+// Returns the unique timer ID for the given Name, creating a new ID if needed.
 TimerIdT TimerStack::getTimerID(const IceString &Name) {
   if (!BuildDefs::dump())
     return 0;
@@ -55,9 +54,9 @@
   return IDsIndex[Name];
 }
 
-// Creates a mapping from TimerIdT (leaf) values in the Src timer
-// stack into TimerIdT values in this timer stack.  Creates new
-// entries in this timer stack as needed.
+// Creates a mapping from TimerIdT (leaf) values in the Src timer stack into
+// TimerIdT values in this timer stack. Creates new entries in this timer stack
+// as needed.
 TimerStack::TranslationType
 TimerStack::translateIDsFrom(const TimerStack &Src) {
   size_t Size = Src.IDs.size();
@@ -68,8 +67,8 @@
   return Mapping;
 }
 
-// Merges two timer stacks, by combining and summing corresponding
-// entries.  This timer stack is updated from Src.
+// Merges two timer stacks, by combining and summing corresponding entries.
+// This timer stack is updated from Src.
 void TimerStack::mergeFrom(const TimerStack &Src) {
   if (!BuildDefs::dump())
     return;
@@ -78,11 +77,11 @@
   for (const TimerTreeNode &SrcNode : Src.Nodes) {
     // The first node is reserved as a sentinel, so avoid it.
     if (SrcIndex > 0) {
-      // Find the full path to the Src node, translated to path
-      // components corresponding to this timer stack.
+      // Find the full path to the Src node, translated to path components
+      // corresponding to this timer stack.
       PathType MyPath = Src.getPath(SrcIndex, Mapping);
-      // Find a node in this timer stack corresponding to the given
-      // path, creating new interior nodes as necessary.
+      // Find a node in this timer stack corresponding to the given path,
+      // creating new interior nodes as necessary.
       TTindex MyIndex = findPath(MyPath);
       Nodes[MyIndex].Time += SrcNode.Time;
       Nodes[MyIndex].UpdateCount += SrcNode.UpdateCount;
@@ -96,10 +95,9 @@
   StateChangeCount += Src.StateChangeCount;
 }
 
-// Constructs a path consisting of the sequence of leaf values leading
-// to a given node, with the Mapping translation applied to the leaf
-// values.  The path ends up being in "reverse" order, i.e. from leaf
-// to root.
+// Constructs a path consisting of the sequence of leaf values leading to a
+// given node, with the Mapping translation applied to the leaf values. The
+// path ends up being in "reverse" order, i.e. from leaf to root.
 TimerStack::PathType TimerStack::getPath(TTindex Index,
                                          const TranslationType &Mapping) const {
   PathType Path;
@@ -111,8 +109,8 @@
   return Path;
 }
 
-// Given a parent node and a leaf ID, returns the index of the
-// parent's child ID, creating a new node for the child as necessary.
+// Given a parent node and a leaf ID, returns the index of the parent's child
+// ID, creating a new node for the child as necessary.
 TimerStack::TTindex TimerStack::getChildIndex(TimerStack::TTindex Parent,
                                               TimerIdT ID) {
   if (Nodes[Parent].Children.size() <= ID)
@@ -127,12 +125,12 @@
   return Nodes[Parent].Children[ID];
 }
 
-// Finds a node in the timer stack corresponding to the given path,
-// creating new interior nodes as necessary.
+// Finds a node in the timer stack corresponding to the given path, creating
+// new interior nodes as necessary.
 TimerStack::TTindex TimerStack::findPath(const PathType &Path) {
   TTindex CurIndex = 0;
-  // The path is in reverse order (leaf to root), so it needs to be
-  // followed in reverse.
+  // The path is in reverse order (leaf to root), so it needs to be followed in
+  // reverse.
   for (TTindex Index : reverse_range(Path)) {
     CurIndex = getChildIndex(CurIndex, Index);
   }
@@ -150,8 +148,8 @@
   assert(StackTop);
 }
 
-// Pops the top marker from the timer stack.  Validates via assert()
-// that the expected marker is popped.
+// Pops the top marker from the timer stack. Validates via assert() that the
+// expected marker is popped.
 void TimerStack::pop(TimerIdT ID) {
   if (!BuildDefs::dump())
     return;
@@ -167,15 +165,15 @@
   StackTop = Nodes[StackTop].Parent;
 }
 
-// At a state change (e.g. push or pop), updates the flat and
-// cumulative timings for everything on the timer stack.
+// At a state change (e.g. push or pop), updates the flat and cumulative
+// timings for everything on the timer stack.
 void TimerStack::update(bool UpdateCounts) {
   if (!BuildDefs::dump())
     return;
   ++StateChangeCount;
-  // Whenever the stack is about to change, we grab the time delta
-  // since the last change and add it to all active cumulative
-  // elements and to the flat element for the top of the stack.
+  // Whenever the stack is about to change, we grab the time delta since the
+  // last change and add it to all active cumulative elements and to the flat
+  // element for the top of the stack.
   double Current = timestamp();
   double Delta = Current - LastTimestamp;
   if (StackTop) {
@@ -198,10 +196,10 @@
     assert(Next < Prefix);
     Prefix = Next;
   }
-  // Capture the next timestamp *after* the updates are finished.
-  // This minimizes how much the timer can perturb the reported
-  // timing.  The numbers may not sum to 100%, and the missing amount
-  // is indicative of the overhead of timing.
+  // Capture the next timestamp *after* the updates are finished. This
+  // minimizes how much the timer can perturb the reported timing. The numbers
+  // may not sum to 100%, and the missing amount is indicative of the overhead
+  // of timing.
   LastTimestamp = timestamp();
 }
 
@@ -234,8 +232,8 @@
   }
 }
 
-// Write a printf() format string into Buf[], in the format "[%5lu] ",
-// where "5" is actually the number of digits in MaxVal.  E.g.,
+// Write a printf() format string into Buf[], in the format "[%5lu] ", where
+// "5" is actually the number of digits in MaxVal. E.g.,
 //   MaxVal=0     ==> "[%1lu] "
 //   MaxVal=5     ==> "[%1lu] "
 //   MaxVal=9876  ==> "[%4lu] "
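
A standalone sketch of the Buf[] format-string behavior described above; the
function name makeCountFormat is hypothetical and the real TimerTree code may
build the string differently:

    #include <cstddef>
    #include <cstdio>

    // Writes "[%<digits>lu] " into Buf, where <digits> is the number of
    // decimal digits in MaxVal (minimum 1), matching the examples above.
    static void makeCountFormat(char *Buf, std::size_t BufSize,
                                unsigned long MaxVal) {
      int Digits = 1;
      for (unsigned long V = MaxVal; V >= 10; V /= 10)
        ++Digits;
      std::snprintf(Buf, BufSize, "[%%%dlu] ", Digits);
      // MaxVal=0 -> "[%1lu] ", MaxVal=9876 -> "[%4lu] "
    }
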
diff --git a/src/IceTimerTree.def b/src/IceTimerTree.def
index 6db9fbc..6e12219 100644
--- a/src/IceTimerTree.def
+++ b/src/IceTimerTree.def
@@ -7,8 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file lists predefined timing tags.  New tags can be added to
-// avoid a runtime string lookup.
+// This file lists predefined timing tags. New tags can be added to avoid a
+// runtime string lookup.
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/src/IceTimerTree.h b/src/IceTimerTree.h
index 98bbdda..796ce8c 100644
--- a/src/IceTimerTree.h
+++ b/src/IceTimerTree.h
@@ -8,8 +8,8 @@
 //===----------------------------------------------------------------------===//
 ///
 /// \file
-/// This file declares the TimerTree class, which allows flat and
-/// cumulative execution time collection of call chains.
+/// This file declares the TimerTree class, which allows flat and cumulative
+/// execution time collection of call chains.
 ///
 //===----------------------------------------------------------------------===//
 
@@ -26,23 +26,22 @@
   TimerStack() = delete;
   TimerStack &operator=(const TimerStack &) = delete;
 
-  /// Timer tree index type.  A variable of this type is used to access
-  /// an interior, not-necessarily-leaf node of the tree.
+  /// Timer tree index type. A variable of this type is used to access an
+  /// interior, not-necessarily-leaf node of the tree.
   using TTindex = std::vector<class TimerTreeNode>::size_type;
-  /// Representation of a path of leaf values leading to a particular
-  /// node.  The representation happens to be in "reverse" order,
-  /// i.e. from leaf/interior to root, for implementation efficiency.
+  /// Representation of a path of leaf values leading to a particular node. The
+  /// representation happens to be in "reverse" order, i.e. from leaf/interior
+  /// to root, for implementation efficiency.
   using PathType = llvm::SmallVector<TTindex, 8>;
-  /// Representation of a mapping of leaf node indexes from one timer
-  /// stack to another.
+  /// Representation of a mapping of leaf node indexes from one timer stack to
+  /// another.
   using TranslationType = std::vector<TimerIdT>;
 
-  /// TimerTreeNode represents an interior or leaf node in the call tree.
-  /// It contains a list of children, a pointer to its parent, and the
-  /// timer ID for the node.  It also holds the cumulative time spent at
-  /// this node and below.  The children are always at a higher index in
-  /// the TimerTreeNode::Nodes array, and the parent is always at a lower
-  /// index.
+  /// TimerTreeNode represents an interior or leaf node in the call tree. It
+  /// contains a list of children, a pointer to its parent, and the timer ID for
+  /// the node. It also holds the cumulative time spent at this node and below.
+  /// The children are always at a higher index in the TimerTreeNode::Nodes
+  /// array, and the parent is always at a lower index.
   class TimerTreeNode {
     TimerTreeNode &operator=(const TimerTreeNode &) = delete;
 
diff --git a/src/IceTranslator.cpp b/src/IceTranslator.cpp
index e3a32dc..68f2b1f 100644
--- a/src/IceTranslator.cpp
+++ b/src/IceTranslator.cpp
@@ -8,8 +8,8 @@
 //===----------------------------------------------------------------------===//
 ///
 /// \file
-/// This file defines the general driver class for translating ICE to
-/// machine code.
+/// This file defines the general driver class for translating ICE to machine
+/// code.
 ///
 //===----------------------------------------------------------------------===//
 
diff --git a/src/IceTranslator.h b/src/IceTranslator.h
index 449b216..415965a 100644
--- a/src/IceTranslator.h
+++ b/src/IceTranslator.h
@@ -8,8 +8,8 @@
 //===----------------------------------------------------------------------===//
 ///
 /// \file
-/// This file declares the general driver class for translating ICE to
-/// machine code.
+/// This file declares the general driver class for translating ICE to machine
+/// code.
 ///
 //===----------------------------------------------------------------------===//
 
@@ -30,7 +30,7 @@
 class VariableDeclaration;
 class GlobalContext;
 
-/// Base class for translating ICE to machine code.  Derived classes convert
+/// Base class for translating ICE to machine code. Derived classes convert
 /// other intermediate representations down to ICE, and then call the
 /// appropriate (inherited) methods to convert ICE into machine instructions.
 class Translator {
@@ -48,21 +48,21 @@
 
   const ClFlags &getFlags() const { return Ctx->getFlags(); }
 
-  /// Translates the constructed ICE function Fcn to machine code.
-  /// Takes ownership of Func.
+  /// Translates the constructed ICE function Func to machine code. Takes
+  /// ownership of Func.
   void translateFcn(std::unique_ptr<Cfg> Func);
 
-  /// Lowers the given list of global addresses to target. Generates
-  /// list of corresponding variable declarations.
+  /// Lowers the given list of global addresses to the target. Generates a list
+  /// of corresponding variable declarations.
   void
   lowerGlobals(std::unique_ptr<VariableDeclarationList> VariableDeclarations);
 
   /// Creates a name using the given prefix and corresponding index.
   std::string createUnnamedName(const IceString &Prefix, SizeT Index);
 
-  /// Reports if there is a (potential) conflict between Name, and using
-  /// Prefix to name unnamed names. Errors are put on Ostream.
-  /// Returns true if there isn't a potential conflict.
+  /// Reports if there is a (potential) conflict between Name and using Prefix
+  /// to name unnamed names. Errors are put on Ostream. Returns true if there
+  /// isn't a potential conflict.
   bool checkIfUnnamedNameSafe(const IceString &Name, const char *Kind,
                               const IceString &Prefix);
 
diff --git a/src/IceTypeConverter.h b/src/IceTypeConverter.h
index cb3536c..c61423c 100644
--- a/src/IceTypeConverter.h
+++ b/src/IceTypeConverter.h
@@ -8,8 +8,8 @@
 //===----------------------------------------------------------------------===//
 ///
 /// \file
-/// This file defines how to convert LLVM types to ICE types, and ICE types
-/// to LLVM types.
+/// This file defines how to convert LLVM types to ICE types, and ICE types to
+/// LLVM types.
 ///
 //===----------------------------------------------------------------------===//
 
@@ -40,8 +40,8 @@
   /// Context is the context to use to build llvm types.
   explicit TypeConverter(llvm::LLVMContext &Context);
 
-  /// Converts LLVM type LLVMTy to an ICE type. Returns
-  /// Ice::IceType_NUM if unable to convert.
+  /// Converts LLVM type LLVMTy to an ICE type. Returns Ice::IceType_NUM if
+  /// unable to convert.
   Type convertToIceType(llvm::Type *LLVMTy) const {
     auto Pos = LLVM2IceMap.find(LLVMTy);
     if (Pos == LLVM2IceMap.end())
diff --git a/src/IceTypes.cpp b/src/IceTypes.cpp
index dd06b1e..6cc79b7 100644
--- a/src/IceTypes.cpp
+++ b/src/IceTypes.cpp
@@ -58,8 +58,7 @@
 ICETYPE_PROPS_TABLE
 #undef X
 
-// Show vector definitions match in ICETYPE_TABLE and
-// ICETYPE_PROPS_TABLE.
+// Show that vector definitions match in ICETYPE_TABLE and ICETYPE_PROPS_TABLE.
 
 // Define constants for each element size in ICETYPE_TABLE.
 enum {
diff --git a/src/IceTypes.def b/src/IceTypes.def
index 94877a2..b86dba8 100644
--- a/src/IceTypes.def
+++ b/src/IceTypes.def
@@ -7,8 +7,7 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file defines properties of ICE primitive types in the form of
-// x-macros.
+// This file defines properties of ICE primitive types in the form of x-macros.
 //
 //===----------------------------------------------------------------------===//
 
@@ -16,10 +15,10 @@
 #define SUBZERO_SRC_ICETYPES_DEF
 
 // Attributes of each target architecture.
-// NOTE on is_elf64 -- At some point NaCl would like to use ELF32 for all
-// ILP32 sandboxes, but for now the 64-bit architectures use ELF64:
-// https://code.google.com/p/nativeclient/issues/detail?id=349
-// TODO: Whoever adds AArch64 will need to set ABI e_flags.
+// NOTE on is_elf64 -- At some point NaCl would like to use ELF32 for all ILP32
+// sandboxes, but for now the 64-bit architectures use ELF64:
+// https://code.google.com/p/nativeclient/issues/detail?id=349
+// TODO: Whoever adds AArch64 will need to set ABI e_flags.
 #define TARGETARCH_TABLE                                                 \
   /* enum value, printable string, is_elf64,   e_machine, e_flags */     \
   X(Target_X8632, "x86-32", false, EM_386,     0)                        \
diff --git a/src/IceTypes.h b/src/IceTypes.h
index 3c87f68..f176e9b 100644
--- a/src/IceTypes.h
+++ b/src/IceTypes.h
@@ -8,9 +8,8 @@
 //===----------------------------------------------------------------------===//
 ///
 /// \file
-/// This file declares a few properties of the primitive types allowed
-/// in Subzero.  Every Subzero source file is expected to include
-/// IceTypes.h.
+/// This file declares a few properties of the primitive types allowed in
+/// Subzero. Every Subzero source file is expected to include IceTypes.h.
 ///
 //===----------------------------------------------------------------------===//
 
@@ -42,8 +41,8 @@
   return Stream << targetArchString(Arch);
 }
 
-/// The list of all target instruction sets. Individual targets will
-/// map this to include only what is valid for the target.
+/// The list of all target instruction sets. Individual targets will map this to
+/// include only what is valid for the target.
 enum TargetInstructionSet {
   // Represents baseline that can be assumed for a target (usually "Begin").
   BaseInstructionSet,
@@ -97,8 +96,8 @@
   return result;
 }
 
-/// Check if Ty is byte sized and specifically i8. Assert that it's not
-/// byte sized due to being an i1.
+/// Check if Ty is byte sized and specifically i8. Assert that it's not byte
+/// sized due to being an i1.
 inline bool isByteSizedArithType(Type Ty) {
   assert(Ty != IceType_i1);
   return Ty == IceType_i8;
@@ -131,8 +130,8 @@
 public:
   using ArgListType = std::vector<Type>;
 
-  /// Creates a function signature type with the given return type.
-  /// Parameter types should be added using calls to appendArgType.
+  /// Creates a function signature type with the given return type. Parameter
+  /// types should be added using calls to appendArgType.
   FuncSigType() = default;
   FuncSigType(const FuncSigType &Ty) = default;
 
diff --git a/src/IceUtils.h b/src/IceUtils.h
index f07a566..9387671 100644
--- a/src/IceUtils.h
+++ b/src/IceUtils.h
@@ -19,11 +19,10 @@
 
 namespace Ice {
 
-/// Similar to bit_cast, but allows copying from types of unrelated
-/// sizes. This method was introduced to enable the strict aliasing
-/// optimizations of GCC 4.4. Basically, GCC mindlessly relies on
-/// obscure details in the C++ standard that make reinterpret_cast
-/// virtually useless.
+/// Similar to bit_cast, but allows copying from types of unrelated sizes. This
+/// method was introduced to enable the strict aliasing optimizations of GCC
+/// 4.4. Basically, GCC mindlessly relies on obscure details in the C++ standard
+/// that make reinterpret_cast virtually useless.
 template <class D, class S> inline D bit_copy(const S &source) {
   D destination;
   // This use of memcpy is safe: source and destination cannot overlap.
@@ -63,8 +62,8 @@
     return IsUint(N, Value);
   }
 
-  /// Return true if the addition X + Y will cause integer overflow for
-  /// integers of type T.
+  /// Return true if the addition X + Y will cause integer overflow for integers
+  /// of type T.
   template <typename T> static inline bool WouldOverflowAdd(T X, T Y) {
     return ((X > 0 && Y > 0 && (X > std::numeric_limits<T>::max() - Y)) ||
             (X < 0 && Y < 0 && (X < std::numeric_limits<T>::min() - Y)));
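
A short usage sketch of the overflow check above; the caller is illustrative,
and it assumes the WouldOverflowAdd template shown in this hunk is a static
member of an Ice::Utils class (only its body is visible here):

    #include <cstdint>
    #include "IceUtils.h"

    // Adds two 32-bit offsets only if the sum is representable.
    bool tryAddOffsets(int32_t A, int32_t B, int32_t &Sum) {
      if (Ice::Utils::WouldOverflowAdd(A, B))
        return false; // A + B would overflow int32_t
      Sum = A + B;
      return true;
    }
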
diff --git a/src/PNaClTranslator.cpp b/src/PNaClTranslator.cpp
index 023a433..d8d1860 100644
--- a/src/PNaClTranslator.cpp
+++ b/src/PNaClTranslator.cpp
@@ -41,11 +41,10 @@
 namespace {
 using namespace llvm;
 
-// Models elements in the list of types defined in the types block.
-// These elements can be undefined, a (simple) type, or a function type
-// signature. Note that an extended type is undefined on construction.
-// Use methods setAsSimpleType and setAsFuncSigType to define
-// the extended type.
+// Models elements in the list of types defined in the types block. These
+// elements can be undefined, a (simple) type, or a function type signature.
+// Note that an extended type is undefined on construction. Use methods
+// setAsSimpleType and setAsFuncSigType to define the extended type.
 class ExtendedType {
   ExtendedType &operator=(const ExtendedType &Ty) = delete;
 
@@ -61,8 +60,7 @@
   ExtendedType::TypeKind getKind() const { return Kind; }
   void dump(Ice::Ostream &Stream) const;
 
-  /// Changes the extended type to a simple type with the given
-  /// value.
+  /// Changes the extended type to a simple type with the given value.
   void setAsSimpleType(Ice::Type Ty) {
     assert(Kind == Undefined);
     Kind = Simple;
@@ -76,8 +74,8 @@
   }
 
 protected:
-  // Note: For simple types, the return type of the signature will
-  // be used to hold the simple type.
+  // Note: For simple types, the return type of the signature will be used to
+  // hold the simple type.
   Ice::FuncSigType Signature;
 
 private:
@@ -180,16 +178,15 @@
     BlockParser = NewBlockParser;
   }
 
-  /// Generates error with given Message, occurring at BitPosition
-  /// within the bitcode file. Always returns true.
+  /// Generates error with given Message, occurring at BitPosition within the
+  /// bitcode file. Always returns true.
   bool ErrorAt(naclbitc::ErrorLevel Level, uint64_t BitPosition,
                const std::string &Message) final;
 
   /// Generates error message with respect to the current block parser.
   bool blockError(const std::string &Message);
 
-  /// Returns the number of errors found while parsing the bitcode
-  /// file.
+  /// Returns the number of errors found while parsing the bitcode file.
   unsigned getNumErrors() const { return NumErrors; }
 
   /// Changes the size of the type list to the given size.
@@ -202,11 +199,11 @@
     return Translator.getFlags().getDisableIRGeneration();
   }
 
-  /// Returns the undefined type associated with type ID.
-  /// Note: Returns extended type ready to be defined.
+  /// Returns the undefined type associated with type ID. Note: Returns extended
+  /// type ready to be defined.
   ExtendedType *getTypeByIDForDefining(NaClBcIndexSize_t ID) {
-    // Get corresponding element, verifying the value is still undefined
-    // (and hence allowed to be defined).
+    // Get corresponding element, verifying the value is still undefined (and
+    // hence allowed to be defined).
     ExtendedType *Ty = getTypeByIDAsKind(ID, ExtendedType::Undefined);
     if (Ty)
       return Ty;
@@ -248,9 +245,9 @@
     FunctionDeclarations.push_back(Fcn);
   }
 
-  /// Returns the value id that should be associated with the the
-  /// current function block. Increments internal counters during call
-  /// so that it will be in correct position for next function block.
+  /// Returns the value id that should be associated with the current function
+  /// block. Increments internal counters during call so that it will be in the
+  /// correct position for the next function block.
   NaClBcIndexSize_t getNextFunctionBlockValueID() {
     size_t NumDeclaredFunctions = FunctionDeclarations.size();
     while (NextDefiningFunctionID < NumDeclaredFunctions &&
@@ -274,9 +271,9 @@
     return ValueIDConstants[ID];
   }
 
-  /// Install names for all global values without names. Called after
-  /// the global value symbol table is processed, but before any
-  /// function blocks are processed.
+  /// Install names for all global values without names. Called after the global
+  /// value symbol table is processed, but before any function blocks are
+  /// processed.
   void installGlobalNames() {
     assert(VariableDeclarations);
     installGlobalVarNames();
@@ -294,8 +291,8 @@
   /// Returns the number of function declarations in the bitcode file.
   size_t getNumFunctionIDs() const { return FunctionDeclarations.size(); }
 
-  /// Returns the number of global declarations (i.e. IDs) defined in
-  /// the bitcode file.
+  /// Returns the number of global declarations (i.e. IDs) defined in the
+  /// bitcode file.
   size_t getNumGlobalIDs() const {
     if (VariableDeclarations) {
       return FunctionDeclarations.size() + VariableDeclarations->size();
@@ -319,8 +316,8 @@
     return reportGetGlobalVariableByIDError(Index);
   }
 
-  /// Returns the global declaration (variable or function) with the
-  /// given Index.
+  /// Returns the global declaration (variable or function) with the given
+  /// Index.
   Ice::GlobalDeclaration *getGlobalDeclarationByID(NaClBcIndexSize_t Index) {
     size_t NumFunctionIds = FunctionDeclarations.size();
     if (Index < NumFunctionIds)
@@ -329,13 +326,12 @@
       return getGlobalVariableByID(Index - NumFunctionIds);
   }
 
-  /// Returns the list of parsed global variable
-  /// declarations. Releases ownership of the current list of global
-  /// variables. Note: only returns non-null pointer on first
-  /// call. All successive calls return a null pointer.
+  /// Returns the list of parsed global variable declarations. Releases
+  /// ownership of the current list of global variables. Note: only returns a
+  /// non-null pointer on the first call. All successive calls return a null
+  /// pointer.
   std::unique_ptr<Ice::VariableDeclarationList> getGlobalVariables() {
-    // Before returning, check that ValidIDConstants has already been
-    // built.
+    // Before returning, check that ValidIDConstants has already been built.
     assert(!VariableDeclarations ||
            VariableDeclarations->size() <= ValueIDConstants.size());
     return std::move(VariableDeclarations);
@@ -364,16 +360,14 @@
   Ice::ConstantList ValueIDConstants;
   // Error recovery value to use when getFuncSigTypeByID fails.
   Ice::FuncSigType UndefinedFuncSigType;
-  // The block parser currently being applied. Used for error
-  // reporting.
+  // The block parser currently being applied. Used for error reporting.
   BlockParserBaseClass *BlockParser = nullptr;
 
   bool ParseBlock(unsigned BlockID) override;
 
-  // Gets extended type associated with the given index, assuming the
-  // extended type is of the WantedKind. Generates error message if
-  // corresponding extended type of WantedKind can't be found, and
-  // returns nullptr.
+  // Gets extended type associated with the given index, assuming the extended
+  // type is of the WantedKind. Generates error message if corresponding
+  // extended type of WantedKind can't be found, and returns nullptr.
   ExtendedType *getTypeByIDAsKind(NaClBcIndexSize_t ID,
                                   ExtendedType::TypeKind WantedKind) {
     ExtendedType *Ty = nullptr;
@@ -387,12 +381,11 @@
     return nullptr;
   }
 
-  // Gives Decl a name if it doesn't already have one. Prefix and
-  // NameIndex are used to generate the name. NameIndex is
-  // automatically incremented if a new name is created.  DeclType is
-  // literal text describing the type of name being created. Also
-  // generates warning if created names may conflict with named
-  // declarations.
+  // Gives Decl a name if it doesn't already have one. Prefix and NameIndex are
+  // used to generate the name. NameIndex is automatically incremented if a new
+  // name is created. DeclType is literal text describing the type of name
+  // being created. Also generates warning if created names may conflict with
+  // named declarations.
   void installDeclarationName(Ice::GlobalDeclaration *Decl,
                               const Ice::IceString &Prefix,
                               const char *DeclType,
@@ -431,7 +424,7 @@
   }
 
   // Builds a constant symbol named Name, suppressing name mangling if
-  // SuppressMangling.  IsExternal is true iff the symbol is external.
+  // SuppressMangling. IsExternal is true iff the symbol is external.
   Ice::Constant *getConstantSym(const Ice::IceString &Name,
                                 bool SuppressMangling, bool IsExternal) const {
     if (IsExternal) {
@@ -471,17 +464,17 @@
   void reportBadTypeIDAs(NaClBcIndexSize_t ID, const ExtendedType *Ty,
                          ExtendedType::TypeKind WantedType);
 
-  // Reports that there is no function declaration for ID. Returns an
-  // error recovery value to use.
+  // Reports that there is no function declaration for ID. Returns an error
+  // recovery value to use.
   Ice::FunctionDeclaration *reportGetFunctionByIDError(NaClBcIndexSize_t ID);
 
-  // Reports that there is not global variable declaration for
-  // ID. Returns an error recovery value to use.
+  // Reports that there is no global variable declaration for ID. Returns an
+  // error recovery value to use.
   Ice::VariableDeclaration *
   reportGetGlobalVariableByIDError(NaClBcIndexSize_t Index);
 
-  // Reports that there is no corresponding ICE type for LLVMTy, and
-  // returns Ice::IceType_void.
+  // Reports that there is no corresponding ICE type for LLVMTy, and returns
+  // Ice::IceType_void.
   Ice::Type convertToIceTypeError(Type *LLVMTy);
 };
 
@@ -549,10 +542,9 @@
   return Ice::IceType_void;
 }
 
-// Base class for parsing blocks within the bitcode file.  Note:
-// Because this is the base class of block parsers, we generate error
-// messages if ParseBlock or ParseRecord is not overridden in derived
-// classes.
+// Base class for parsing blocks within the bitcode file. Note: Because this is
+// the base class of block parsers, we generate error messages if ParseBlock or
+// ParseRecord is not overridden in derived classes.
 class BlockParserBaseClass : public NaClBitcodeParser {
   BlockParserBaseClass() = delete;
   BlockParserBaseClass(const BlockParserBaseClass &) = delete;
@@ -595,16 +587,15 @@
     return getTranslator().getFlags().getDisableIRGeneration();
   }
 
-  // Default implementation. Reports that block is unknown and skips
-  // its contents.
+  // Default implementation. Reports that block is unknown and skips its
+  // contents.
   bool ParseBlock(unsigned BlockID) override;
 
-  // Default implementation. Reports that the record is not
-  // understood.
+  // Default implementation. Reports that the record is not understood.
   void ProcessRecord() override;
 
-  // Checks if the size of the record is Size.  Return true if valid.
-  // Otherwise generates an error and returns false.
+  // Checks if the size of the record is Size. Returns true if valid. Otherwise
+  // generates an error and returns false.
   bool isValidRecordSize(size_t Size, const char *RecordName) {
     const NaClBitcodeRecord::RecordVector &Values = Record.GetValues();
     if (Values.size() == Size)
@@ -613,9 +604,8 @@
     return false;
   }
 
-  // Checks if the size of the record is at least as large as the
-  // LowerLimit. Returns true if valid.  Otherwise generates an error
-  // and returns false.
+  // Checks if the size of the record is at least as large as the LowerLimit.
+  // Returns true if valid. Otherwise generates an error and returns false.
   bool isValidRecordSizeAtLeast(size_t LowerLimit, const char *RecordName) {
     const NaClBitcodeRecord::RecordVector &Values = Record.GetValues();
     if (Values.size() >= LowerLimit)
@@ -625,8 +615,8 @@
   }
 
   // Checks if the size of the record is no larger than the
-  // UpperLimit.  Returns true if valid.  Otherwise generates an error
-  // and returns false.
+  // UpperLimit. Returns true if valid. Otherwise generates an error and
+  // returns false.
   bool isValidRecordSizeAtMost(size_t UpperLimit, const char *RecordName) {
     const NaClBitcodeRecord::RecordVector &Values = Record.GetValues();
     if (Values.size() <= UpperLimit)
@@ -635,9 +625,9 @@
     return false;
   }
 
-  // Checks if the size of the record is at least as large as the
-  // LowerLimit, and no larger than the UpperLimit.  Returns true if
-  // valid.  Otherwise generates an error and returns false.
+  // Checks if the size of the record is at least as large as the LowerLimit,
+  // and no larger than the UpperLimit. Returns true if valid. Otherwise
+  // generates an error and returns false.
   bool isValidRecordSizeInRange(size_t LowerLimit, size_t UpperLimit,
                                 const char *RecordName) {
     return isValidRecordSizeAtLeast(LowerLimit, RecordName) ||
@@ -645,11 +635,10 @@
   }
 
 private:
-  /// Generates a record size error. ExpectedSize is the number
-  /// of elements expected. RecordName is the name of the kind of
-  /// record that has incorrect size. ContextMessage (if not nullptr)
-  /// is appended to "record expects" to describe how ExpectedSize
-  /// should be interpreted.
+  /// Generates a record size error. ExpectedSize is the number of elements
+  /// expected. RecordName is the name of the kind of record that has incorrect
+  /// size. ContextMessage (if not nullptr) is appended to "record expects" to
+  /// describe how ExpectedSize should be interpreted.
   void reportRecordSizeError(size_t ExpectedSize, const char *RecordName,
                              const char *ContextMessage);
 };
@@ -666,9 +655,9 @@
                                    const std::string &Message) {
   std::string Buffer;
   raw_string_ostream StrBuf(Buffer);
-  // Note: If dump routines have been turned off, the error messages
-  // will not be readable. Hence, replace with simple error. We also
-  // use the simple form for unit tests.
+  // Note: If dump routines have been turned off, the error messages will not
+  // be readable. Hence, replace with simple error. We also use the simple form
+  // for unit tests.
   if (getFlags().getGenerateUnitTestMessages()) {
     StrBuf << "Invalid " << getBlockName() << " record: <" << Record.GetCode();
     for (const uint64_t Val : Record.GetValues()) {
@@ -700,8 +689,8 @@
 }
 
 bool BlockParserBaseClass::ParseBlock(unsigned BlockID) {
-  // If called, derived class doesn't know how to handle block.
-  // Report error and skip.
+  // If called, derived class doesn't know how to handle block. Report error
+  // and skip.
   std::string Buffer;
   raw_string_ostream StrBuf(Buffer);
   StrBuf << "Don't know how to parse block id: " << BlockID;
@@ -742,8 +731,8 @@
 
 private:
   Ice::TimerMarker Timer;
-  // The type ID that will be associated with the next type defining
-  // record in the types block.
+  // The type ID that will be associated with the next type defining record in
+  // the types block.
   NaClBcIndexSize_t NextTypeId = 0;
 
   // The expected number of types, based on record TYPE_CODE_NUMENTRY.
@@ -773,13 +762,11 @@
       Error(StrBuf.str());
       ExpectedNumTypes = NaClBcIndexSize_t_Max;
     }
-    // The code double checks that Expected size and the actual size
-    // at the end of the block. To reduce allocations we preallocate
-    // the space.
+    // The code double-checks that the expected size matches the actual size at
+    // the end of the block. To reduce allocations we preallocate the space.
     //
-    // However, if the number is large, we suspect that the number
-    // is (possibly) incorrect. In that case, we preallocate a
-    // smaller space.
+    // However, if the number is large, we suspect that the number is
+    // (possibly) incorrect. In that case, we preallocate a smaller space.
     constexpr uint64_t DefaultLargeResizeValue = 1000000;
     Context->resizeTypeIDValues(std::min(Size, DefaultLargeResizeValue));
     ExpectedNumTypes = Size;
@@ -902,9 +889,9 @@
     FuncSigExtendedType *FuncTy = cast<FuncSigExtendedType>(Ty);
     FuncTy->setReturnType(Context->getSimpleTypeByID(Values[1]));
     for (size_t i = 2, e = Values.size(); i != e; ++i) {
-      // Check that type void not used as argument type.
-      // Note: PNaCl restrictions can't be checked until we
-      // know the name, because we have to check for intrinsic signatures.
+      // Check that type void is not used as an argument type. Note: PNaCl
+      // restrictions can't be checked until we know the name, because we have
+      // to check for intrinsic signatures.
       Ice::Type ArgTy = Context->getSimpleTypeByID(Values[i]);
       if (ArgTy == Ice::IceType_void) {
         std::string Buffer;
@@ -956,8 +943,8 @@
   // Holds the number of defined function IDs.
   NaClBcIndexSize_t NumFunctionIDs;
 
-  // Holds the specified number of global variables by the count record in
-  // the global variables block.
+  // Holds the number of global variables specified by the count record in the
+  // global variables block.
   NaClBcIndexSize_t SpecifiedNumberVars = 0;
 
   // Keeps track of how many initializers are expected for the global variable
@@ -967,9 +954,8 @@
   // The index of the next global variable declaration.
   NaClBcIndexSize_t NextGlobalID = 0;
 
-  // Dummy global variable declaration to guarantee CurGlobalVar is
-  // always defined (allowing code to not need to check if
-  // CurGlobalVar is nullptr).
+  // Dummy global variable declaration to guarantee CurGlobalVar is always
+  // defined (allowing code to not need to check if CurGlobalVar is nullptr).
   Ice::VariableDeclaration *DummyGlobalVar;
 
   // Holds the current global variable declaration being built.
@@ -1230,14 +1216,13 @@
       getTranslator().getContext()->pushTimer(TimerID, StackID);
     }
 
-    // Note: The Cfg is created, even when IR generation is disabled. This
-    // is done to install a CfgLocalAllocator for various internal containers.
+    // Note: The Cfg is created, even when IR generation is disabled. This is
+    // done to install a CfgLocalAllocator for various internal containers.
     Func = Ice::Cfg::create(getTranslator().getContext(),
                             getTranslator().getNextSequenceNumber());
     Ice::Cfg::setCurrentCfg(Func.get());
 
-    // TODO(kschimpf) Clean up API to add a function signature to
-    // a CFG.
+    // TODO(kschimpf) Clean up API to add a function signature to a CFG.
     const Ice::FuncSigType &Signature = FuncDecl->getSignature();
     if (isIRGenerationDisabled()) {
       CurrentNode = nullptr;
@@ -1257,17 +1242,17 @@
     }
     bool ParserResult = ParseThisBlock();
 
-    // Temporarily end per-function timing, which will be resumed by
-    // the translator function.  This is because translation may be
-    // done asynchronously in a separate thread.
+    // Temporarily end per-function timing, which will be resumed by the
+    // translator function. This is because translation may be done
+    // asynchronously in a separate thread.
     if (TimeThisFunction)
       getTranslator().getContext()->popTimer(TimerID, StackID);
 
     Ice::Cfg::setCurrentCfg(nullptr);
-    // Note: Once any errors have been found, we turn off all
-    // translation of all remaining functions. This allows successive
-    // parsing errors to be reported, without adding extra checks to
-    // the translator for such parsing errors.
+    // Note: Once any errors have been found, we turn off all translation of
+    // all remaining functions. This allows successive parsing errors to be
+    // reported, without adding extra checks to the translator for such parsing
+    // errors.
     if (Context->getNumErrors() == 0 && Func) {
       getTranslator().translateFcn(std::move(Func));
       // The translator now has ownership of Func.
@@ -1332,21 +1317,20 @@
   Ice::FunctionDeclaration *FuncDecl;
   // Holds the dividing point between local and global absolute value indices.
   size_t CachedNumGlobalValueIDs;
-  // Holds operands local to the function block, based on indices
-  // defined in the bitcode file.
+  // Holds operands local to the function block, based on indices defined in
+  // the bitcode file.
   Ice::OperandList LocalOperands;
-  // Holds the index within LocalOperands corresponding to the next
-  // instruction that generates a value.
+  // Holds the index within LocalOperands corresponding to the next instruction
+  // that generates a value.
   NaClBcIndexSize_t NextLocalInstIndex;
-  // True if the last processed instruction was a terminating
-  // instruction.
+  // True if the last processed instruction was a terminating instruction.
   bool InstIsTerminating = false;
   // Upper limit of alignment power allowed by LLVM
   static const uint32_t AlignPowerLimit = 29;
 
-  // Extracts the corresponding Alignment to use, given the AlignPower
-  // (i.e. 2**(AlignPower-1), or 0 if AlignPower == 0). InstName is the
-  // name of the instruction the alignment appears in.
+  // Extracts the corresponding Alignment to use, given the AlignPower (i.e.
+  // 2**(AlignPower-1), or 0 if AlignPower == 0). InstName is the name of the
+  // instruction the alignment appears in.
   void extractAlignment(const char *InstName, uint32_t AlignPower,
                         uint32_t &Alignment) {
     if (AlignPower <= AlignPowerLimit + 1) {
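The comment above describes the alignment decoding used by the function block parser: an encoded AlignPower of 0 means no alignment, and any other value N means 2**(N-1) bytes. A minimal standalone sketch of that mapping, under that reading (the helper name decodeAlignPower is illustrative, not part of Subzero):

    #include <cassert>
    #include <cstdint>

    // Hypothetical helper mirroring the decoding described above:
    // 0 -> 0 (unspecified), otherwise 2**(AlignPower-1) bytes.
    static uint32_t decodeAlignPower(uint32_t AlignPower) {
      return AlignPower == 0 ? 0 : (uint32_t(1) << (AlignPower - 1));
    }

    int main() {
      assert(decodeAlignPower(0) == 0); // no alignment specified
      assert(decodeAlignPower(1) == 1); // 2**0
      assert(decodeAlignPower(4) == 8); // 2**3
      return 0;
    }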
@@ -1396,10 +1380,9 @@
     return Func->getNodes()[Index];
   }
 
-  // Returns the Index-th basic block in the list of basic blocks.
-  // Assumes Index corresponds to a branch instruction. Hence, if
-  // the branch references the entry block, it also generates a
-  // corresponding error.
+  // Returns the Index-th basic block in the list of basic blocks. Assumes
+  // Index corresponds to a branch instruction. Hence, if the branch references
+  // the entry block, it also generates a corresponding error.
   Ice::CfgNode *getBranchBasicBlock(NaClBcIndexSize_t Index) {
     assert(!isIRGenerationDisabled());
     if (Index == 0) {
@@ -1448,8 +1431,7 @@
     return Var;
   }
 
-  // Converts a relative index (wrt to BaseIndex) to an absolute value
-  // index.
+  // Converts a relative index (wrt BaseIndex) to an absolute value index.
   NaClBcIndexSize_t convertRelativeToAbsIndex(NaClRelBcIndexSize_t Id,
                                               NaClRelBcIndexSize_t BaseIndex) {
     if (BaseIndex < Id) {
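For readers unfamiliar with PNaCl bitcode, the relative indices mentioned above count backwards from the current value index, so the conversion amounts to a checked subtraction. A rough sketch under that assumption (the names are illustrative, not Subzero's API):

    #include <cstdint>
    #include <optional>

    // Illustrative sketch: a relative index Id refers Id values back from
    // BaseIndex, so the absolute index is BaseIndex - Id. A relative index
    // larger than BaseIndex would point before the first value and is
    // treated as malformed.
    static std::optional<uint64_t> relativeToAbsolute(uint64_t Id,
                                                      uint64_t BaseIndex) {
      if (BaseIndex < Id)
        return std::nullopt;
      return BaseIndex - Id;
    }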
@@ -1508,8 +1490,8 @@
     LocalOperands[LocalIndex] = Op;
   }
 
-  // Returns the relative operand (wrt to BaseIndex) referenced by
-  // the given value Index.
+  // Returns the relative operand (wrt BaseIndex) referenced by the given
+  // value Index.
   Ice::Operand *getRelativeOperand(NaClBcIndexSize_t Index,
                                    NaClBcIndexSize_t BaseIndex) {
     return getOperand(convertRelativeToAbsIndex(Index, BaseIndex));
@@ -1518,13 +1500,12 @@
   // Returns the absolute index of the next value generating instruction.
   NaClBcIndexSize_t getNextInstIndex() const { return NextLocalInstIndex; }
 
-  // Generates type error message for binary operator Op
-  // operating on Type OpTy.
+  // Generates a type error message for binary operator Op operating on type
+  // OpTy.
   void reportInvalidBinaryOp(Ice::InstArithmetic::OpKind Op, Ice::Type OpTy);
 
-  // Validates if integer logical Op, for type OpTy, is valid.
-  // Returns true if valid. Otherwise generates error message and
-  // returns false.
+  // Validates if integer logical Op, for type OpTy, is valid. Returns true if
+  // valid. Otherwise generates error message and returns false.
   bool isValidIntegerLogicalOp(Ice::InstArithmetic::OpKind Op, Ice::Type OpTy) {
     if (Ice::isIntegerType(OpTy))
       return true;
@@ -1532,9 +1513,9 @@
     return false;
   }
 
-  // Validates if integer (or vector of integers) arithmetic Op, for type
-  // OpTy, is valid.  Returns true if valid. Otherwise generates
-  // error message and returns false.
+  // Validates if integer (or vector of integers) arithmetic Op, for type OpTy,
+  // is valid. Returns true if valid. Otherwise generates error message and
+  // returns false.
   bool isValidIntegerArithOp(Ice::InstArithmetic::OpKind Op, Ice::Type OpTy) {
     if (Ice::isIntegerArithmeticType(OpTy))
       return true;
@@ -1542,9 +1523,8 @@
     return false;
   }
 
-  // Checks if floating arithmetic Op, for type OpTy, is valid.
-  // Returns true if valid. Otherwise generates an error message and
-  // returns false;
+  // Checks if floating arithmetic Op, for type OpTy, is valid. Returns true if
+  // valid. Otherwise generates an error message and returns false.
   bool isValidFloatingArithOp(Ice::InstArithmetic::OpKind Op, Ice::Type OpTy) {
     if (Ice::isFloatingType(OpTy))
       return true;
@@ -1552,9 +1532,9 @@
     return false;
   }
 
-  // Checks if the type of operand Op is the valid pointer type, for
-  // the given InstructionName. Returns true if valid. Otherwise
-  // generates an error message and returns false.
+  // Checks if the type of operand Op is the valid pointer type for the given
+  // InstructionName. Returns true if valid. Otherwise generates an error
+  // message and returns false.
   bool isValidPointerType(Ice::Operand *Op, const char *InstructionName) {
     Ice::Type PtrType = Ice::getPointerType();
     if (Op->getType() == PtrType)
@@ -1567,9 +1547,8 @@
     return false;
   }
 
-  // Checks if loading/storing a value of type Ty is allowed.
-  // Returns true if Valid. Otherwise generates an error message and
-  // returns false.
+  // Checks if loading/storing a value of type Ty is allowed. Returns true if
+  // valid. Otherwise generates an error message and returns false.
   bool isValidLoadStoreType(Ice::Type Ty, const char *InstructionName) {
     if (isLoadStoreType(Ty))
       return true;
@@ -1580,9 +1559,8 @@
     return false;
   }
 
-  // Checks if loading/storing a value of type Ty is allowed for
-  // the given Alignment. Otherwise generates an error message and
-  // returns false.
+  // Checks if loading/storing a value of type Ty is allowed for the given
+  // Alignment. Otherwise generates an error message and returns false.
   bool isValidLoadStoreAlignment(size_t Alignment, Ice::Type Ty,
                                  const char *InstructionName) {
     if (!isValidLoadStoreType(Ty, InstructionName))
@@ -1598,8 +1576,8 @@
   }
 
   // Defines if the given alignment is valid for the given type. Simplified
-  // version of PNaClABIProps::isAllowedAlignment, based on API's offered
-  // for Ice::Type.
+  // version of PNaClABIProps::isAllowedAlignment, based on APIs offered for
+  // Ice::Type.
   bool isAllowedAlignment(size_t Alignment, Ice::Type Ty) const {
     return Alignment == typeAlignInBytes(Ty) ||
            (Alignment == 1 && !isVectorType(Ty));
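To make the rule above concrete: scalar accesses may use either the type's natural alignment or an alignment of 1, while vector accesses must use the natural alignment. A small compile-time sketch with the Ice::Type queries abstracted into parameters (the concrete alignment values are assumptions for illustration, not taken from the source):

    #include <cstddef>

    // Same predicate as above, with the type queries replaced by plain
    // parameters so the rule can be checked in isolation.
    constexpr bool isAllowedAlignmentSketch(std::size_t Alignment,
                                            std::size_t NaturalAlign,
                                            bool IsVector) {
      return Alignment == NaturalAlign || (Alignment == 1 && !IsVector);
    }

    static_assert(isAllowedAlignmentSketch(4, 4, false), "i32, natural");
    static_assert(isAllowedAlignmentSketch(1, 4, false), "i32, unaligned");
    static_assert(!isAllowedAlignmentSketch(1, 16, true), "vector must align");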
@@ -1655,9 +1633,9 @@
     return VectorIndexValid;
   }
 
-  // Takes the PNaCl bitcode binary operator Opcode, and the opcode
-  // type Ty, and sets Op to the corresponding ICE binary
-  // opcode. Returns true if able to convert, false otherwise.
+  // Takes the PNaCl bitcode binary operator Opcode, and the opcode type Ty,
+  // and sets Op to the corresponding ICE binary opcode. Returns true if able
+  // to convert, false otherwise.
   bool convertBinopOpcode(unsigned Opcode, Ice::Type Ty,
                           Ice::InstArithmetic::OpKind &Op) {
     switch (Opcode) {
@@ -1736,9 +1714,9 @@
     }
   }
 
-  /// Simplifies out vector types from Type1 and Type2, if both are vectors
-  /// of the same size. Returns true iff both are vectors of the same size,
-  /// or are both scalar types.
+  /// Simplifies out vector types from Type1 and Type2, if both are vectors of
+  /// the same size. Returns true iff both are vectors of the same size, or are
+  /// both scalar types.
   static bool simplifyOutCommonVectorType(Ice::Type &Type1, Ice::Type &Type2) {
     bool IsType1Vector = isVectorType(Type1);
     bool IsType2Vector = isVectorType(Type2);
@@ -1781,8 +1759,8 @@
     return isFloatTruncCastValid(TargetType, SourceType);
   }
 
-  /// Returns true iff a cast from floating type SourceType to integer
-  /// type TargetType is valid.
+  /// Returns true iff a cast from floating type SourceType to integer type
+  /// TargetType is valid.
   static bool isFloatToIntCastValid(Ice::Type SourceType,
                                     Ice::Type TargetType) {
     if (!(Ice::isFloatingType(SourceType) && Ice::isIntegerType(TargetType)))
@@ -1797,15 +1775,15 @@
     return true;
   }
 
-  /// Returns true iff a cast from integer type SourceType to floating
-  /// type TargetType is valid.
+  /// Returns true iff a cast from integer type SourceType to floating type
+  /// TargetType is valid.
   static bool isIntToFloatCastValid(Ice::Type SourceType,
                                     Ice::Type TargetType) {
     return isFloatToIntCastValid(TargetType, SourceType);
   }
 
-  /// Returns the number of bits used to model type Ty when defining the
-  /// bitcast instruction.
+  /// Returns the number of bits used to model type Ty when defining the bitcast
+  /// instruction.
   static Ice::SizeT bitcastSizeInBits(Ice::Type Ty) {
     if (Ice::isVectorType(Ty))
       return Ice::typeNumElements(Ty) *
@@ -1820,10 +1798,10 @@
     return bitcastSizeInBits(SourceType) == bitcastSizeInBits(TargetType);
   }
 
-  /// Returns true iff the NaCl bitcode Opcode is a valid cast opcode
-  /// for converting SourceType to TargetType. Updates CastKind to the
-  /// corresponding instruction cast opcode. Also generates an error
-  /// message when this function returns false.
+  /// Returns true iff the NaCl bitcode Opcode is a valid cast opcode for
+  /// converting SourceType to TargetType. Updates CastKind to the corresponding
+  /// instruction cast opcode. Also generates an error message when this
+  /// function returns false.
   bool convertCastOpToIceOp(uint64_t Opcode, Ice::Type SourceType,
                             Ice::Type TargetType,
                             Ice::InstCast::OpKind &CastKind) {
@@ -1888,8 +1866,8 @@
     return Result;
   }
 
-  // Converts PNaCl bitcode Icmp operator to corresponding ICE op.
-  // Returns true if able to convert, false otherwise.
+  // Converts PNaCl bitcode Icmp operator to corresponding ICE op. Returns true
+  // if able to convert, false otherwise.
   bool convertNaClBitcICmpOpToIce(uint64_t Op,
                                   Ice::InstIcmp::ICond &Cond) const {
     switch (Op) {
@@ -1930,8 +1908,8 @@
     }
   }
 
-  // Converts PNaCl bitcode Fcmp operator to corresponding ICE op.
-  // Returns true if able to convert, false otherwise.
+  // Converts PNaCl bitcode Fcmp operator to corresponding ICE op. Returns true
+  // if able to convert, false otherwise.
   bool convertNaClBitcFCompOpToIce(uint64_t Op,
                                    Ice::InstFcmp::FCond &Cond) const {
     switch (Op) {
@@ -1990,15 +1968,14 @@
     }
   }
 
-  // Creates an error instruction, generating a value of type Ty, and
-  // adds a placeholder so that instruction indices line up.
-  // Some instructions, such as a call, will not generate a value
-  // if the return type is void. In such cases, a placeholder value
-  // for the badly formed instruction is not needed. Hence, if Ty is
-  // void, an error instruction is not appended.
+  // Creates an error instruction, generating a value of type Ty, and adds a
+  // placeholder so that instruction indices line up. Some instructions, such
+  // as a call, will not generate a value if the return type is void. In such
+  // cases, a placeholder value for the badly formed instruction is not needed.
+  // Hence, if Ty is void, an error instruction is not appended.
   void appendErrorInstruction(Ice::Type Ty) {
-    // Note: we don't worry about downstream translation errors because
-    // the function will not be translated if any errors occur.
+    // Note: we don't worry about downstream translation errors because the
+    // function will not be translated if any errors occur.
     if (Ty == Ice::IceType_void)
       return;
     Ice::Variable *Var = getNextInstVar(Ty);
@@ -2025,8 +2002,8 @@
   }
   if (isIRGenerationDisabled())
     return;
-  // Before translating, check for blocks without instructions, and
-  // insert unreachable. This shouldn't happen, but be safe.
+  // Before translating, check for blocks without instructions, and insert
+  // unreachable. This shouldn't happen, but be safe.
   size_t Index = 0;
   for (Ice::CfgNode *Node : Func->getNodes()) {
     if (Node->getInsts().empty()) {
@@ -2051,8 +2028,8 @@
 }
 
 void FunctionParser::ProcessRecord() {
-  // Note: To better separate parse/IR generation times, when IR generation
-  // is disabled we do the following:
+  // Note: To better separate parse/IR generation times, when IR generation is
+  // disabled we do the following:
   // 1) Delay exiting until after we extract operands.
   // 2) return before we access operands, since all operands will be a nullptr.
   const NaClBitcodeRecord::RecordVector &Values = Record.GetValues();
@@ -2382,11 +2359,10 @@
     // SWITCH: [Condty, Cond, BbIndex, NumCases Case ...]
     // where Case = [1, 1, Value, BbIndex].
     //
-    // Note: Unlike most instructions, we don't infer the type of
-    // Cond, but provide it as a separate field. There are also
-    // unnecesary data fields (i.e. constants 1).  These were not
-    // cleaned up in PNaCl bitcode because the bitcode format was
-    // already frozen when the problem was noticed.
+    // Note: Unlike most instructions, we don't infer the type of Cond, but
+    // provide it as a separate field. There are also unnecessary data fields
+    // (i.e. constants 1). These were not cleaned up in PNaCl bitcode because
+    // the bitcode format was already frozen when the problem was noticed.
     InstIsTerminating = true;
     if (!isValidRecordSizeAtLeast(4, "switch"))
       return;
@@ -2591,13 +2567,12 @@
     // CALL: [cc, fnid, arg0, arg1...]
     // CALL_INDIRECT: [cc, fn, returnty, args...]
     //
-    // Note: The difference between CALL and CALL_INDIRECT is that
-    // CALL has a reference to an explicit function declaration, while
-    // the CALL_INDIRECT is just an address. For CALL, we can infer
-    // the return type by looking up the type signature associated
-    // with the function declaration. For CALL_INDIRECT we can only
-    // infer the type signature via argument types, and the
-    // corresponding return type stored in CALL_INDIRECT record.
+    // Note: The difference between CALL and CALL_INDIRECT is that CALL has a
+    // reference to an explicit function declaration, while the CALL_INDIRECT
+    // is just an address. For CALL, we can infer the return type by looking up
+    // the type signature associated with the function declaration. For
+    // CALL_INDIRECT we can only infer the type signature via argument types,
+    // and the corresponding return type stored in the CALL_INDIRECT record.
     Ice::SizeT ParamsStartIndex = 2;
     if (Record.GetCode() == naclbitc::FUNC_CODE_INST_CALL) {
       if (!isValidRecordSizeAtLeast(2, "call"))
@@ -2763,8 +2738,8 @@
 
   Ice::GlobalContext *getContext() { return getTranslator().getContext(); }
 
-  // Returns true if the type to use for succeeding constants is defined.
-  // If false, also generates an error message.
+  // Returns true if the type to use for succeeding constants is defined. If
+  // false, also generates an error message.
   bool isValidNextConstantType() {
     if (NextConstantType != Ice::IceType_void)
       return true;
@@ -2887,8 +2862,8 @@
   void setValueName(NaClBcIndexSize_t Index, StringType &Name) override;
   void setBbName(NaClBcIndexSize_t Index, StringType &Name) override;
 
-  // Reports that the assignment of Name to the value associated with
-  // index is not possible, for the given Context.
+  // Reports that the assignment of Name to the value associated with Index is
+  // not possible, for the given Context.
   void reportUnableToAssign(const char *Context, NaClBcIndexSize_t Index,
                             StringType &Name) {
     std::string Buffer;
@@ -2976,10 +2951,10 @@
   // and have generated global constant initializers.
   bool GlobalDeclarationNamesAndInitializersInstalled = false;
 
-  // Generates names for unnamed global addresses (i.e. functions and
-  // global variables). Then lowers global variable declaration
-  // initializers to the target. May be called multiple times. Only
-  // the first call will do the installation.
+  // Generates names for unnamed global addresses (i.e. functions and global
+  // variables). Then lowers global variable declaration initializers to the
+  // target. May be called multiple times. Only the first call will do the
+  // installation.
   void installGlobalNamesAndGlobalVarInitializers() {
     if (!GlobalDeclarationNamesAndInitializersInstalled) {
       Context->installGlobalNames();
@@ -3130,11 +3105,11 @@
 
 void PNaClTranslator::translate(const std::string &IRFilename,
                                 std::unique_ptr<MemoryObject> &&MemObj) {
-  // On error, we report_fatal_error to avoid destroying the MemObj.
-  // That may still be in use by IceBrowserCompileServer. Otherwise,
-  // we need to change the MemObj to be ref-counted, or have a wrapper,
-  // or simply leak. We also need a hook to tell the IceBrowserCompileServer
-  // to unblock its QueueStreamer.
+  // On error, we report_fatal_error to avoid destroying the MemObj. That may
+  // still be in use by IceBrowserCompileServer. Otherwise, we need to change
+  // the MemObj to be ref-counted, or have a wrapper, or simply leak. We also
+  // need a hook to tell the IceBrowserCompileServer to unblock its
+  // QueueStreamer.
   // https://code.google.com/p/nativeclient/issues/detail?id=4163
   Ostream &ErrStream = getContext()->getStrError();
   // Read header and verify it is good.
diff --git a/src/PNaClTranslator.h b/src/PNaClTranslator.h
index 24a627b..8a045ad 100644
--- a/src/PNaClTranslator.h
+++ b/src/PNaClTranslator.h
@@ -37,8 +37,8 @@
   ~PNaClTranslator() override = default;
 
   /// Reads the PNaCl bitcode file and translates to ICE, which is then
-  /// converted to machine code. Sets ErrorStatus to 1 if any errors
-  /// occurred. Takes ownership of the MemoryObject.
+  /// converted to machine code. Sets ErrorStatus to 1 if any errors occurred.
+  /// Takes ownership of the MemoryObject.
   void translate(const std::string &IRFilename,
                  std::unique_ptr<llvm::MemoryObject> &&MemoryObject);
 
diff --git a/src/README.SIMD.rst b/src/README.SIMD.rst
index 58f25d9..f8cf08f 100644
--- a/src/README.SIMD.rst
+++ b/src/README.SIMD.rst
@@ -1,13 +1,14 @@
 Missing support
 ===============
 
-* The PNaCl LLVM backend expands shufflevector operations into
-  sequences of insertelement and extractelement operations. For
-  instance:
+* The PNaCl LLVM backend expands shufflevector operations into sequences of
+  insertelement and extractelement operations. For instance:
 
     define <4 x i32> @shuffle(<4 x i32> %arg1, <4 x i32> %arg2) {
     entry:
-      %res = shufflevector <4 x i32> %arg1, <4 x i32> %arg2, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
+      %res = shufflevector <4 x i32> %arg1,
+                           <4 x i32> %arg2,
+                           <4 x i32> <i32 4, i32 5, i32 0, i32 1>
       ret <4 x i32> %res
     }
 
@@ -30,38 +31,34 @@
   shuffle operations where appropriate.
 
 * Add support for vector constants in the backend. The current code
-  materializes the vector constants it needs (eg. for performing icmp
-  on unsigned operands) using register operations, but this should be
-  changed to loading them from a constant pool if the register
-  initialization is too complicated (such as in
-  TargetX8632::makeVectorOfHighOrderBits()).
+  materializes the vector constants it needs (e.g. for performing icmp on
+  unsigned operands) using register operations, but this should be changed to
+  loading them from a constant pool if the register initialization is too
+  complicated (such as in TargetX8632::makeVectorOfHighOrderBits()).
 
-* [x86 specific] llvm-mc does not allow lea to take a mem128 memory
-  operand when assembling x86-32 code. The current
-  InstX8632Lea::emit() code uses Variable::asType() to convert any
-  mem128 Variables into a compatible memory operand type. However, the
-  emit code does not do any conversions of OperandX8632Mem, so if an
-  OperandX8632Mem is passed to lea as mem128 the resulting code will
-  not assemble.  One way to fix this is by implementing
+* [x86 specific] llvm-mc does not allow lea to take a mem128 memory operand
+  when assembling x86-32 code. The current InstX8632Lea::emit() code uses
+  Variable::asType() to convert any mem128 Variables into a compatible memory
+  operand type. However, the emit code does not do any conversions of
+  OperandX8632Mem, so if an OperandX8632Mem is passed to lea as mem128 the
+  resulting code will not assemble. One way to fix this is by implementing
   OperandX8632Mem::asType().
 
-* [x86 specific] Lower shl with <4 x i32> using some clever float
-  conversion:
+* [x86 specific] Lower shl with <4 x i32> using some clever float conversion:
 http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20100726/105087.html
 
-* [x86 specific] Add support for using aligned mov operations
-  (movaps). This will require passing alignment information to loads
-  and stores.
+* [x86 specific] Add support for using aligned mov operations (movaps). This
+  will require passing alignment information to loads and stores.
 
 x86 SIMD Diversification
 ========================
 
-* Vector "bitwise" operations have several variant instructions: the
-  AND operation can be implemented with pand, andpd, or andps. This
-  pattern also holds for ANDN, OR, and XOR.
+* Vector "bitwise" operations have several variant instructions: the AND
+  operation can be implemented with pand, andpd, or andps. This pattern also
+  holds for ANDN, OR, and XOR.
 
-* Vector "mov" instructions can be diversified (eg. movdqu instead of
-  movups) at the cost of a possible performance penalty.
+* Vector "mov" instructions can be diversified (eg. movdqu instead of movups)
+  at the cost of a possible performance penalty.
 
-* Scalar FP arithmetic can be diversified by performing the operations
-  with the vector version of the instructions.
+* Scalar FP arithmetic can be diversified by performing the operations with the
+  vector version of the instructions.
diff --git a/src/main.cpp b/src/main.cpp
index a6cb490..fa5bb06 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -8,8 +8,8 @@
 //===----------------------------------------------------------------------===//
 ///
 /// \file
-/// This file defines the entry point for translating PNaCl bitcode into
-/// native code.
+/// This file defines the entry point for translating PNaCl bitcode into native
+/// code.
 ///
 //===----------------------------------------------------------------------===//
 
@@ -22,8 +22,8 @@
   Ice::Compiler Comp;
 // Can only compile the BrowserCompileServer w/ the NaCl compiler.
 #if PNACL_BROWSER_TRANSLATOR
-  // There are no real commandline arguments in the browser case.
-  // They are supplied via IPC.
+  // There are no real commandline arguments in the browser case. They are
+  // supplied via IPC.
   assert(argc == 1);
   (void)argc;
   (void)argv;