Reflow comments to use the full width.
BUG=
R=stichnot@chromium.org
Review URL: https://codereview.chromium.org/1341423002 .
diff --git a/src/IceAPFloat.h b/src/IceAPFloat.h
index ccfb7f3..9aed889 100644
--- a/src/IceAPFloat.h
+++ b/src/IceAPFloat.h
@@ -11,8 +11,8 @@
/// \brief This file implements a class to represent Subzero float and double
/// values.
///
-/// Note: This is a simplified version of
-/// llvm/include/llvm/ADT/APFloat.h for use with Subzero.
+/// Note: This is a simplified version of llvm/include/llvm/ADT/APFloat.h for
+/// use with Subzero.
//===----------------------------------------------------------------------===//
#ifndef SUBZERO_SRC_ICEAPFLOAT_H
diff --git a/src/IceAssembler.cpp b/src/IceAssembler.cpp
index 5c1760b..9c77dce 100644
--- a/src/IceAssembler.cpp
+++ b/src/IceAssembler.cpp
@@ -48,13 +48,13 @@
}
void AssemblerBuffer::EnsureCapacity::validate(AssemblerBuffer *buffer) {
- // In debug mode, we save the assembler buffer along with the gap
- // size before we start emitting to the buffer. This allows us to
- // check that any single generated instruction doesn't overflow the
- // limit implied by the minimum gap size.
+ // In debug mode, we save the assembler buffer along with the gap size before
+ // we start emitting to the buffer. This allows us to check that any single
+ // generated instruction doesn't overflow the limit implied by the minimum
+ // gap size.
Gap = computeGap();
- // Make sure that extending the capacity leaves a big enough gap
- // for any kind of instruction.
+ // Make sure that extending the capacity leaves a big enough gap for any kind
+ // of instruction.
assert(Gap >= kMinimumGap);
// Mark the buffer as having ensured the capacity.
assert(!buffer->hasEnsuredCapacity()); // Cannot nest.
@@ -64,8 +64,8 @@
AssemblerBuffer::EnsureCapacity::~EnsureCapacity() {
// Unmark the buffer, so we cannot emit after this.
Buffer->HasEnsuredCapacity = false;
- // Make sure the generated instruction doesn't take up more
- // space than the minimum gap.
+ // Make sure the generated instruction doesn't take up more space than the
+ // minimum gap.
intptr_t delta = Gap - computeGap();
(void)delta;
assert(delta <= kMinimumGap);
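
The gap check described in the two hunks above can be illustrated with a small standalone sketch. This is a simplification, not the real AssemblerBuffer; ToyAssemblerBuffer, emitUint8, and the kMinimumGap value of 32 are assumptions made for the example.

  #include <cassert>
  #include <cstddef>
  #include <cstdint>
  #include <vector>

  class ToyAssemblerBuffer {
  public:
    static constexpr std::ptrdiff_t kMinimumGap = 32; // assumed value

    explicit ToyAssemblerBuffer(std::size_t Capacity) : Capacity(Capacity) {}

    std::ptrdiff_t computeGap() const {
      return static_cast<std::ptrdiff_t>(Capacity) -
             static_cast<std::ptrdiff_t>(Bytes.size());
    }
    void emitUint8(std::uint8_t B) { Bytes.push_back(B); }

    // RAII guard: record the gap before emission starts, then verify on
    // destruction that the single emitted instruction fit in the minimum gap.
    class EnsureCapacity {
    public:
      explicit EnsureCapacity(ToyAssemblerBuffer *Buffer) : Buffer(Buffer) {
        Gap = Buffer->computeGap();
        assert(Gap >= kMinimumGap);
      }
      ~EnsureCapacity() {
        std::ptrdiff_t Delta = Gap - Buffer->computeGap();
        (void)Delta;
        assert(Delta <= kMinimumGap);
      }

    private:
      ToyAssemblerBuffer *Buffer;
      std::ptrdiff_t Gap;
    };

  private:
    std::vector<std::uint8_t> Bytes;
    std::size_t Capacity;
  };

  // Usage: wrap each instruction emission in a guard so an over-long encoding
  // trips the assertion in debug builds.
  inline void emitToyNop(ToyAssemblerBuffer &Buffer) {
    ToyAssemblerBuffer::EnsureCapacity Ensured(&Buffer);
    Buffer.emitUint8(0x90); // x86 NOP
  }
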
@@ -133,9 +133,9 @@
}
Str << "\t.long ";
// For PCRel fixups, we write the pc-offset from a symbol into the Buffer
- // (e.g., -4), but we don't represent that in the fixup's offset.
- // Otherwise the fixup holds the true offset, and so does the Buffer.
- // Just load the offset from the buffer.
+ // (e.g., -4), but we don't represent that in the fixup's offset. Otherwise
+ // the fixup holds the true offset, and so does the Buffer. Just load the
+ // offset from the buffer.
NextFixup->emit(Ctx, Buffer.load<RelocOffsetT>(NextFixupLoc));
if (fixupIsPCRel(NextFixup->kind()))
Str << " - .";
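
A minimal sketch of the emission rule this comment describes, with hypothetical names (ToyFixup, emitToyFixup) standing in for the real fixup and assembler types: the addend printed comes from the code buffer, and " - ." is appended only for pc-relative fixups.

  #include <cstdint>
  #include <ostream>
  #include <string>

  // Hypothetical fixup record for the sketch; the real fixup classes carry a
  // kind, an offset, and a relocatable value rather than a bare symbol name.
  struct ToyFixup {
    std::string Symbol;
    bool IsPCRel;
  };

  // Emit one ".long" directive. Addend is the value already stored in the
  // code buffer at the fixup location (e.g. -4 for a pc-relative call), which
  // is why the caller loads it from the buffer rather than from the fixup.
  inline void emitToyFixup(std::ostream &Str, const ToyFixup &Fixup,
                           std::int32_t Addend) {
    Str << "\t.long " << Fixup.Symbol;
    if (Addend > 0)
      Str << " + " << Addend;
    else if (Addend < 0)
      Str << " - " << -static_cast<std::int64_t>(Addend);
    if (Fixup.IsPCRel)
      Str << " - ."; // subtract the current location for pc-relative fixups
    Str << "\n";
  }
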
diff --git a/src/IceAssembler.h b/src/IceAssembler.h
index 5b07975..8247e66 100644
--- a/src/IceAssembler.h
+++ b/src/IceAssembler.h
@@ -15,9 +15,9 @@
//===----------------------------------------------------------------------===//
///
/// \file
-/// This file declares the Assembler base class. Instructions are assembled
-/// by architecture-specific assemblers that derive from this base class.
-/// This base class manages buffers and fixups for emitting code, etc.
+/// This file declares the Assembler base class. Instructions are assembled by
+/// architecture-specific assemblers that derive from this base class. This base
+/// class manages buffers and fixups for emitting code, etc.
///
//===----------------------------------------------------------------------===//
@@ -55,7 +55,7 @@
}
/// Returns the position of an earlier branch instruction that was linked to
- /// this label (branches that use this are considered forward branches). The
+ /// this label (branches that use this are considered forward branches). The
/// linked instructions form a linked list, of sorts, using the instruction's
/// displacement field for the location of the next instruction that is also
/// linked to this label.
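
The linked-list-through-displacement scheme this comment describes can be sketched as follows; ToyLabel, ToyEmitter, and the 4-byte displacement slots are assumptions for illustration, not Subzero's Label or Assembler classes.

  #include <cstdint>
  #include <vector>

  // While a label is unbound, each branch's displacement slot stores the
  // position of the previously linked branch, forming an in-buffer linked
  // list that bind() walks and patches.
  struct ToyLabel {
    std::int32_t Link = -1; // position of the most recently linked branch
    bool isLinked() const { return Link >= 0; }
  };

  class ToyEmitter {
  public:
    // Emit a jump to Label; the displacement slot temporarily holds the prior
    // link in the chain (-1 marks the end of the chain).
    void jumpTo(ToyLabel &Label) {
      Code.push_back(0xE9); // x86 near JMP opcode, for illustration
      std::int32_t DispPos = static_cast<std::int32_t>(Code.size());
      emit32(Label.isLinked() ? Label.Link : -1);
      Label.Link = DispPos;
    }

    // Bind the label here and rewrite every chained displacement slot with
    // the real pc-relative displacement (relative to the end of the slot).
    void bind(ToyLabel &Label) {
      const std::int32_t Target = static_cast<std::int32_t>(Code.size());
      std::int32_t Pos = Label.isLinked() ? Label.Link : -1;
      while (Pos >= 0) {
        std::int32_t Next = load32(Pos);
        store32(Pos, Target - (Pos + 4));
        Pos = Next;
      }
      Label.Link = -1;
    }

  private:
    void emit32(std::int32_t V) {
      for (int I = 0; I < 4; ++I)
        Code.push_back(static_cast<std::uint8_t>(
            (static_cast<std::uint32_t>(V) >> (8 * I)) & 0xFF));
    }
    std::int32_t load32(std::int32_t Pos) const {
      std::uint32_t V = 0;
      for (int I = 0; I < 4; ++I)
        V |= static_cast<std::uint32_t>(Code[Pos + I]) << (8 * I);
      return static_cast<std::int32_t>(V);
    }
    void store32(std::int32_t Pos, std::int32_t V) {
      for (int I = 0; I < 4; ++I)
        Code[Pos + I] = static_cast<std::uint8_t>(
            (static_cast<std::uint32_t>(V) >> (8 * I)) & 0xFF);
    }
    std::vector<std::uint8_t> Code;
  };
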
@@ -200,8 +200,8 @@
return (Limit - Contents) + kMinimumGap;
}
- /// Compute the limit based on the data area and the capacity. See
- /// description of kMinimumGap for the reasoning behind the value.
+ /// Compute the limit based on the data area and the capacity. See description
+ /// of kMinimumGap for the reasoning behind the value.
static uintptr_t computeLimit(uintptr_t Data, intptr_t Capacity) {
return Data + Capacity - kMinimumGap;
}
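
A quick worked check, with illustrative constants, that capacity() shown above and computeLimit() are inverses of each other:

  #include <cstdint>

  // kGap stands in for kMinimumGap. With Limit = Data + Capacity - kMinimumGap,
  // an empty buffer (Contents == Data) reports
  // capacity() == (Limit - Contents) + kMinimumGap == Capacity.
  constexpr std::intptr_t kGap = 32;
  constexpr std::intptr_t kData = 0x1000;
  constexpr std::intptr_t kCapacity = 4096;
  constexpr std::intptr_t kLimit = kData + kCapacity - kGap;
  static_assert((kLimit - kData) + kGap == kCapacity,
                "capacity() inverts computeLimit()");
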
@@ -226,12 +226,12 @@
/// Allocate a chunk of bytes using the per-Assembler allocator.
uintptr_t allocateBytes(size_t bytes) {
- // For now, alignment is not related to NaCl bundle alignment, since
- // the buffer's GetPosition is relative to the base. So NaCl bundle
- // alignment checks can be relative to that base. Later, the buffer
- // will be copied out to a ".text" section (or an in memory-buffer
- // that can be mprotect'ed with executable permission), and that
- // second buffer should be aligned for NaCl.
+ // For now, alignment is not related to NaCl bundle alignment, since the
+ // buffer's GetPosition is relative to the base. So NaCl bundle alignment
+ // checks can be relative to that base. Later, the buffer will be copied
+    // out to a ".text" section (or an in-memory buffer that can be mprotect'ed
+ // with executable permission), and that second buffer should be aligned
+ // for NaCl.
const size_t Alignment = 16;
return reinterpret_cast<uintptr_t>(Allocator.Allocate(bytes, Alignment));
}
@@ -257,8 +257,8 @@
/// Get the label for a CfgNode.
virtual Label *getCfgNodeLabel(SizeT NodeNumber) = 0;
- /// Mark the current text location as the start of a CFG node
- /// (represented by NodeNumber).
+ /// Mark the current text location as the start of a CFG node (represented by
+ /// NodeNumber).
virtual void bindCfgNodeLabel(SizeT NodeNumber) = 0;
virtual bool fixupIsPCRel(FixupKind Kind) const = 0;
@@ -293,15 +293,15 @@
const AssemblerKind Kind;
ArenaAllocator<32 * 1024> Allocator;
- /// FunctionName and IsInternal are transferred from the original Cfg
- /// object, since the Cfg object may be deleted by the time the
- /// assembler buffer is emitted.
+ /// FunctionName and IsInternal are transferred from the original Cfg object,
+ /// since the Cfg object may be deleted by the time the assembler buffer is
+ /// emitted.
IceString FunctionName = "";
bool IsInternal = false;
- /// Preliminary indicates whether a preliminary pass is being made
- /// for calculating bundle padding (Preliminary=true), versus the
- /// final pass where all changes to label bindings, label links, and
- /// relocation fixups are fully committed (Preliminary=false).
+ /// Preliminary indicates whether a preliminary pass is being made for
+ /// calculating bundle padding (Preliminary=true), versus the final pass where
+ /// all changes to label bindings, label links, and relocation fixups are
+ /// fully committed (Preliminary=false).
bool Preliminary = false;
protected:
diff --git a/src/IceAssemblerX86Base.h b/src/IceAssemblerX86Base.h
index ab1a29d..7c4be6f 100644
--- a/src/IceAssemblerX86Base.h
+++ b/src/IceAssemblerX86Base.h
@@ -195,8 +195,8 @@
Type, typename Traits::GPRRegister, typename Traits::GPRRegister,
const Immediate &);
struct GPREmitterShiftD {
- // Technically AddrGPR and AddrGPRImm are also allowed, but in practice
- // we always normalize Dest to a Register first.
+ // Technically AddrGPR and AddrGPRImm are also allowed, but in practice we
+ // always normalize Dest to a Register first.
TypedEmitGPRGPR GPRGPR;
TypedEmitGPRGPRImm GPRGPRImm;
};
@@ -252,8 +252,8 @@
TypedEmitAddr RegAddr;
};
- // Three operand (potentially) cross Xmm/GPR instructions.
- // The last operand must be an immediate.
+ // Three operand (potentially) cross Xmm/GPR instructions. The last operand
+ // must be an immediate.
template <typename DReg_t, typename SReg_t> struct ThreeOpImmEmitter {
using TypedEmitRegRegImm = void (AssemblerX86Base::*)(Type, DReg_t, SReg_t,
const Immediate &);
@@ -906,8 +906,8 @@
Label *getOrCreateLabel(SizeT Number, LabelVector &Labels);
- // The arith_int() methods factor out the commonality between the encodings of
- // add(), Or(), adc(), sbb(), And(), sub(), Xor(), and cmp(). The Tag
+ // The arith_int() methods factor out the commonality between the encodings
+ // of add(), Or(), adc(), sbb(), And(), sub(), Xor(), and cmp(). The Tag
// parameter is statically asserted to be less than 8.
template <uint32_t Tag>
void arith_int(Type Ty, typename Traits::GPRRegister reg,
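
A hedged sketch of why Tag must be less than 8: for the x86 group-1 immediate forms these methods share, the operation is selected by the 3-bit reg/opcode field of the ModRM byte. makeGrp1ModRM below is a hypothetical helper, not the emitter used here.

  #include <cstdint>

  // For opcodes 0x80/0x81/0x83, the operation lives in bits 5..3 of ModRM:
  // 0=add, 1=or, 2=adc, 3=sbb, 4=and, 5=sub, 6=xor, 7=cmp.
  template <std::uint32_t Tag>
  constexpr std::uint8_t makeGrp1ModRM(std::uint8_t Mod, std::uint8_t Rm) {
    static_assert(Tag < 8, "Tag must fit in the 3-bit ModRM reg field");
    return static_cast<std::uint8_t>((Mod << 6) | (Tag << 3) | (Rm & 7));
  }

  // Example: register-direct (Mod=3) "sub ecx, 1" encodes as 0x83 /5 ib,
  // i.e. bytes 83 E9 01.
  static_assert(makeGrp1ModRM<5>(3, /*ecx=*/1) == 0xE9,
                "ModRM byte for sub ecx, imm8");
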
@@ -957,10 +957,10 @@
isByteSizedType(Ty);
}
- // assembleAndEmitRex is used for determining which (if any) rex prefix should
- // be emitted for the current instruction. It allows different types for Reg
- // and Rm because they could be of different types (e.g., in mov[sz]x
- // instrutions.) If Addr is not nullptr, then Rm is ignored, and Rex.B is
+ // assembleAndEmitRex is used for determining which (if any) rex prefix
+ // should be emitted for the current instruction. It allows different types
+ // for Reg and Rm because they could be of different types (e.g., in mov[sz]x
+ // instructions.) If Addr is not nullptr, then Rm is ignored, and Rex.B is
// determined by Addr instead. TyRm is still used to determine Addr's size.
template <typename RegType, typename RmType, typename T = Traits>
typename std::enable_if<T::Is64Bit, void>::type
@@ -1005,9 +1005,9 @@
assembleAndEmitRex(TyReg, Reg, TyRm, Rm);
}
- // emitRexB is used for emitting a Rex prefix if one is needed on encoding the
- // Reg field in an x86 instruction. It is invoked by the template when Reg is
- // the single register operand in the instruction (e.g., push Reg.)
+ // emitRexB is used for emitting a Rex prefix if one is needed on encoding
+ // the Reg field in an x86 instruction. It is invoked by the template when
+ // Reg is the single register operand in the instruction (e.g., push Reg.)
template <typename RmType> void emitRexB(const Type Ty, const RmType Rm) {
emitRexRB(Ty, RexRegIrrelevant, Ty, Rm);
}
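
For reference, a small sketch of the REX byte these helpers ultimately emit; makeRex is a hypothetical helper, and the real emitters derive the W/R/X/B bits from operand types and register encodings.

  #include <cstdint>

  // A REX prefix is 0100WRXB: W selects 64-bit operand size, R extends
  // ModRM.reg, X extends SIB.index, and B extends ModRM.rm (or the SIB base).
  constexpr std::uint8_t makeRex(bool W, bool R, bool X, bool B) {
    return static_cast<std::uint8_t>(0x40 | (W << 3) | (R << 2) | (X << 1) | B);
  }

  // "push r12" only needs REX.B to reach the extended register, giving 0x41;
  // a 64-bit "mov rax, r9" encoded as MOV r/m64, r64 needs W and R, giving 0x4C.
  static_assert(makeRex(false, false, false, true) == 0x41, "REX.B");
  static_assert(makeRex(true, true, false, false) == 0x4C, "REX.W + REX.R");
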
diff --git a/src/IceAssemblerX86BaseImpl.h b/src/IceAssemblerX86BaseImpl.h
index f449dae..b1013d6 100644
--- a/src/IceAssemblerX86BaseImpl.h
+++ b/src/IceAssemblerX86BaseImpl.h
@@ -1068,9 +1068,8 @@
}
// {add,sub,mul,div}ps are given a Ty parameter for consistency with
-// {add,sub,mul,div}ss. In the future, when the PNaCl ABI allows
-// addpd, etc., we can use the Ty parameter to decide on adding
-// a 0x66 prefix.
+// {add,sub,mul,div}ss. In the future, when the PNaCl ABI allows addpd, etc.,
+// we can use the Ty parameter to decide on adding a 0x66 prefix.
template <class Machine>
void AssemblerX86Base<Machine>::addps(Type /* Ty */,
typename Traits::XmmRegister dst,
@@ -1836,8 +1835,8 @@
emitUint8(0x0F);
emitUint8(0x3A);
emitUint8(isByteSizedType(Ty) ? 0x14 : 0x16);
- // SSE 4.1 versions are "MRI" because dst can be mem, while
- // pextrw (SSE2) is RMI because dst must be reg.
+ // SSE 4.1 versions are "MRI" because dst can be mem, while pextrw (SSE2)
+ // is RMI because dst must be reg.
emitXmmRegisterOperand(src, dst);
emitUint8(imm.value());
}
@@ -2147,11 +2146,11 @@
void AssemblerX86Base<Machine>::test(Type Ty, typename Traits::GPRRegister reg,
const Immediate &immediate) {
AssemblerBuffer::EnsureCapacity ensured(&Buffer);
- // For registers that have a byte variant (EAX, EBX, ECX, and EDX)
- // we only test the byte register to keep the encoding short.
- // This is legal even if the register had high bits set since
- // this only sets flags registers based on the "AND" of the two operands,
- // and the immediate had zeros at those high bits.
+ // For registers that have a byte variant (EAX, EBX, ECX, and EDX) we only
+ // test the byte register to keep the encoding short. This is legal even if
+ // the register had high bits set since this only sets flags registers based
+ // on the "AND" of the two operands, and the immediate had zeros at those
+ // high bits.
if (immediate.is_uint8() && reg <= Traits::Last8BitGPR) {
// Use zero-extended 8-bit immediate.
emitRexB(Ty, reg);
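
A sketch of the size savings this comment is describing, under assumed encoding lengths and register numbering (testEncodingSize and kLast8BitGPR below are illustrative, not the real assembler's API):

  #include <cstdint>

  // Illustrative sizes only: the byte-register form "test r8, imm8" is
  // F6 /0 ib (3 bytes; AL even has a 2-byte form), while "test r32, imm32" is
  // F7 /0 id (6 bytes). The shortcut is legal because AND-ing against an
  // immediate whose high bits are zero yields the same flag results either way.
  constexpr unsigned kLast8BitGPR = 3; // assume eax/ecx/edx/ebx are 0..3

  constexpr bool fitsInUint8(std::uint32_t Imm) { return Imm <= 0xFF; }

  constexpr unsigned testEncodingSize(unsigned RegNumber, std::uint32_t Imm) {
    return (fitsInUint8(Imm) && RegNumber <= kLast8BitGPR) ? 3u : 6u;
  }

  static_assert(testEncodingSize(/*ecx=*/1, 0x40) == 3, "short form");
  static_assert(testEncodingSize(/*ecx=*/1, 0x10000) == 6, "needs imm32");
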
@@ -2183,8 +2182,8 @@
const typename Traits::Address &addr,
const Immediate &immediate) {
AssemblerBuffer::EnsureCapacity ensured(&Buffer);
- // If the immediate is short, we only test the byte addr to keep the
- // encoding short.
+ // If the immediate is short, we only test the byte addr to keep the encoding
+ // short.
if (immediate.is_uint8()) {
// Use zero-extended 8-bit immediate.
emitRex(Ty, addr, RexRegIrrelevant);
@@ -3016,10 +3015,10 @@
// TODO(stichnot): Here and in jmp(), we may need to be more
// conservative about the backward branch distance if the branch
// instruction is within a bundle_lock sequence, because the
- // distance may increase when padding is added. This isn't an
- // issue for branches outside a bundle_lock, because if padding
- // is added, the retry may change it to a long backward branch
- // without affecting any of the bookkeeping.
+ // distance may increase when padding is added. This isn't an issue for
+ // branches outside a bundle_lock, because if padding is added, the retry
+ // may change it to a long backward branch without affecting any of the
+ // bookkeeping.
emitUint8(0x70 + condition);
emitUint8((offset - kShortSize) & 0xFF);
} else {
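
A sketch of the short-versus-near decision made by the surrounding code, with assumed constants (kShortJccSize, kNearJccSize) rather than the assembler's actual kShortSize:

  #include <cstdint>

  // A backward conditional branch can use the 2-byte "7x rel8" form only when
  // the displacement, measured from the end of the short form, fits in a
  // signed byte; otherwise it needs the 6-byte "0F 8x rel32" form.
  constexpr int kShortJccSize = 2;
  constexpr int kNearJccSize = 6;

  constexpr bool fitsInInt8(std::int64_t V) { return V >= -128 && V <= 127; }

  constexpr int jccSize(std::int64_t OffsetToTarget) {
    return fitsInInt8(OffsetToTarget - kShortJccSize) ? kShortJccSize
                                                      : kNearJccSize;
  }

  static_assert(jccSize(-100) == 2, "small backward branch stays short");
  static_assert(jccSize(-1000) == 6, "large backward branch needs rel32");
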
diff --git a/src/IceBrowserCompileServer.cpp b/src/IceBrowserCompileServer.cpp
index 03127a9..4d5705f 100644
--- a/src/IceBrowserCompileServer.cpp
+++ b/src/IceBrowserCompileServer.cpp
@@ -106,14 +106,13 @@
BrowserCompileServer *Server =
reinterpret_cast<BrowserCompileServer *>(UserData);
Server->setFatalError(Reason);
- // Only kill the current thread instead of the whole process.
- // We need the server thread to remain alive in order to respond with the
- // error message.
+ // Only kill the current thread instead of the whole process. We need the
+ // server thread to remain alive in order to respond with the error message.
// We could also try to pthread_kill all other worker threads, but
- // pthread_kill / raising signals is not supported by NaCl.
- // We'll have to assume that the worker/emitter threads will be well behaved
- // after a fatal error in other threads, and either get stuck waiting
- // on input from a previous stage, or also call report_fatal_error.
+ // pthread_kill / raising signals is not supported by NaCl. We'll have to
+ // assume that the worker/emitter threads will be well behaved after a fatal
+ // error in other threads, and either get stuck waiting on input from a
+ // previous stage, or also call report_fatal_error.
pthread_exit(0);
}
@@ -143,8 +142,8 @@
}
bool BrowserCompileServer::pushInputBytes(const void *Data, size_t NumBytes) {
- // If there was an earlier error, do not attempt to push bytes to
- // the QueueStreamer. Otherwise the thread could become blocked.
+ // If there was an earlier error, do not attempt to push bytes to the
+ // QueueStreamer. Otherwise the thread could become blocked.
if (HadError.load())
return true;
return InputStream->PutBytes(
@@ -163,8 +162,8 @@
ErrorCode &BrowserCompileServer::getErrorCode() {
if (HadError.load()) {
// HadError means report_fatal_error is called. Make sure that the
- // LastError is not EC_None. We don't know the type of error so
- // just pick some error category.
+ // LastError is not EC_None. We don't know the type of error so just pick
+ // some error category.
LastError.assign(EC_Translation);
}
return LastError;
diff --git a/src/IceBrowserCompileServer.h b/src/IceBrowserCompileServer.h
index e76b820..f23ab17 100644
--- a/src/IceBrowserCompileServer.h
+++ b/src/IceBrowserCompileServer.h
@@ -31,12 +31,11 @@
namespace Ice {
-/// The browser variant of the compile server.
-/// Compared to the commandline version, this version gets compile
-/// requests over IPC. Each compile request will have a slimmed down
-/// version of argc, argv while other flags are set to defaults that
-/// make sense in the browser case. The output file is specified via
-/// a posix FD, and input bytes are pushed to the server.
+/// The browser variant of the compile server. Compared to the commandline
+/// version, this version gets compile requests over IPC. Each compile request
+/// will have a slimmed down version of argc, argv while other flags are set to
+/// defaults that make sense in the browser case. The output file is specified
+/// via a posix FD, and input bytes are pushed to the server.
class BrowserCompileServer : public CompileServer {
BrowserCompileServer() = delete;
BrowserCompileServer(const BrowserCompileServer &) = delete;
@@ -56,12 +55,12 @@
/// Parse and set up the flags for compile jobs.
void getParsedFlags(uint32_t NumThreads, int argc, char **argv);
- /// Creates the streams + context and starts the compile thread,
- /// handing off the streams + context.
+ /// Creates the streams + context and starts the compile thread, handing off
+ /// the streams + context.
void startCompileThread(int OutFD);
- /// Call to push more bytes to the current input stream.
- /// Returns false on success and true on error.
+ /// Call to push more bytes to the current input stream. Returns false on
+ /// success and true on error.
bool pushInputBytes(const void *Data, size_t NumBytes);
/// Notify the input stream of EOF.
@@ -72,9 +71,8 @@
CompileThread.join();
if (Ctx->getErrorStatus()->value())
LastError.assign(Ctx->getErrorStatus()->value());
- // Reset some state. The InputStream is deleted by the compiler
- // so only reset this to nullptr. Free and flush the rest
- // of the streams.
+ // Reset some state. The InputStream is deleted by the compiler so only
+ // reset this to nullptr. Free and flush the rest of the streams.
InputStream = nullptr;
EmitStream.reset(nullptr);
ELFStream.reset(nullptr);
@@ -95,12 +93,12 @@
std::string Buffer;
llvm::raw_string_ostream StrBuf;
};
- /// This currently only handles a single compile request, hence one copy
- /// of the state.
+ /// This currently only handles a single compile request, hence one copy of
+ /// the state.
std::unique_ptr<GlobalContext> Ctx;
- /// A borrowed reference to the current InputStream. The compiler owns
- /// the actual reference so the server must be careful not to access
- /// after the compiler is done.
+ /// A borrowed reference to the current InputStream. The compiler owns the
+ /// actual reference so the server must be careful not to access after the
+ /// compiler is done.
llvm::QueueStreamer *InputStream = nullptr;
std::unique_ptr<Ostream> LogStream;
std::unique_ptr<llvm::raw_fd_ostream> EmitStream;
diff --git a/src/IceCfg.cpp b/src/IceCfg.cpp
index ed60abb..4c703cf 100644
--- a/src/IceCfg.cpp
+++ b/src/IceCfg.cpp
@@ -8,8 +8,7 @@
//===----------------------------------------------------------------------===//
///
/// \file
-/// This file implements the Cfg class, including constant pool
-/// management.
+/// This file implements the Cfg class, including constant pool management.
///
//===----------------------------------------------------------------------===//
@@ -46,8 +45,8 @@
TargetAssembler(TargetLowering::createAssembler(
Ctx->getFlags().getTargetArch(), this)) {
if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Randomize) {
- // If -randomize-pool-immediates=randomize, create a random number generator
- // to generate a cookie for constant blinding.
+ // If -randomize-pool-immediates=randomize, create a random number
+ // generator to generate a cookie for constant blinding.
RandomNumberGenerator RNG(Ctx->getFlags().getRandomSeed(),
RPE_ConstantBlinding, this->SequenceNumber);
ConstantBlindingCookie =
@@ -86,8 +85,8 @@
ImplicitArgs.push_back(Arg);
}
-// Returns whether the stack frame layout has been computed yet. This
-// is used for dumping the stack frame location of Variables.
+// Returns whether the stack frame layout has been computed yet. This is used
+// for dumping the stack frame location of Variables.
bool Cfg::hasComputedFrame() const { return getTarget()->hasComputedFrame(); }
namespace {
@@ -157,8 +156,8 @@
void Cfg::translate() {
if (hasError())
return;
- // FunctionTimer conditionally pushes/pops a TimerMarker if
- // TimeEachFunction is enabled.
+ // FunctionTimer conditionally pushes/pops a TimerMarker if TimeEachFunction
+ // is enabled.
std::unique_ptr<TimerMarker> FunctionTimer;
if (BuildDefs::dump()) {
const IceString &TimingFocusOn =
@@ -180,16 +179,16 @@
if (getContext()->getFlags().getEnableBlockProfile()) {
profileBlocks();
- // TODO(jpp): this is fragile, at best. Figure out a better way of detecting
- // exit functions.
+ // TODO(jpp): this is fragile, at best. Figure out a better way of
+ // detecting exit functions.
if (GlobalContext::matchSymbolName(getFunctionName(), "exit")) {
addCallToProfileSummary();
}
dump("Profiled CFG");
}
- // The set of translation passes and their order are determined by
- // the target.
+ // The set of translation passes and their order are determined by the
+ // target.
getTarget()->translate();
dump("Final output");
@@ -273,8 +272,8 @@
for (Variable *Var : Variables) {
Var->getLiveRange().reset();
}
- // This splits edges and appends new nodes to the end of the node
- // list. This can invalidate iterators, so don't use an iterator.
+ // This splits edges and appends new nodes to the end of the node list. This
+ // can invalidate iterators, so don't use an iterator.
SizeT NumNodes = getNumNodes();
SizeT NumVars = getNumVariables();
for (SizeT I = 0; I < NumNodes; ++I)
@@ -282,8 +281,8 @@
TimerMarker TT(TimerStack::TT_lowerPhiAssignments, this);
if (true) {
- // The following code does an in-place update of liveness and live ranges as
- // a result of adding the new phi edge split nodes.
+ // The following code does an in-place update of liveness and live ranges
+ // as a result of adding the new phi edge split nodes.
getLiveness()->initPhiEdgeSplits(Nodes.begin() + NumNodes,
Variables.begin() + NumVars);
TimerMarker TTT(TimerStack::TT_liveness, this);
@@ -297,7 +296,7 @@
}
} else {
// The following code does a brute-force recalculation of live ranges as a
- // result of adding the new phi edge split nodes. The liveness calculation
+ // result of adding the new phi edge split nodes. The liveness calculation
// is particularly expensive because the new nodes are not yet in a proper
// topological order and so convergence is slower.
//
@@ -310,27 +309,25 @@
Target->regAlloc(RAK_Phi);
}
-// Find a reasonable placement for nodes that have not yet been
-// placed, while maintaining the same relative ordering among already
-// placed nodes.
+// Find a reasonable placement for nodes that have not yet been placed, while
+// maintaining the same relative ordering among already placed nodes.
void Cfg::reorderNodes() {
- // TODO(ascull): it would be nice if the switch tests were always followed
- // by the default case to allow for fall through.
+ // TODO(ascull): it would be nice if the switch tests were always followed by
+ // the default case to allow for fall through.
using PlacedList = std::list<CfgNode *>;
PlacedList Placed; // Nodes with relative placement locked down
PlacedList Unreachable; // Unreachable nodes
PlacedList::iterator NoPlace = Placed.end();
- // Keep track of where each node has been tentatively placed so that
- // we can manage insertions into the middle.
+ // Keep track of where each node has been tentatively placed so that we can
+ // manage insertions into the middle.
std::vector<PlacedList::iterator> PlaceIndex(Nodes.size(), NoPlace);
for (CfgNode *Node : Nodes) {
- // The "do ... while(0);" construct is to factor out the
- // --PlaceIndex and assert() statements before moving to the next
- // node.
+ // The "do ... while(0);" construct is to factor out the --PlaceIndex and
+ // assert() statements before moving to the next node.
do {
if (Node != getEntryNode() && Node->getInEdges().empty()) {
- // The node has essentially been deleted since it is not a
- // successor of any other node.
+ // The node has essentially been deleted since it is not a successor of
+ // any other node.
Unreachable.push_back(Node);
PlaceIndex[Node->getIndex()] = Unreachable.end();
Node->setNeedsPlacement(false);
@@ -343,10 +340,10 @@
continue;
}
Node->setNeedsPlacement(false);
- // Assume for now that the unplaced node is from edge-splitting
- // and therefore has 1 in-edge and 1 out-edge (actually, possibly
- // more than 1 in-edge if the predecessor node was contracted).
- // If this changes in the future, rethink the strategy.
+ // Assume for now that the unplaced node is from edge-splitting and
+ // therefore has 1 in-edge and 1 out-edge (actually, possibly more than 1
+ // in-edge if the predecessor node was contracted). If this changes in
+ // the future, rethink the strategy.
assert(Node->getInEdges().size() >= 1);
assert(Node->getOutEdges().size() == 1);
@@ -363,8 +360,8 @@
// Otherwise, place it after the (first) predecessor.
CfgNode *Pred = Node->getInEdges().front();
auto PredPosition = PlaceIndex[Pred->getIndex()];
- // It shouldn't be the case that PredPosition==NoPlace, but if
- // that somehow turns out to be true, we just insert Node before
+ // It shouldn't be the case that PredPosition==NoPlace, but if that
+ // somehow turns out to be true, we just insert Node before
// PredPosition=NoPlace=Placed.end() .
if (PredPosition != NoPlace)
++PredPosition;
@@ -475,9 +472,9 @@
LA.computeLoopNestDepth();
}
-// This is a lightweight version of live-range-end calculation. Marks the last
+// This is a lightweight version of live-range-end calculation. Marks the last
// use of only those variables whose definition and uses are completely with a
-// single block. It is a quick single pass and doesn't need to iterate until
+// single block. It is a quick single pass and doesn't need to iterate until
// convergence.
void Cfg::livenessLightweight() {
TimerMarker T(TimerStack::TT_livenessLightweight, this);
@@ -513,9 +510,9 @@
for (Variable *Var : Variables)
Var->resetLiveRange();
}
- // Make a final pass over each node to delete dead instructions,
- // collect the first and last instruction numbers, and add live
- // range segments for that node.
+ // Make a final pass over each node to delete dead instructions, collect the
+ // first and last instruction numbers, and add live range segments for that
+ // node.
for (CfgNode *Node : Nodes) {
InstNumberT FirstInstNum = Inst::NumberSentinel;
InstNumberT LastInstNum = Inst::NumberSentinel;
@@ -538,19 +535,18 @@
}
}
if (Mode == Liveness_Intervals) {
- // Special treatment for live in-args. Their liveness needs to
- // extend beyond the beginning of the function, otherwise an arg
- // whose only use is in the first instruction will end up having
- // the trivial live range [2,2) and will *not* interfere with
- // other arguments. So if the first instruction of the method
- // is "r=arg1+arg2", both args may be assigned the same
- // register. This is accomplished by extending the entry
- // block's instruction range from [2,n) to [1,n) which will
- // transform the problematic [2,2) live ranges into [1,2).
+ // Special treatment for live in-args. Their liveness needs to extend
+ // beyond the beginning of the function, otherwise an arg whose only use
+ // is in the first instruction will end up having the trivial live range
+ // [2,2) and will *not* interfere with other arguments. So if the first
+ // instruction of the method is "r=arg1+arg2", both args may be assigned
+ // the same register. This is accomplished by extending the entry block's
+ // instruction range from [2,n) to [1,n) which will transform the
+ // problematic [2,2) live ranges into [1,2).
if (Node == getEntryNode()) {
- // TODO(stichnot): Make it a strict requirement that the entry
- // node gets the lowest instruction numbers, so that extending
- // the live range for in-args is guaranteed to work.
+ // TODO(stichnot): Make it a strict requirement that the entry node
+ // gets the lowest instruction numbers, so that extending the live
+ // range for in-args is guaranteed to work.
FirstInstNum = Inst::NumberExtended;
}
Node->livenessAddIntervals(getLiveness(), FirstInstNum, LastInstNum);
@@ -558,8 +554,8 @@
}
}
-// Traverse every Variable of every Inst and verify that it
-// appears within the Variable's computed live range.
+// Traverse every Variable of every Inst and verify that it appears within the
+// Variable's computed live range.
bool Cfg::validateLiveness() const {
TimerMarker T(TimerStack::TT_validateLiveness, this);
bool Valid = true;
@@ -579,13 +575,12 @@
const bool IsDest = true;
if (!Dest->getLiveRange().containsValue(InstNumber, IsDest))
Invalid = true;
- // Check that this instruction actually *begins* Dest's live
- // range, by checking that Dest is not live in the previous
- // instruction. As a special exception, we don't check this
- // for the first instruction of the block, because a Phi
- // temporary may be live at the end of the previous block,
- // and if it is also assigned in the first instruction of
- // this block, the adjacent live ranges get merged.
+ // Check that this instruction actually *begins* Dest's live range,
+ // by checking that Dest is not live in the previous instruction. As
+ // a special exception, we don't check this for the first instruction
+ // of the block, because a Phi temporary may be live at the end of
+ // the previous block, and if it is also assigned in the first
+ // instruction of this block, the adjacent live ranges get merged.
if (static_cast<class Inst *>(&Inst) != FirstInst &&
!Inst.isDestNonKillable() &&
Dest->getLiveRange().containsValue(InstNumber - 1, IsDest))
@@ -642,9 +637,9 @@
// ======================== Dump routines ======================== //
-// emitTextHeader() is not target-specific (apart from what is
-// abstracted by the Assembler), so it is defined here rather than in
-// the target lowering class.
+// emitTextHeader() is not target-specific (apart from what is abstracted by
+// the Assembler), so it is defined here rather than in the target lowering
+// class.
void Cfg::emitTextHeader(const IceString &MangledName, GlobalContext *Ctx,
const Assembler *Asm) {
if (!BuildDefs::dump())
@@ -674,8 +669,8 @@
switch (Ctx->getFlags().getOutFileType()) {
case FT_Elf:
case FT_Iasm: {
- // The emission needs to be delayed until the after the text section so save
- // the offsets in the global context.
+    // The emission needs to be delayed until after the text section, so save
+    // the offsets in the global context.
IceString MangledName = Ctx->mangleName(getFunctionName());
for (const InstJumpTable *JumpTable : JumpTables) {
SizeT NumTargets = JumpTable->getNumTargets();
@@ -726,8 +721,8 @@
void Cfg::emitIAS() {
TimerMarker T(TimerStack::TT_emit, this);
- // The emitIAS() routines emit into the internal assembler buffer,
- // so there's no need to lock the streams.
+ // The emitIAS() routines emit into the internal assembler buffer, so there's
+ // no need to lock the streams.
deleteJumpTableInsts();
const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing();
for (CfgNode *Node : Nodes) {
diff --git a/src/IceCfg.h b/src/IceCfg.h
index ca9d706..4147dd9 100644
--- a/src/IceCfg.h
+++ b/src/IceCfg.h
@@ -8,8 +8,8 @@
//===----------------------------------------------------------------------===//
///
/// \file
-/// This file declares the Cfg class, which represents the control flow
-/// graph and the overall per-function compilation context.
+/// This file declares the Cfg class, which represents the control flow graph
+/// and the overall per-function compilation context.
///
//===----------------------------------------------------------------------===//
@@ -50,9 +50,9 @@
GlobalContext *getContext() const { return Ctx; }
uint32_t getSequenceNumber() const { return SequenceNumber; }
- /// Returns true if any of the specified options in the verbose mask
- /// are set. If the argument is omitted, it checks if any verbose
- /// options at all are set.
+ /// Returns true if any of the specified options in the verbose mask are set.
+ /// If the argument is omitted, it checks if any verbose options at all are
+ /// set.
bool isVerbose(VerboseMask Mask = IceV_All) const { return VMask & Mask; }
void setVerbose(VerboseMask Mask) { VMask = Mask; }
@@ -72,11 +72,10 @@
/// \name Manage errors.
/// @{
- /// Translation error flagging. If support for some construct is
- /// known to be missing, instead of an assertion failure, setError()
- /// should be called and the error should be propagated back up.
- /// This way, we can gracefully fail to translate and let a fallback
- /// translator handle the function.
+ /// Translation error flagging. If support for some construct is known to be
+ /// missing, instead of an assertion failure, setError() should be called and
+ /// the error should be propagated back up. This way, we can gracefully fail
+ /// to translate and let a fallback translator handle the function.
void setError(const IceString &Message);
bool hasError() const { return HasError; }
IceString getError() const { return ErrorMessage; }
@@ -98,11 +97,10 @@
/// @}
using IdentifierIndexType = int32_t;
- /// Adds a name to the list and returns its index, suitable for the
- /// argument to getIdentifierName(). No checking for duplicates is
- /// done. This is generally used for node names and variable names
- /// to avoid embedding a std::string inside an arena-allocated
- /// object.
+ /// Adds a name to the list and returns its index, suitable for the argument
+ /// to getIdentifierName(). No checking for duplicates is done. This is
+ /// generally used for node names and variable names to avoid embedding a
+ /// std::string inside an arena-allocated object.
IdentifierIndexType addIdentifierName(const IceString &Name) {
IdentifierIndexType Index = IdentifierNames.size();
IdentifierNames.push_back(Name);
@@ -122,8 +120,8 @@
/// \name Manage Variables.
/// @{
- /// Create a new Variable with a particular type and an optional
- /// name. The Node argument is the node where the variable is defined.
+ /// Create a new Variable with a particular type and an optional name. The
+ /// Node argument is the node where the variable is defined.
// TODO(jpp): untemplate this with separate methods: makeVariable,
// makeSpillVariable, and makeStackVariable.
template <typename T = Variable> T *makeVariable(Type Ty) {
@@ -176,9 +174,9 @@
/// Passes over the CFG.
void translate();
- /// After the CFG is fully constructed, iterate over the nodes and
- /// compute the predecessor and successor edges, in the form of
- /// CfgNode::InEdges[] and CfgNode::OutEdges[].
+ /// After the CFG is fully constructed, iterate over the nodes and compute the
+ /// predecessor and successor edges, in the form of CfgNode::InEdges[] and
+ /// CfgNode::OutEdges[].
void computeInOutEdges();
void renumberInstructions();
void placePhiLoads();
@@ -277,16 +275,15 @@
std::unique_ptr<VariableDeclarationList> GlobalInits;
std::vector<InstJumpTable *> JumpTables;
- /// CurrentNode is maintained during dumping/emitting just for
- /// validating Variable::DefNode. Normally, a traversal over
- /// CfgNodes maintains this, but before global operations like
- /// register allocation, resetCurrentNode() should be called to avoid
- /// spurious validation failures.
+ /// CurrentNode is maintained during dumping/emitting just for validating
+ /// Variable::DefNode. Normally, a traversal over CfgNodes maintains this, but
+ /// before global operations like register allocation, resetCurrentNode()
+ /// should be called to avoid spurious validation failures.
const CfgNode *CurrentNode = nullptr;
- /// Maintain a pointer in TLS to the current Cfg being translated.
- /// This is primarily for accessing its allocator statelessly, but
- /// other uses are possible.
+ /// Maintain a pointer in TLS to the current Cfg being translated. This is
+ /// primarily for accessing its allocator statelessly, but other uses are
+ /// possible.
ICE_TLS_DECLARE_FIELD(const Cfg *, CurrentCfg);
public:
diff --git a/src/IceCfgNode.cpp b/src/IceCfgNode.cpp
index 0ccc6ea..31a6e8a 100644
--- a/src/IceCfgNode.cpp
+++ b/src/IceCfgNode.cpp
@@ -8,8 +8,8 @@
//===----------------------------------------------------------------------===//
///
/// \file
-/// This file implements the CfgNode class, including the complexities
-/// of instruction insertion and in-edge calculation.
+/// This file implements the CfgNode class, including the complexities of
+/// instruction insertion and in-edge calculation.
///
//===----------------------------------------------------------------------===//
@@ -29,17 +29,16 @@
CfgNode::CfgNode(Cfg *Func, SizeT LabelNumber)
: Func(Func), Number(LabelNumber), LabelNumber(LabelNumber) {}
-// Returns the name the node was created with. If no name was given,
-// it synthesizes a (hopefully) unique name.
+// Returns the name the node was created with. If no name was given, it
+// synthesizes a (hopefully) unique name.
IceString CfgNode::getName() const {
if (NameIndex >= 0)
return Func->getIdentifierName(NameIndex);
return "__" + std::to_string(LabelNumber);
}
-// Adds an instruction to either the Phi list or the regular
-// instruction list. Validates that all Phis are added before all
-// regular instructions.
+// Adds an instruction to either the Phi list or the regular instruction list.
+// Validates that all Phis are added before all regular instructions.
void CfgNode::appendInst(Inst *Inst) {
++InstCountEstimate;
if (InstPhi *Phi = llvm::dyn_cast<InstPhi>(Inst)) {
@@ -53,11 +52,10 @@
}
}
-// Renumbers the non-deleted instructions in the node. This needs to
-// be done in preparation for live range analysis. The instruction
-// numbers in a block must be monotonically increasing. The range of
-// instruction numbers in a block, from lowest to highest, must not
-// overlap with the range of any other block.
+// Renumbers the non-deleted instructions in the node. This needs to be done in
+// preparation for live range analysis. The instruction numbers in a block must
+// be monotonically increasing. The range of instruction numbers in a block,
+// from lowest to highest, must not overlap with the range of any other block.
void CfgNode::renumberInstructions() {
InstNumberT FirstNumber = Func->getNextInstNumber();
for (Inst &I : Phis)
@@ -67,10 +65,9 @@
InstCountEstimate = Func->getNextInstNumber() - FirstNumber;
}
-// When a node is created, the OutEdges are immediately known, but the
-// InEdges have to be built up incrementally. After the CFG has been
-// constructed, the computePredecessors() pass finalizes it by
-// creating the InEdges list.
+// When a node is created, the OutEdges are immediately known, but the InEdges
+// have to be built up incrementally. After the CFG has been constructed, the
+// computePredecessors() pass finalizes it by creating the InEdges list.
void CfgNode::computePredecessors() {
for (CfgNode *Succ : OutEdges)
Succ->InEdges.push_back(this);
@@ -80,19 +77,19 @@
OutEdges = Insts.rbegin()->getTerminatorEdges();
}
-// Validate each Phi instruction in the node with respect to control flow. For
-// every phi argument, its label must appear in the predecessor list. For each
-// predecessor, there must be a phi argument with that label. We don't check
+// Validate each Phi instruction in the node with respect to control flow. For
+// every phi argument, its label must appear in the predecessor list. For each
+// predecessor, there must be a phi argument with that label. We don't check
// that phi arguments with the same label have the same value.
void CfgNode::validatePhis() {
for (Inst &Instr : Phis) {
auto *Phi = llvm::cast<InstPhi>(&Instr);
- // We do a simple O(N^2) algorithm to check for consistency. Even so, it
- // shows up as only about 0.2% of the total translation time. But if
- // necessary, we could improve the complexity by using a hash table to count
- // how many times each node is referenced in the Phi instruction, and how
- // many times each node is referenced in the incoming edge list, and compare
- // the two for equality.
+ // We do a simple O(N^2) algorithm to check for consistency. Even so, it
+ // shows up as only about 0.2% of the total translation time. But if
+ // necessary, we could improve the complexity by using a hash table to
+ // count how many times each node is referenced in the Phi instruction, and
+ // how many times each node is referenced in the incoming edge list, and
+ // compare the two for equality.
for (SizeT i = 0; i < Phi->getSrcSize(); ++i) {
CfgNode *Label = Phi->getLabel(i);
bool Found = false;
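
The consistency check described above amounts to a set-equality test between phi labels and in-edges; a minimal sketch over plain integer node IDs (not the real CfgNode/InstPhi types) looks like this:

  #include <algorithm>
  #include <vector>

  // Every phi label must be an in-edge, and every in-edge must appear among
  // the phi labels; values are not compared, matching the comment above.
  inline bool phiLabelsMatchInEdges(const std::vector<int> &PhiLabels,
                                    const std::vector<int> &InEdges) {
    auto Contains = [](const std::vector<int> &Haystack, int Needle) {
      return std::find(Haystack.begin(), Haystack.end(), Needle) !=
             Haystack.end();
    };
    for (int Label : PhiLabels)
      if (!Contains(InEdges, Label))
        return false;
    for (int Pred : InEdges)
      if (!Contains(PhiLabels, Pred))
        return false;
    return true;
  }
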
@@ -120,17 +117,17 @@
}
}
-// This does part 1 of Phi lowering, by creating a new dest variable
-// for each Phi instruction, replacing the Phi instruction's dest with
-// that variable, and adding an explicit assignment of the old dest to
-// the new dest. For example,
+// This does part 1 of Phi lowering, by creating a new dest variable for each
+// Phi instruction, replacing the Phi instruction's dest with that variable,
+// and adding an explicit assignment of the old dest to the new dest. For
+// example,
// a=phi(...)
// changes to
// "a_phi=phi(...); a=a_phi".
//
-// This is in preparation for part 2 which deletes the Phi
-// instructions and appends assignment instructions to predecessor
-// blocks. Note that this transformation preserves SSA form.
+// This is in preparation for part 2 which deletes the Phi instructions and
+// appends assignment instructions to predecessor blocks. Note that this
+// transformation preserves SSA form.
void CfgNode::placePhiLoads() {
for (Inst &I : Phis) {
auto Phi = llvm::dyn_cast<InstPhi>(&I);
@@ -138,38 +135,35 @@
}
}
-// This does part 2 of Phi lowering. For each Phi instruction at each
-// out-edge, create a corresponding assignment instruction, and add
-// all the assignments near the end of this block. They need to be
-// added before any branch instruction, and also if the block ends
-// with a compare instruction followed by a branch instruction that we
-// may want to fuse, it's better to insert the new assignments before
-// the compare instruction. The tryOptimizedCmpxchgCmpBr() method
-// assumes this ordering of instructions.
+// This does part 2 of Phi lowering. For each Phi instruction at each out-edge,
+// create a corresponding assignment instruction, and add all the assignments
+// near the end of this block. They need to be added before any branch
+// instruction, and also if the block ends with a compare instruction followed
+// by a branch instruction that we may want to fuse, it's better to insert the
+// new assignments before the compare instruction. The
+// tryOptimizedCmpxchgCmpBr() method assumes this ordering of instructions.
//
-// Note that this transformation takes the Phi dest variables out of
-// SSA form, as there may be assignments to the dest variable in
-// multiple blocks.
+// Note that this transformation takes the Phi dest variables out of SSA form,
+// as there may be assignments to the dest variable in multiple blocks.
void CfgNode::placePhiStores() {
// Find the insertion point.
InstList::iterator InsertionPoint = Insts.end();
- // Every block must end in a terminator instruction, and therefore
- // must have at least one instruction, so it's valid to decrement
- // InsertionPoint (but assert just in case).
+ // Every block must end in a terminator instruction, and therefore must have
+ // at least one instruction, so it's valid to decrement InsertionPoint (but
+ // assert just in case).
assert(InsertionPoint != Insts.begin());
--InsertionPoint;
- // Confirm that InsertionPoint is a terminator instruction. Calling
- // getTerminatorEdges() on a non-terminator instruction will cause
- // an llvm_unreachable().
+ // Confirm that InsertionPoint is a terminator instruction. Calling
+ // getTerminatorEdges() on a non-terminator instruction will cause an
+ // llvm_unreachable().
(void)InsertionPoint->getTerminatorEdges();
// SafeInsertionPoint is always immediately before the terminator
- // instruction. If the block ends in a compare and conditional
- // branch, it's better to place the Phi store before the compare so
- // as not to interfere with compare/branch fusing. However, if the
- // compare instruction's dest operand is the same as the new
- // assignment statement's source operand, this can't be done due to
- // data dependences, so we need to fall back to the
- // SafeInsertionPoint. To illustrate:
+ // instruction. If the block ends in a compare and conditional branch, it's
+ // better to place the Phi store before the compare so as not to interfere
+ // with compare/branch fusing. However, if the compare instruction's dest
+ // operand is the same as the new assignment statement's source operand, this
+ // can't be done due to data dependences, so we need to fall back to the
+ // SafeInsertionPoint. To illustrate:
// ; <label>:95
// %97 = load i8* %96, align 1
// %98 = icmp ne i8 %97, 0
@@ -188,9 +182,8 @@
// %100 = %100_phi
// %101 = %101_phi
//
- // TODO(stichnot): It may be possible to bypass this whole
- // SafeInsertionPoint mechanism. If a source basic block ends in a
- // conditional branch:
+ // TODO(stichnot): It may be possible to bypass this whole SafeInsertionPoint
+ // mechanism. If a source basic block ends in a conditional branch:
// labelSource:
// ...
// br i1 %foo, label %labelTrue, label %labelFalse
@@ -200,17 +193,17 @@
// then we actually know the constant i1 value of the Phi operand:
// labelTrue:
// %bar = phi i1 [ true, %labelSource ], ...
- // It seems that this optimization should be done by clang or opt,
- // but we could also do it here.
+ // It seems that this optimization should be done by clang or opt, but we
+ // could also do it here.
InstList::iterator SafeInsertionPoint = InsertionPoint;
- // Keep track of the dest variable of a compare instruction, so that
- // we insert the new instruction at the SafeInsertionPoint if the
- // compare's dest matches the Phi-lowered assignment's source.
+ // Keep track of the dest variable of a compare instruction, so that we
+ // insert the new instruction at the SafeInsertionPoint if the compare's dest
+ // matches the Phi-lowered assignment's source.
Variable *CmpInstDest = nullptr;
- // If the current insertion point is at a conditional branch
- // instruction, and the previous instruction is a compare
- // instruction, then we move the insertion point before the compare
- // instruction so as not to interfere with compare/branch fusing.
+ // If the current insertion point is at a conditional branch instruction, and
+ // the previous instruction is a compare instruction, then we move the
+ // insertion point before the compare instruction so as not to interfere with
+ // compare/branch fusing.
if (InstBr *Branch = llvm::dyn_cast<InstBr>(InsertionPoint)) {
if (!Branch->isUnconditional()) {
if (InsertionPoint != Insts.begin()) {
@@ -249,13 +242,12 @@
I.setDeleted();
}
-// Splits the edge from Pred to this node by creating a new node and
-// hooking up the in and out edges appropriately. (The EdgeIndex
-// parameter is only used to make the new node's name unique when
-// there are multiple edges between the same pair of nodes.) The new
-// node's instruction list is initialized to the empty list, with no
-// terminator instruction. There must not be multiple edges from Pred
-// to this node so all Inst::getTerminatorEdges implementations must
+// Splits the edge from Pred to this node by creating a new node and hooking up
+// the in and out edges appropriately. (The EdgeIndex parameter is only used to
+// make the new node's name unique when there are multiple edges between the
+// same pair of nodes.) The new node's instruction list is initialized to the
+// empty list, with no terminator instruction. There must not be multiple edges
+// from Pred to this node so all Inst::getTerminatorEdges implementations must
// not contain duplicates.
CfgNode *CfgNode::splitIncomingEdge(CfgNode *Pred, SizeT EdgeIndex) {
CfgNode *NewNode = Func->makeNode();
@@ -267,8 +259,8 @@
if (BuildDefs::dump())
NewNode->setName("split_" + Pred->getName() + "_" + getName() + "_" +
std::to_string(EdgeIndex));
- // The new node is added to the end of the node list, and will later
- // need to be sorted into a reasonable topological order.
+ // The new node is added to the end of the node list, and will later need to
+ // be sorted into a reasonable topological order.
NewNode->setNeedsPlacement(true);
// Repoint Pred's out-edge.
bool Found = false;
@@ -319,31 +311,31 @@
} // end of anonymous namespace
-// This the "advanced" version of Phi lowering for a basic block, in contrast to
-// the simple version that lowers through assignments involving temporaries.
+// This is the "advanced" version of Phi lowering for a basic block, in contrast
+// to the simple version that lowers through assignments involving temporaries.
//
// All Phi instructions in a basic block are conceptually executed in parallel.
// However, if we lower Phis early and commit to a sequential ordering, we may
// end up creating unnecessary interferences which lead to worse register
-// allocation. Delaying Phi scheduling until after register allocation can help
-// unless there are no free registers for shuffling registers or stack slots and
-// spilling becomes necessary.
+// allocation. Delaying Phi scheduling until after register allocation can help
+// unless there are no free registers for shuffling registers or stack slots
+// and spilling becomes necessary.
//
// The advanced Phi lowering starts by finding a topological sort of the Phi
-// instructions, where "A=B" comes before "B=C" due to the anti-dependence on B.
-// Preexisting register assignments are considered in the topological sort. If
-// a topological sort is not possible due to a cycle, the cycle is broken by
-// introducing a non-parallel temporary. For example, a cycle arising from a
-// permutation like "A=B;B=C;C=A" can become "T=A;A=B;B=C;C=T". All else being
+// instructions, where "A=B" comes before "B=C" due to the anti-dependence on
+// B. Preexisting register assignments are considered in the topological sort.
+// If a topological sort is not possible due to a cycle, the cycle is broken by
+// introducing a non-parallel temporary. For example, a cycle arising from a
+// permutation like "A=B;B=C;C=A" can become "T=A;A=B;B=C;C=T". All else being
// equal, prefer to schedule assignments with register-allocated Src operands
// earlier, in case that register becomes free afterwards, and prefer to
// schedule assignments with register-allocated Dest variables later, to keep
// that register free for longer.
//
// Once the ordering is determined, the Cfg edge is split and the assignment
-// list is lowered by the target lowering layer. Since the assignment lowering
+// list is lowered by the target lowering layer. Since the assignment lowering
// may create new infinite-weight temporaries, a follow-on register allocation
-// pass will be needed. To prepare for this, liveness (including live range
+// pass will be needed. To prepare for this, liveness (including live range
// calculation) of the split nodes needs to be calculated, and liveness of the
// original node need to be updated to "undo" the effects of the phi
// assignments.
@@ -355,7 +347,7 @@
// allocation pass is run, focusing only on pre-colored and infinite-weight
// variables, similar to Om1 register allocation (except without the need to
// specially compute these variables' live ranges, since they have already been
-// precisely calculated). The register allocator in this mode needs the ability
+// precisely calculated). The register allocator in this mode needs the ability
// to forcibly spill and reload registers in case none are naturally available.
void CfgNode::advancedPhiLowering() {
if (getPhis().empty())
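
The cycle-breaking idea in the comment above can be sketched as a generic parallel-copy sequentialization; this is an illustration with string variable names, not Subzero's implementation, which additionally weighs preexisting register assignments.

  #include <cstddef>
  #include <string>
  #include <utility>
  #include <vector>

  using Copy = std::pair<std::string, std::string>; // (Dest, Src)

  // Repeatedly emit a copy whose Dest is not read by any remaining copy; when
  // only cycles remain, save one Dest into a fresh temporary and redirect its
  // readers to that temporary. Trivial copies (Dest == Src) are dropped.
  inline std::vector<Copy> sequentializeParallelCopy(std::vector<Copy> Pending) {
    std::vector<Copy> Out;
    int TempCount = 0;
    auto IsRead = [&Pending](const std::string &Name, std::size_t Skip) {
      for (std::size_t J = 0; J < Pending.size(); ++J)
        if (J != Skip && Pending[J].second == Name)
          return true;
      return false;
    };
    while (!Pending.empty()) {
      bool Progress = false;
      for (std::size_t I = 0; I < Pending.size(); ++I) {
        if (!IsRead(Pending[I].first, I)) {
          if (Pending[I].first != Pending[I].second)
            Out.push_back(Pending[I]);
          Pending.erase(Pending.begin() + I);
          Progress = true;
          break;
        }
      }
      if (Progress)
        continue;
      // Only cycles remain: break one by saving the first Dest to a temporary.
      std::string Temp = "tmp" + std::to_string(TempCount++);
      const std::string Saved = Pending.front().first;
      Out.push_back({Temp, Saved});
      for (Copy &C : Pending)
        if (C.second == Saved)
          C.second = Temp;
    }
    return Out;
  }

  // For {A=B, B=C, C=A} this yields tmp0=A; A=B; B=C; C=tmp0, matching the
  // permutation example in the comment above.
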
@@ -403,17 +395,16 @@
Desc[I].Src = Src;
Desc[I].Processed = false;
Desc[I].NumPred = 0;
- // Cherry-pick any trivial assignments, so that they don't
- // contribute to the running complexity of the topological sort.
+ // Cherry-pick any trivial assignments, so that they don't contribute to
+ // the running complexity of the topological sort.
if (sameVarOrReg(Dest, Src)) {
Desc[I].Processed = true;
--Remaining;
if (Dest != Src)
- // If Dest and Src are syntactically the same, don't bother
- // adding the assignment, because in all respects it would
- // be redundant, and if Dest/Src are on the stack, the
- // target lowering may naively decide to lower it using a
- // temporary register.
+ // If Dest and Src are syntactically the same, don't bother adding
+ // the assignment, because in all respects it would be redundant, and
+ // if Dest/Src are on the stack, the target lowering may naively
+ // decide to lower it using a temporary register.
Split->appendInst(InstAssign::create(Func, Dest, Src));
}
}
@@ -427,8 +418,8 @@
if (Desc[J].Processed)
continue;
if (I != J) {
- // There shouldn't be two Phis with the same Dest variable
- // or register.
+ // There shouldn't be two Phis with the same Dest variable or
+ // register.
assert(!sameVarOrReg(Dest, Desc[J].Dest));
}
const Operand *Src = Desc[J].Src;
@@ -443,8 +434,7 @@
constexpr int32_t WeightNoPreds = 4;
// Prefer Src as a register because the register might free up.
constexpr int32_t WeightSrcIsReg = 2;
- // Prefer Dest not as a register because the register stays free
- // longer.
+ // Prefer Dest not as a register because the register stays free longer.
constexpr int32_t WeightDestNotReg = 1;
for (size_t I = 0; I < NumPhis; ++I) {
@@ -461,11 +451,10 @@
Desc[I].Weight = Weight;
}
- // Repeatedly choose and process the best candidate in the
- // topological sort, until no candidates remain. This
- // implementation is O(N^2) where N is the number of Phi
- // instructions, but with a small constant factor compared to a
- // likely implementation of O(N) topological sort.
+ // Repeatedly choose and process the best candidate in the topological
+ // sort, until no candidates remain. This implementation is O(N^2) where N
+ // is the number of Phi instructions, but with a small constant factor
+ // compared to a likely implementation of O(N) topological sort.
for (; Remaining; --Remaining) {
size_t BestIndex = 0;
int32_t BestWeight = -1;
@@ -488,9 +477,9 @@
// Break a cycle by introducing a temporary.
if (Desc[BestIndex].NumPred) {
bool Found = false;
- // If the target instruction "A=B" is part of a cycle, find
- // the "X=A" assignment in the cycle because it will have to
- // be rewritten as "X=tmp".
+ // If the target instruction "A=B" is part of a cycle, find the "X=A"
+ // assignment in the cycle because it will have to be rewritten as
+ // "X=tmp".
for (size_t J = 0; !Found && J < NumPhis; ++J) {
if (Desc[J].Processed)
continue;
@@ -510,9 +499,8 @@
// Now that a cycle (if any) has been broken, create the actual
// assignment.
Split->appendInst(InstAssign::create(Func, Dest, Src));
- // Update NumPred for all Phi assignments using this Phi's Src
- // as their Dest variable. Also update Weight if NumPred
- // dropped from 1 to 0.
+ // Update NumPred for all Phi assignments using this Phi's Src as their
+ // Dest variable. Also update Weight if NumPred dropped from 1 to 0.
if (auto Var = llvm::dyn_cast<Variable>(Src)) {
for (size_t I = 0; I < NumPhis; ++I) {
if (Desc[I].Processed)
@@ -532,10 +520,9 @@
}
}
-// Does address mode optimization. Pass each instruction to the
-// TargetLowering object. If it returns a new instruction
-// (representing the optimized address mode), then insert the new
-// instruction and delete the old.
+// Does address mode optimization. Pass each instruction to the TargetLowering
+// object. If it returns a new instruction (representing the optimized address
+// mode), then insert the new instruction and delete the old.
void CfgNode::doAddressOpt() {
TargetLowering *Target = Func->getTarget();
LoweringContext &Context = Target->getContext();
@@ -567,8 +554,8 @@
}
}
-// Drives the target lowering. Passes the current instruction and the
-// next non-deleted instruction for target lowering.
+// Drives the target lowering. Passes the current instruction and the next
+// non-deleted instruction for target lowering.
void CfgNode::genCode() {
TargetLowering *Target = Func->getTarget();
LoweringContext &Context = Target->getContext();
@@ -603,24 +590,23 @@
}
}
-// Performs liveness analysis on the block. Returns true if the
-// incoming liveness changed from before, false if it stayed the same.
-// (If it changes, the node's predecessors need to be processed
-// again.)
+// Performs liveness analysis on the block. Returns true if the incoming
+// liveness changed from before, false if it stayed the same. (If it changes,
+// the node's predecessors need to be processed again.)
bool CfgNode::liveness(Liveness *Liveness) {
SizeT NumVars = Liveness->getNumVarsInNode(this);
LivenessBV Live(NumVars);
LiveBeginEndMap *LiveBegin = nullptr;
LiveBeginEndMap *LiveEnd = nullptr;
- // Mark the beginning and ending of each variable's live range
- // with the sentinel instruction number 0.
+ // Mark the beginning and ending of each variable's live range with the
+ // sentinel instruction number 0.
if (Liveness->getMode() == Liveness_Intervals) {
LiveBegin = Liveness->getLiveBegin(this);
LiveEnd = Liveness->getLiveEnd(this);
LiveBegin->clear();
LiveEnd->clear();
- // Guess that the number of live ranges beginning is roughly the
- // number of instructions, and same for live ranges ending.
+ // Guess that the number of live ranges beginning is roughly the number of
+ // instructions, and same for live ranges ending.
LiveBegin->reserve(getInstCountEstimate());
LiveEnd->reserve(getInstCountEstimate());
}
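
For reference, the per-block computation this comment describes is the standard
backward liveness equation. A minimal sketch using a summarized Use/Def form
and plain std::vector<bool> (the real pass walks instructions in reverse and
uses Subzero's LivenessBV; BlockInfo and livenessStep below are illustrative
stand-ins):

#include <cstddef>
#include <vector>

// Illustrative block summary: Use = read before written, Def = written.
struct BlockInfo {
  std::vector<BlockInfo *> Succs;
  std::vector<bool> Use, Def, LiveIn, LiveOut;
};

// One liveness step for a block. Returns true if LiveIn changed, meaning the
// block's predecessors need to be processed again.
bool livenessStep(BlockInfo &B, std::size_t NumVars) {
  std::vector<bool> Live(NumVars, false);
  for (BlockInfo *S : B.Succs) // LiveOut = union of successors' LiveIn
    for (std::size_t V = 0; V < NumVars; ++V)
      Live[V] = Live[V] || S->LiveIn[V];
  B.LiveOut = Live;
  for (std::size_t V = 0; V < NumVars; ++V) // LiveIn = Use | (LiveOut & ~Def)
    Live[V] = B.Use[V] || (Live[V] && !B.Def[V]);
  const bool Changed = (Live != B.LiveIn);
  B.LiveIn = Live;
  return Changed;
}
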
@@ -643,9 +629,8 @@
continue;
I.liveness(I.getNumber(), Live, Liveness, LiveBegin, LiveEnd);
}
- // Process phis in forward order so that we can override the
- // instruction number to be that of the earliest phi instruction in
- // the block.
+ // Process phis in forward order so that we can override the instruction
+ // number to be that of the earliest phi instruction in the block.
SizeT NumNonDeadPhis = 0;
InstNumberT FirstPhiNumber = Inst::NumberSentinel;
for (Inst &I : Phis) {
@@ -657,18 +642,17 @@
++NumNonDeadPhis;
}
- // When using the sparse representation, after traversing the
- // instructions in the block, the Live bitvector should only contain
- // set bits for global variables upon block entry. We validate this
- // by shrinking the Live vector and then testing it against the
- // pre-shrunk version. (The shrinking is required, but the
- // validation is not.)
+ // When using the sparse representation, after traversing the instructions in
+ // the block, the Live bitvector should only contain set bits for global
+ // variables upon block entry. We validate this by shrinking the Live vector
+ // and then testing it against the pre-shrunk version. (The shrinking is
+ // required, but the validation is not.)
LivenessBV LiveOrig = Live;
Live.resize(Liveness->getNumGlobalVars());
if (Live != LiveOrig) {
if (BuildDefs::dump()) {
- // This is a fatal liveness consistency error. Print some
- // diagnostics and abort.
+ // This is a fatal liveness consistency error. Print some diagnostics and
+ // abort.
Ostream &Str = Func->getContext()->getStrDump();
Func->resetCurrentNode();
Str << "LiveOrig-Live =";
@@ -697,13 +681,12 @@
return Changed;
}
-// Once basic liveness is complete, compute actual live ranges. It is
-// assumed that within a single basic block, a live range begins at
-// most once and ends at most once. This is certainly true for pure
-// SSA form. It is also true once phis are lowered, since each
-// assignment to the phi-based temporary is in a different basic
-// block, and there is a single read that ends the live in the basic
-// block that contained the actual phi instruction.
+// Once basic liveness is complete, compute actual live ranges. It is assumed
+// that within a single basic block, a live range begins at most once and ends
+// at most once. This is certainly true for pure SSA form. It is also true once
+// phis are lowered, since each assignment to the phi-based temporary is in a
+// different basic block, and there is a single read that ends the live range
+// in the basic block that contained the actual phi instruction.
void CfgNode::livenessAddIntervals(Liveness *Liveness, InstNumberT FirstInstNum,
InstNumberT LastInstNum) {
TimerMarker T1(TimerStack::TT_liveRange, Func);
@@ -736,14 +719,13 @@
SizeT i1 = IBB == IBE ? NumVars : IBB->first;
SizeT i2 = IEB == IEE ? NumVars : IEB->first;
SizeT i = std::min(i1, i2);
- // i1 is the Variable number of the next MapBegin entry, and i2 is
- // the Variable number of the next MapEnd entry. If i1==i2, then
- // the Variable's live range begins and ends in this block. If
- // i1<i2, then i1's live range begins at instruction IBB->second
- // and extends through the end of the block. If i1>i2, then i2's
- // live range begins at the first instruction of the block and
- // ends at IEB->second. In any case, we choose the lesser of i1
- // and i2 and proceed accordingly.
+ // i1 is the Variable number of the next MapBegin entry, and i2 is the
+ // Variable number of the next MapEnd entry. If i1==i2, then the Variable's
+ // live range begins and ends in this block. If i1<i2, then i1's live range
+ // begins at instruction IBB->second and extends through the end of the
+ // block. If i1>i2, then i2's live range begins at the first instruction of
+ // the block and ends at IEB->second. In any case, we choose the lesser of
+ // i1 and i2 and proceed accordingly.
InstNumberT LB = i == i1 ? IBB->second : FirstInstNum;
InstNumberT LE = i == i2 ? IEB->second : LastInstNum + 1;
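
The i1/i2 merge spelled out above can be shown in isolation. The sketch below
uses simplified stand-in types (BeginEndMap as a sorted vector of pairs) and
omits the LB > LE wraparound case handled just after this hunk; it is not the
real livenessAddIntervals code:

#include <algorithm>
#include <cstdint>
#include <utility>
#include <vector>

using InstNumberT = int32_t;
using Range = std::pair<InstNumberT, InstNumberT>; // [begin, end)
// (variable number, instruction number), sorted by variable number, as for
// the LiveBegin/LiveEnd maps described above.
using BeginEndMap = std::vector<std::pair<std::size_t, InstNumberT>>;

// Merge the begin/end maps for one block spanning [FirstInstNum, LastInstNum]
// into per-variable live ranges.
std::vector<std::pair<std::size_t, Range>>
mergeLiveRanges(const BeginEndMap &Begin, const BeginEndMap &End,
                std::size_t NumVars, InstNumberT FirstInstNum,
                InstNumberT LastInstNum) {
  std::vector<std::pair<std::size_t, Range>> Out;
  auto IB = Begin.begin(), IE = End.begin();
  while (IB != Begin.end() || IE != End.end()) {
    std::size_t i1 = (IB == Begin.end()) ? NumVars : IB->first;
    std::size_t i2 = (IE == End.end()) ? NumVars : IE->first;
    std::size_t i = std::min(i1, i2);
    // i1==i2: the range begins and ends here.  i1<i2: it begins here and runs
    // to the end of the block.  i1>i2: it is live at entry and ends here.
    InstNumberT LB = (i == i1) ? (IB++)->second : FirstInstNum;
    InstNumberT LE = (i == i2) ? (IE++)->second : LastInstNum + 1;
    Out.push_back({i, {LB, LE}});
  }
  return Out;
}
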
@@ -751,9 +733,9 @@
if (LB > LE) {
Var->addLiveRange(FirstInstNum, LE);
Var->addLiveRange(LB, LastInstNum + 1);
- // Assert that Var is a global variable by checking that its
- // liveness index is less than the number of globals. This
- // ensures that the LiveInAndOut[] access is valid.
+ // Assert that Var is a global variable by checking that its liveness
+ // index is less than the number of globals. This ensures that the
+ // LiveInAndOut[] access is valid.
assert(i < Liveness->getNumGlobalVars());
LiveInAndOut[i] = false;
} else {
@@ -774,8 +756,8 @@
}
// If this node contains only deleted instructions, and ends in an
-// unconditional branch, contract the node by repointing all its
-// in-edges to its successor.
+// unconditional branch, contract the node by repointing all its in-edges to
+// its successor.
void CfgNode::contractIfEmpty() {
if (InEdges.empty())
return;
@@ -795,10 +777,10 @@
Branch->setDeleted();
CfgNode *Successor = OutEdges.front();
- // Repoint all this node's in-edges to this node's successor, unless
- // this node's successor is actually itself (in which case the
- // statement "OutEdges.front()->InEdges.push_back(Pred)" could
- // invalidate the iterator over this->InEdges).
+ // Repoint all this node's in-edges to this node's successor, unless this
+ // node's successor is actually itself (in which case the statement
+ // "OutEdges.front()->InEdges.push_back(Pred)" could invalidate the iterator
+ // over this->InEdges).
if (Successor != this) {
for (CfgNode *Pred : InEdges) {
for (CfgNode *&I : Pred->OutEdges) {
@@ -814,8 +796,8 @@
}
// Remove the in-edge to the successor to allow node reordering to make
- // better decisions. For example it's more helpful to place a node after
- // a reachable predecessor than an unreachable one (like the one we just
+  // better decisions. For example, it's more helpful to place a node after a
+ // reachable predecessor than an unreachable one (like the one we just
// contracted).
Successor->InEdges.erase(
std::find(Successor->InEdges.begin(), Successor->InEdges.end(), this));
@@ -826,10 +808,10 @@
void CfgNode::doBranchOpt(const CfgNode *NextNode) {
TargetLowering *Target = Func->getTarget();
// Find the first opportunity for branch optimization (which will be the last
- // instruction in the block) and stop. This is sufficient unless there is some
- // target lowering where we have the possibility of multiple optimizations per
- // block. Take care with switch lowering as there are multiple unconditional
- // branches and only the last can be deleted.
+ // instruction in the block) and stop. This is sufficient unless there is
+ // some target lowering where we have the possibility of multiple
+ // optimizations per block. Take care with switch lowering as there are
+ // multiple unconditional branches and only the last can be deleted.
for (Inst &I : reverse_range(Insts)) {
if (!I.isDeleted()) {
Target->doBranchOpt(&I, NextNode);
@@ -869,8 +851,8 @@
}
}
}
- // Sort the variables by regnum so they are always printed in a
- // familiar order.
+ // Sort the variables by regnum so they are always printed in a familiar
+ // order.
std::sort(LiveRegs.begin(), LiveRegs.end(),
[](const Variable *V1, const Variable *V2) {
return V1->getRegNum() < V2->getRegNum();
@@ -892,11 +874,11 @@
return;
bool First = true;
Variable *Dest = Instr->getDest();
- // Normally we increment the live count for the dest register. But
- // we shouldn't if the instruction's IsDestNonKillable flag is set,
- // because this means that the target lowering created this
- // instruction as a non-SSA assignment; i.e., a different, previous
- // instruction started the dest variable's live range.
+ // Normally we increment the live count for the dest register. But we
+ // shouldn't if the instruction's IsDestNonKillable flag is set, because this
+ // means that the target lowering created this instruction as a non-SSA
+ // assignment; i.e., a different, previous instruction started the dest
+ // variable's live range.
if (!Instr->isDestNonKillable() && Dest && Dest->hasReg())
++LiveRegCount[Dest->getRegNum()];
FOREACH_VAR_IN_INST(Var, *Instr) {
@@ -921,8 +903,8 @@
void updateStats(Cfg *Func, const Inst *I) {
if (!BuildDefs::dump())
return;
- // Update emitted instruction count, plus fill/spill count for
- // Variable operands without a physical register.
+ // Update emitted instruction count, plus fill/spill count for Variable
+ // operands without a physical register.
if (uint32_t Count = I->getEmitInstCount()) {
Func->getContext()->statsUpdateEmitted(Count);
if (Variable *Dest = I->getDest()) {
@@ -949,10 +931,10 @@
bool DecorateAsm =
Liveness && Func->getContext()->getFlags().getDecorateAsm();
Str << getAsmName() << ":\n";
- // LiveRegCount keeps track of the number of currently live
- // variables that each register is assigned to. Normally that would
- // be only 0 or 1, but the register allocator's AllowOverlap
- // inference allows it to be greater than 1 for short periods.
+ // LiveRegCount keeps track of the number of currently live variables that
+ // each register is assigned to. Normally that would be only 0 or 1, but the
+ // register allocator's AllowOverlap inference allows it to be greater than 1
+ // for short periods.
std::vector<SizeT> LiveRegCount(Func->getTarget()->getNumRegisters());
if (DecorateAsm) {
constexpr bool IsLiveIn = true;
@@ -969,15 +951,14 @@
if (I.isDeleted())
continue;
if (I.isRedundantAssign()) {
- // Usually, redundant assignments end the live range of the src
- // variable and begin the live range of the dest variable, with
- // no net effect on the liveness of their register. However, if
- // the register allocator infers the AllowOverlap condition,
- // then this may be a redundant assignment that does not end the
- // src variable's live range, in which case the active variable
- // count for that register needs to be bumped. That normally
- // would have happened as part of emitLiveRangesEnded(), but
- // that isn't called for redundant assignments.
+ // Usually, redundant assignments end the live range of the src variable
+ // and begin the live range of the dest variable, with no net effect on
+ // the liveness of their register. However, if the register allocator
+ // infers the AllowOverlap condition, then this may be a redundant
+ // assignment that does not end the src variable's live range, in which
+ // case the active variable count for that register needs to be bumped.
+ // That normally would have happened as part of emitLiveRangesEnded(),
+ // but that isn't called for redundant assignments.
Variable *Dest = I.getDest();
if (DecorateAsm && Dest->hasReg() && !I.isLastUse(I.getSrc(0)))
++LiveRegCount[Dest->getRegNum()];
@@ -1010,41 +991,38 @@
BundleMaskLo(BundleSize - 1), BundleMaskHi(~BundleMaskLo) {}
// Check whether we're currently within a bundle_lock region.
bool isInBundleLockRegion() const { return BundleLockStart != End; }
- // Check whether the current bundle_lock region has the align_to_end
- // option.
+ // Check whether the current bundle_lock region has the align_to_end option.
bool isAlignToEnd() const {
assert(isInBundleLockRegion());
return llvm::cast<InstBundleLock>(getBundleLockStart())->getOption() ==
InstBundleLock::Opt_AlignToEnd;
}
- // Check whether the entire bundle_lock region falls within the same
- // bundle.
+ // Check whether the entire bundle_lock region falls within the same bundle.
bool isSameBundle() const {
assert(isInBundleLockRegion());
return SizeSnapshotPre == SizeSnapshotPost ||
(SizeSnapshotPre & BundleMaskHi) ==
((SizeSnapshotPost - 1) & BundleMaskHi);
}
- // Get the bundle alignment of the first instruction of the
- // bundle_lock region.
+ // Get the bundle alignment of the first instruction of the bundle_lock
+ // region.
intptr_t getPreAlignment() const {
assert(isInBundleLockRegion());
return SizeSnapshotPre & BundleMaskLo;
}
- // Get the bundle alignment of the first instruction past the
- // bundle_lock region.
+ // Get the bundle alignment of the first instruction past the bundle_lock
+ // region.
intptr_t getPostAlignment() const {
assert(isInBundleLockRegion());
return SizeSnapshotPost & BundleMaskLo;
}
- // Get the iterator pointing to the bundle_lock instruction, e.g. to
- // roll back the instruction iteration to that point.
+ // Get the iterator pointing to the bundle_lock instruction, e.g. to roll
+ // back the instruction iteration to that point.
InstList::const_iterator getBundleLockStart() const {
assert(isInBundleLockRegion());
return BundleLockStart;
}
- // Set up bookkeeping when the bundle_lock instruction is first
- // processed.
+ // Set up bookkeeping when the bundle_lock instruction is first processed.
void enterBundleLock(InstList::const_iterator I) {
assert(!isInBundleLockRegion());
BundleLockStart = I;
@@ -1053,18 +1031,16 @@
Target->snapshotEmitState();
assert(isInBundleLockRegion());
}
- // Update bookkeeping when the bundle_unlock instruction is
- // processed.
+ // Update bookkeeping when the bundle_unlock instruction is processed.
void enterBundleUnlock() {
assert(isInBundleLockRegion());
SizeSnapshotPost = Asm->getBufferSize();
}
- // Update bookkeeping when we are completely finished with the
- // bundle_lock region.
+ // Update bookkeeping when we are completely finished with the bundle_lock
+ // region.
void leaveBundleLockRegion() { BundleLockStart = End; }
- // Check whether the instruction sequence fits within the current
- // bundle, and if not, add nop padding to the end of the current
- // bundle.
+ // Check whether the instruction sequence fits within the current bundle, and
+ // if not, add nop padding to the end of the current bundle.
void padToNextBundle() {
assert(isInBundleLockRegion());
if (!isSameBundle()) {
@@ -1076,8 +1052,8 @@
assert(Asm->getBufferSize() == SizeSnapshotPre);
}
}
- // If align_to_end is specified, add padding such that the
- // instruction sequences ends precisely at a bundle boundary.
+ // If align_to_end is specified, add padding such that the instruction
+  // sequence ends precisely at a bundle boundary.
void padForAlignToEnd() {
assert(isInBundleLockRegion());
if (isAlignToEnd()) {
@@ -1098,8 +1074,8 @@
private:
Assembler *const Asm;
TargetLowering *const Target;
- // End is a sentinel value such that BundleLockStart==End implies
- // that we are not in a bundle_lock region.
+ // End is a sentinel value such that BundleLockStart==End implies that we are
+ // not in a bundle_lock region.
const InstList::const_iterator End;
InstList::const_iterator BundleLockStart;
const intptr_t BundleSize;
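
The pre/post alignment accessors and the padding helpers above all come down
to the same mask arithmetic. A small sketch assuming a hypothetical 32-byte
bundle, with free functions instead of the real BundleEmitHelper members:

#include <cassert>
#include <cstdint>

// Illustrative bundle of 32 bytes; the real bundle size comes from the target.
constexpr intptr_t BundleSize = 32;
constexpr intptr_t BundleMaskLo = BundleSize - 1;
constexpr intptr_t BundleMaskHi = ~BundleMaskLo;

// Offset of a buffer position within its bundle (what the pre/post alignment
// accessors above compute).
intptr_t alignmentInBundle(intptr_t Pos) { return Pos & BundleMaskLo; }

// Two positions are in the same bundle when their high bits match (this is
// the check behind isSameBundle()).
bool sameBundle(intptr_t A, intptr_t B) {
  return (A & BundleMaskHi) == (B & BundleMaskHi);
}

// Padding needed so the next emitted byte starts a fresh bundle (0 if Pos is
// already bundle-aligned).
intptr_t padToNextBundle(intptr_t Pos) {
  return (BundleSize - alignmentInBundle(Pos)) & BundleMaskLo;
}

// Padding needed so a sequence of SeqSize bytes ends exactly on a bundle
// boundary, as align_to_end requires.
intptr_t padForAlignToEnd(intptr_t Pos, intptr_t SeqSize) {
  assert(SeqSize <= BundleSize);
  return padToNextBundle(Pos + SeqSize);
}
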
@@ -1116,9 +1092,9 @@
void CfgNode::emitIAS(Cfg *Func) const {
Func->setCurrentNode(this);
Assembler *Asm = Func->getAssembler<>();
- // TODO(stichnot): When sandboxing, defer binding the node label
- // until just before the first instruction is emitted, to reduce the
- // chance that a padding nop is a branch target.
+ // TODO(stichnot): When sandboxing, defer binding the node label until just
+ // before the first instruction is emitted, to reduce the chance that a
+ // padding nop is a branch target.
Asm->bindCfgNodeLabel(getIndex());
for (const Inst &I : Phis) {
if (I.isDeleted())
@@ -1138,33 +1114,33 @@
return;
}
- // The remainder of the function handles emission with sandboxing.
- // There are explicit bundle_lock regions delimited by bundle_lock
- // and bundle_unlock instructions. All other instructions are
- // treated as an implicit one-instruction bundle_lock region.
- // Emission is done twice for each bundle_lock region. The first
- // pass is a preliminary pass, after which we can figure out what
- // nop padding is needed, then roll back, and make the final pass.
+ // The remainder of the function handles emission with sandboxing. There are
+ // explicit bundle_lock regions delimited by bundle_lock and bundle_unlock
+ // instructions. All other instructions are treated as an implicit
+ // one-instruction bundle_lock region. Emission is done twice for each
+ // bundle_lock region. The first pass is a preliminary pass, after which we
+ // can figure out what nop padding is needed, then roll back, and make the
+ // final pass.
//
- // Ideally, the first pass would be speculative and the second pass
- // would only be done if nop padding were needed, but the structure
- // of the integrated assembler makes it hard to roll back the state
- // of label bindings, label links, and relocation fixups. Instead,
- // the first pass just disables all mutation of that state.
+ // Ideally, the first pass would be speculative and the second pass would
+ // only be done if nop padding were needed, but the structure of the
+ // integrated assembler makes it hard to roll back the state of label
+ // bindings, label links, and relocation fixups. Instead, the first pass just
+ // disables all mutation of that state.
BundleEmitHelper Helper(Asm, Func->getTarget(), Insts);
InstList::const_iterator End = Insts.end();
- // Retrying indicates that we had to roll back to the bundle_lock
- // instruction to apply padding before the bundle_lock sequence.
+ // Retrying indicates that we had to roll back to the bundle_lock instruction
+ // to apply padding before the bundle_lock sequence.
bool Retrying = false;
for (InstList::const_iterator I = Insts.begin(); I != End; ++I) {
if (I->isDeleted() || I->isRedundantAssign())
continue;
if (llvm::isa<InstBundleLock>(I)) {
- // Set up the initial bundle_lock state. This should not happen
- // while retrying, because the retry rolls back to the
- // instruction following the bundle_lock instruction.
+ // Set up the initial bundle_lock state. This should not happen while
+ // retrying, because the retry rolls back to the instruction following
+ // the bundle_lock instruction.
assert(!Retrying);
Helper.enterBundleLock(I);
continue;
@@ -1175,16 +1151,16 @@
if (Retrying) {
// Make sure all instructions are in the same bundle.
assert(Helper.isSameBundle());
- // If align_to_end is specified, make sure the next
- // instruction begins the bundle.
+ // If align_to_end is specified, make sure the next instruction begins
+ // the bundle.
assert(!Helper.isAlignToEnd() || Helper.getPostAlignment() == 0);
Helper.leaveBundleLockRegion();
Retrying = false;
} else {
// This is the first pass, so roll back for the retry pass.
Helper.rollback();
- // Pad to the next bundle if the instruction sequence crossed
- // a bundle boundary.
+ // Pad to the next bundle if the instruction sequence crossed a bundle
+ // boundary.
Helper.padToNextBundle();
// Insert additional padding to make AlignToEnd work.
Helper.padForAlignToEnd();
@@ -1215,8 +1191,8 @@
}
}
- // Don't allow bundle locking across basic blocks, to keep the
- // backtracking mechanism simple.
+ // Don't allow bundle locking across basic blocks, to keep the backtracking
+ // mechanism simple.
assert(!Helper.isInBundleLockRegion());
assert(!Retrying);
}
diff --git a/src/IceCfgNode.h b/src/IceCfgNode.h
index a4744db..c6aa729 100644
--- a/src/IceCfgNode.h
+++ b/src/IceCfgNode.h
@@ -8,9 +8,8 @@
//===----------------------------------------------------------------------===//
///
/// \file
-/// This file declares the CfgNode class, which represents a single
-/// basic block as its instruction list, in-edge list, and out-edge
-/// list.
+/// This file declares the CfgNode class, which represents a single basic block
+/// as its instruction list, in-edge list, and out-edge list.
///
//===----------------------------------------------------------------------===//
@@ -50,8 +49,8 @@
void setLoopNestDepth(SizeT NewDepth) { LoopNestDepth = NewDepth; }
SizeT getLoopNestDepth() const { return LoopNestDepth; }
- /// The HasReturn flag indicates that this node contains a return
- /// instruction and therefore needs an epilog.
+ /// The HasReturn flag indicates that this node contains a return instruction
+ /// and therefore needs an epilog.
void setHasReturn() { HasReturn = true; }
bool getHasReturn() const { return HasReturn; }
@@ -73,18 +72,17 @@
PhiList &getPhis() { return Phis; }
void appendInst(Inst *Inst);
void renumberInstructions();
- /// Rough and generally conservative estimate of the number of
- /// instructions in the block. It is updated when an instruction is
- /// added, but not when deleted. It is recomputed during
- /// renumberInstructions().
+ /// Rough and generally conservative estimate of the number of instructions in
+ /// the block. It is updated when an instruction is added, but not when
+ /// deleted. It is recomputed during renumberInstructions().
InstNumberT getInstCountEstimate() const { return InstCountEstimate; }
/// @}
/// \name Manage predecessors and successors.
/// @{
- /// Add a predecessor edge to the InEdges list for each of this
- /// node's successors.
+ /// Add a predecessor edge to the InEdges list for each of this node's
+ /// successors.
void computePredecessors();
void computeSuccessors();
CfgNode *splitIncomingEdge(CfgNode *Pred, SizeT InEdgeIndex);
diff --git a/src/IceClFlags.cpp b/src/IceClFlags.cpp
index 6c2e984..a27fb3f 100644
--- a/src/IceClFlags.cpp
+++ b/src/IceClFlags.cpp
@@ -8,9 +8,9 @@
//===----------------------------------------------------------------------===//
///
/// \file
-/// This file defines commandline flags parsing.
-/// This currently relies on llvm::cl to parse. In the future, the minimal
-/// build can have a simpler parser.
+/// This file defines commandline flags parsing. This currently relies on
+/// llvm::cl to parse. In the future, the minimal build can have a simpler
+/// parser.
///
//===----------------------------------------------------------------------===//
@@ -99,16 +99,15 @@
cl::opt<bool> MockBoundsCheck("mock-bounds-check",
cl::desc("Mock bounds checking on loads/stores"));
-// Number of translation threads (in addition to the parser thread and
-// the emitter thread). The special case of 0 means purely
-// sequential, i.e. parser, translator, and emitter all within the
-// same single thread. (This may need a slight rework if we expand to
-// multiple parser or emitter threads.)
+// Number of translation threads (in addition to the parser thread and the
+// emitter thread). The special case of 0 means purely sequential, i.e. parser,
+// translator, and emitter all within the same single thread. (This may need a
+// slight rework if we expand to multiple parser or emitter threads.)
cl::opt<uint32_t> NumThreads(
"threads",
cl::desc("Number of translation threads (0 for purely sequential)"),
- // TODO(stichnot): Settle on a good default. Consider
- // something related to std::thread::hardware_concurrency().
+ // TODO(stichnot): Settle on a good default. Consider something related to
+ // std::thread::hardware_concurrency().
cl::init(2));
cl::opt<Ice::OptLevel> OLevel(cl::desc("Optimization level"),
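
If the TODO above is ever revisited, the default could be derived from the
host instead of being hard-coded. A purely illustrative sketch (not current
Subzero behavior; the cap of 8 is arbitrary):

#include <algorithm>
#include <cstdint>
#include <thread>

// One way to choose a translation-thread default from the host, with a cap
// and a fallback when hardware_concurrency() reports 0 (meaning "unknown").
uint32_t defaultTranslationThreads() {
  const unsigned HW = std::thread::hardware_concurrency();
  return HW == 0 ? 2u : std::min(HW, 8u);
}
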
@@ -125,9 +124,9 @@
cl::desc("Enable edge splitting for Phi lowering"),
cl::init(true));
-// TODO(stichnot): See if we can easily use LLVM's -rng-seed option
-// and implementation. I expect the implementation is different and
-// therefore the tests would need to be changed.
+// TODO(stichnot): See if we can easily use LLVM's -rng-seed option and
+// implementation. I expect the implementation is different and therefore the
+// tests would need to be changed.
cl::opt<unsigned long long>
RandomSeed("sz-seed", cl::desc("Seed the random number generator"),
cl::init(1));
@@ -255,10 +254,10 @@
"exit-success", cl::desc("Exit with success status, even if errors found"),
cl::init(false));
-// Note: While this flag isn't used in the minimal build, we keep this
-// flag so that tests can set this command-line flag without concern
-// to the type of build. We double check that this flag at runtime
-// to make sure the consistency is maintained.
+// Note: While this flag isn't used in the minimal build, we keep this flag so
+// that tests can set this command-line flag without concern for the type of
+// build. We double-check this flag at runtime to make sure consistency is
+// maintained.
cl::opt<bool>
BuildOnRead("build-on-read",
cl::desc("Build ICE instructions when reading bitcode"),
@@ -413,8 +412,8 @@
::DisableTranslation = true;
Ice::VerboseMask VMask = Ice::IceV_None;
- // Don't generate verbose messages if routines
- // to dump messages are not available.
+ // Don't generate verbose messages if routines to dump messages are not
+ // available.
if (BuildDefs::dump()) {
for (unsigned i = 0; i != VerboseList.size(); ++i)
VMask |= VerboseList[i];
diff --git a/src/IceClFlags.h b/src/IceClFlags.h
index 89f5783..87e16cd 100644
--- a/src/IceClFlags.h
+++ b/src/IceClFlags.h
@@ -79,8 +79,8 @@
void setFunctionSections(bool NewValue) { FunctionSections = NewValue; }
bool getGenerateUnitTestMessages() const {
- // Note: If dump routines have been turned off, the error messages
- // will not be readable. Hence, turn off.
+ // Note: If dump routines have been turned off, the error messages will not
+ // be readable. Hence, turn off.
return !BuildDefs::dump() || GenerateUnitTestMessages;
}
void setGenerateUnitTestMessages(bool NewValue) {
diff --git a/src/IceCompileServer.cpp b/src/IceCompileServer.cpp
index b7bc72b..db0694b 100644
--- a/src/IceCompileServer.cpp
+++ b/src/IceCompileServer.cpp
@@ -37,9 +37,8 @@
namespace {
-// Define a SmallVector backed buffer as a data stream, so that it
-// can hold the generated binary version of the textual bitcode in the
-// input file.
+// Define a SmallVector backed buffer as a data stream, so that it can hold the
+// generated binary version of the textual bitcode in the input file.
class TextDataStreamer : public llvm::DataStreamer {
public:
TextDataStreamer() = default;
@@ -129,8 +128,8 @@
}
ELFStr.reset(new ELFStreamer(*FdOs.get()));
Os.reset(FdOs.release());
- // NaCl sets st_blksize to 0, and LLVM uses that to pick the
- // default preferred buffer size. Set to something non-zero.
+ // NaCl sets st_blksize to 0, and LLVM uses that to pick the default
+ // preferred buffer size. Set to something non-zero.
Os->SetBufferSize(1 << 14);
} break;
case FT_Asm:
diff --git a/src/IceCompileServer.h b/src/IceCompileServer.h
index e027cbb..8d99927 100644
--- a/src/IceCompileServer.h
+++ b/src/IceCompileServer.h
@@ -8,8 +8,8 @@
//===----------------------------------------------------------------------===//
///
/// \file
-/// This file declares the compile server. Given a compiler implementation,
-/// it dispatches compile requests to the implementation.
+/// This file declares the compile server. Given a compiler implementation, it
+/// dispatches compile requests to the implementation.
///
//===----------------------------------------------------------------------===//
@@ -27,17 +27,17 @@
namespace Ice {
-/// A CompileServer awaits compile requests, and dispatches the requests
-/// to a given Compiler. Each request is paired with an input stream,
-/// a context (which has the output stream), and a set of arguments.
-/// The CompileServer takes over the current thread to listen to requests,
-/// and compile requests are handled on separate threads.
+/// A CompileServer awaits compile requests, and dispatches the requests to a
+/// given Compiler. Each request is paired with an input stream, a context
+/// (which has the output stream), and a set of arguments. The CompileServer
+/// takes over the current thread to listen to requests, and compile requests
+/// are handled on separate threads.
///
/// Currently, this only handles a single request.
///
-/// When run on the commandline, it receives and therefore dispatches
-/// the request immediately. When run in the browser, it blocks waiting
-/// for a request.
+/// When run on the commandline, it receives and therefore dispatches the
+/// request immediately. When run in the browser, it blocks waiting for a
+/// request.
class CompileServer {
CompileServer() = delete;
CompileServer(const CompileServer &) = delete;
diff --git a/src/IceCompiler.cpp b/src/IceCompiler.cpp
index 4d3bbd4..b4b6c89 100644
--- a/src/IceCompiler.cpp
+++ b/src/IceCompiler.cpp
@@ -52,8 +52,8 @@
{"minimal_build", BuildDefs::minimal()},
{"browser_mode", PNACL_BROWSER_TRANSLATOR}};
-// Validates values of build attributes. Prints them to Stream if
-// Stream is non-null.
+// Validates values of build attributes. Prints them to Stream if Stream is
+// non-null.
void validateAndGenerateBuildAttributes(Ostream *Stream) {
// List the supported targets.
if (Stream) {
@@ -100,7 +100,7 @@
}
// The Minimal build (specifically, when dump()/emit() are not implemented)
- // allows only --filetype=obj. Check here to avoid cryptic error messages
+ // allows only --filetype=obj. Check here to avoid cryptic error messages
// downstream.
if (!BuildDefs::dump() && Ctx.getFlags().getOutFileType() != FT_Elf) {
// TODO(stichnot): Access the actual command-line argument via
diff --git a/src/IceCompiler.h b/src/IceCompiler.h
index e121dbb..6239b9f 100644
--- a/src/IceCompiler.h
+++ b/src/IceCompiler.h
@@ -33,8 +33,8 @@
public:
Compiler() = default;
- /// Run the compiler with the given GlobalContext for compilation
- /// state. Upon error, the Context's error status will be set.
+ /// Run the compiler with the given GlobalContext for compilation state. Upon
+ /// error, the Context's error status will be set.
void run(const ClFlagsExtra &ExtraFlags, GlobalContext &Ctx,
std::unique_ptr<llvm::DataStreamer> &&InputStream);
};
diff --git a/src/IceConditionCodesARM32.h b/src/IceConditionCodesARM32.h
index d897a44..d739310 100644
--- a/src/IceConditionCodesARM32.h
+++ b/src/IceConditionCodesARM32.h
@@ -26,8 +26,8 @@
CondARM32 &operator=(const CondARM32 &) = delete;
public:
- /// An enum of codes used for conditional instructions. The enum value
- /// should match the value used to encode operands in binary instructions.
+ /// An enum of codes used for conditional instructions. The enum value should
+ /// match the value used to encode operands in binary instructions.
enum Cond {
#define X(tag, encode, opp, emit) tag = encode,
ICEINSTARM32COND_TABLE
diff --git a/src/IceConverter.cpp b/src/IceConverter.cpp
index 4450a79..a4d4f53 100644
--- a/src/IceConverter.cpp
+++ b/src/IceConverter.cpp
@@ -52,9 +52,9 @@
// Base class for converting LLVM to ICE.
// TODO(stichnot): Redesign Converter, LLVM2ICEConverter,
-// LLVM2ICEFunctionConverter, and LLVM2ICEGlobalsConverter with
-// respect to Translator. In particular, the unique_ptr ownership
-// rules in LLVM2ICEFunctionConverter.
+// LLVM2ICEFunctionConverter, and LLVM2ICEGlobalsConverter with respect to
+// Translator. In particular, the unique_ptr ownership rules in
+// LLVM2ICEFunctionConverter.
class LLVM2ICEConverter {
LLVM2ICEConverter() = delete;
LLVM2ICEConverter(const LLVM2ICEConverter &) = delete;
@@ -73,11 +73,11 @@
const Ice::TypeConverter TypeConverter;
};
-// Converter from LLVM functions to ICE. The entry point is the
-// convertFunction method.
+// Converter from LLVM functions to ICE. The entry point is the convertFunction
+// method.
//
-// Note: this currently assumes that the given IR was verified to be
-// valid PNaCl bitcode. Otherwise, the behavior is undefined.
+// Note: this currently assumes that the given IR was verified to be valid
+// PNaCl bitcode. Otherwise, the behavior is undefined.
class LLVM2ICEFunctionConverter : LLVM2ICEConverter {
LLVM2ICEFunctionConverter() = delete;
LLVM2ICEFunctionConverter(const LLVM2ICEFunctionConverter &) = delete;
@@ -107,10 +107,9 @@
Func->addArg(mapValueToIceVar(ArgI));
}
- // Make an initial pass through the block list just to resolve the
- // blocks in the original linearized order. Otherwise the ICE
- // linearized order will be affected by branch targets in
- // terminator instructions.
+ // Make an initial pass through the block list just to resolve the blocks
+ // in the original linearized order. Otherwise the ICE linearized order
+ // will be affected by branch targets in terminator instructions.
for (const BasicBlock &BBI : *F)
mapBasicBlockToNode(&BBI);
for (const BasicBlock &BBI : *F)
@@ -122,9 +121,8 @@
Converter.translateFcn(std::move(Func));
}
- // convertConstant() does not use Func or require it to be a valid
- // Ice::Cfg pointer. As such, it's suitable for e.g. constructing
- // global initializers.
+ // convertConstant() does not use Func or require it to be a valid Ice::Cfg
+ // pointer. As such, it's suitable for e.g. constructing global initializers.
Ice::Constant *convertConstant(const Constant *Const) {
if (const auto GV = dyn_cast<GlobalValue>(Const)) {
Ice::GlobalDeclaration *Decl = getConverter().getGlobalDeclaration(GV);
@@ -197,9 +195,8 @@
return IceTy;
}
- // Given an LLVM instruction and an operand number, produce the
- // Ice::Operand this refers to. If there's no such operand, return
- // nullptr.
+ // Given an LLVM instruction and an operand number, produce the Ice::Operand
+ // this refers to. If there's no such operand, return nullptr.
Ice::Operand *convertOperand(const Instruction *Inst, unsigned OpNum) {
if (OpNum >= Inst->getNumOperands()) {
return nullptr;
@@ -551,8 +548,8 @@
Ice::Variable *Dest = mapValueToIceVar(Inst);
Ice::Operand *CallTarget = convertValue(Inst->getCalledValue());
unsigned NumArgs = Inst->getNumArgOperands();
- // Note: Subzero doesn't (yet) do anything special with the Tail
- // flag in the bitcode, i.e. CallInst::isTailCall().
+ // Note: Subzero doesn't (yet) do anything special with the Tail flag in
+ // the bitcode, i.e. CallInst::isTailCall().
Ice::InstCall *NewInst = nullptr;
const Ice::Intrinsics::FullIntrinsicInfo *Info = nullptr;
@@ -649,8 +646,8 @@
// Converter from LLVM global variables to ICE. The entry point is the
// convertGlobalsToIce method.
//
-// Note: this currently assumes that the given IR was verified to be
-// valid PNaCl bitcode. Othewise, the behavior is undefined.
+// Note: this currently assumes that the given IR was verified to be valid
+// PNaCl bitcode. Otherwise, the behavior is undefined.
class LLVM2ICEGlobalsConverter : public LLVM2ICEConverter {
LLVM2ICEGlobalsConverter() = delete;
LLVM2ICEGlobalsConverter(const LLVM2ICEGlobalsConverter &) = delete;
@@ -661,15 +658,14 @@
explicit LLVM2ICEGlobalsConverter(Ice::Converter &Converter)
: LLVM2ICEConverter(Converter) {}
- /// Converts global variables, and their initializers into ICE
- /// global variable declarations, for module Mod. Returns the set of
- /// converted declarations.
+  /// Converts global variables and their initializers into ICE global variable
+  /// declarations for module Mod. Returns the set of converted declarations.
std::unique_ptr<Ice::VariableDeclarationList>
convertGlobalsToIce(Module *Mod);
private:
- // Adds the Initializer to the list of initializers for the Global
- // variable declaraation.
+ // Adds the Initializer to the list of initializers for the Global variable
+ // declaration.
void addGlobalInitializer(Ice::VariableDeclaration &Global,
const Constant *Initializer) {
const bool HasOffset = false;
@@ -678,15 +674,14 @@
}
// Adds Initializer to the list of initializers for Global variable
- // declaration. HasOffset is true only if Initializer is a
- // relocation initializer and Offset should be added to the
- // relocation.
+ // declaration. HasOffset is true only if Initializer is a relocation
+ // initializer and Offset should be added to the relocation.
void addGlobalInitializer(Ice::VariableDeclaration &Global,
const Constant *Initializer, bool HasOffset,
Ice::RelocOffsetT Offset);
- // Converts the given constant C to the corresponding integer
- // literal it contains.
+ // Converts the given constant C to the corresponding integer literal it
+ // contains.
Ice::RelocOffsetT getIntegerLiteralConstant(const Value *C) {
const auto CI = dyn_cast<ConstantInt>(C);
if (CI && CI->getType()->isIntegerTy(32))
diff --git a/src/IceConverter.h b/src/IceConverter.h
index 399e6f8..accc554 100644
--- a/src/IceConverter.h
+++ b/src/IceConverter.h
@@ -42,8 +42,8 @@
llvm::Module *getModule() const { return Mod; }
- /// Returns the global declaration associated with the corresponding
- /// global value V. If no such global address, generates fatal error.
+ /// Returns the global declaration associated with the corresponding global
+  /// value V. If there is no such global address, a fatal error is generated.
GlobalDeclaration *getGlobalDeclaration(const llvm::GlobalValue *V);
private:
@@ -56,9 +56,8 @@
/// getFlags().DefaultGlobalPrefix, if the prefix is non-empty.
void nameUnnamedGlobalVariables(llvm::Module *Mod);
- /// Walks module and generates names for unnamed functions using
- /// prefix getFlags().DefaultFunctionPrefix, if the prefix is
- /// non-empty.
+ /// Walks module and generates names for unnamed functions using prefix
+ /// getFlags().DefaultFunctionPrefix, if the prefix is non-empty.
void nameUnnamedFunctions(llvm::Module *Mod);
/// Converts functions to ICE, and then machine code.
diff --git a/src/IceDefs.h b/src/IceDefs.h
index d1ad81c..a38da03 100644
--- a/src/IceDefs.h
+++ b/src/IceDefs.h
@@ -9,7 +9,7 @@
///
/// \file
/// This file declares various useful types and classes that have widespread use
-/// across Subzero. Every Subzero source file is expected to include IceDefs.h.
+/// across Subzero. Every Subzero source file is expected to include IceDefs.h.
///
//===----------------------------------------------------------------------===//
@@ -99,8 +99,8 @@
}
// makeUnique should be used when memory is expected to be allocated from the
-// heap (as opposed to allocated from some Allocator.) It is intended to be used
-// instead of new.
+// heap (as opposed to allocated from some Allocator). It is intended to be
+// used instead of new.
//
// The expected usage is as follows
//
@@ -140,8 +140,8 @@
using IceString = std::string;
using InstList = llvm::ilist<Inst>;
-// Ideally PhiList would be llvm::ilist<InstPhi>, and similar for
-// AssignList, but this runs into issues with SFINAE.
+// Ideally PhiList would be llvm::ilist<InstPhi>, and similar for AssignList,
+// but this runs into issues with SFINAE.
using PhiList = InstList;
using AssignList = InstList;
@@ -155,18 +155,18 @@
using FunctionDeclarationList = std::vector<FunctionDeclaration *>;
using VariableDeclarationList = std::vector<VariableDeclaration *>;
-/// SizeT is for holding small-ish limits like number of source
-/// operands in an instruction. It is used instead of size_t (which
-/// may be 64-bits wide) when we want to save space.
+/// SizeT is for holding small-ish limits like number of source operands in an
+/// instruction. It is used instead of size_t (which may be 64 bits wide) when
+/// we want to save space.
using SizeT = uint32_t;
-/// InstNumberT is for holding an instruction number. Instruction
-/// numbers are used for representing Variable live ranges.
+/// InstNumberT is for holding an instruction number. Instruction numbers are
+/// used for representing Variable live ranges.
using InstNumberT = int32_t;
-/// A LiveBeginEndMapEntry maps a Variable::Number value to an
-/// Inst::Number value, giving the instruction number that begins or
-/// ends a variable's live range.
+/// A LiveBeginEndMapEntry maps a Variable::Number value to an Inst::Number
+/// value, giving the instruction number that begins or ends a variable's live
+/// range.
using LiveBeginEndMapEntry = std::pair<SizeT, InstNumberT>;
using LiveBeginEndMap =
std::vector<LiveBeginEndMapEntry, CfgLocalAllocator<LiveBeginEndMapEntry>>;
@@ -175,9 +175,8 @@
using TimerStackIdT = uint32_t;
using TimerIdT = uint32_t;
-/// Use alignas(MaxCacheLineSize) to isolate variables/fields that
-/// might be contended while multithreading. Assumes the maximum cache
-/// line size is 64.
+/// Use alignas(MaxCacheLineSize) to isolate variables/fields that might be
+/// contended while multithreading. Assumes the maximum cache line size is 64.
enum { MaxCacheLineSize = 64 };
// Use ICE_CACHELINE_BOUNDARY to force the next field in a declaration
// list to be aligned to the next cache line.
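
As a toy illustration of the alignas(MaxCacheLineSize) pattern described above
(not a Subzero type): two counters updated by different threads are forced
onto separate cache lines to avoid false sharing.

#include <atomic>
#include <cstddef>

enum { MaxCacheLineSize = 64 }; // mirrors the enum declared above

// Each counter gets its own cache line, so threads bumping Translated do not
// false-share with threads bumping Emitted.
struct Counters {
  alignas(MaxCacheLineSize) std::atomic<std::size_t> Translated{0};
  alignas(MaxCacheLineSize) std::atomic<std::size_t> Emitted{0};
};

static_assert(sizeof(Counters) >= 2 * MaxCacheLineSize,
              "each counter occupies its own cache line");
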
@@ -191,15 +190,15 @@
enum { RelocAddrSize = 4 };
enum LivenessMode {
- /// Basic version of live-range-end calculation. Marks the last uses
- /// of variables based on dataflow analysis. Records the set of
- /// live-in and live-out variables for each block. Identifies and
- /// deletes dead instructions (primarily stores).
+ /// Basic version of live-range-end calculation. Marks the last uses of
+ /// variables based on dataflow analysis. Records the set of live-in and
+ /// live-out variables for each block. Identifies and deletes dead
+ /// instructions (primarily stores).
Liveness_Basic,
- /// In addition to Liveness_Basic, also calculate the complete
- /// live range for each variable in a form suitable for interference
- /// calculation and register allocation.
+ /// In addition to Liveness_Basic, also calculate the complete live range for
+ /// each variable in a form suitable for interference calculation and register
+ /// allocation.
Liveness_Intervals
};
@@ -244,10 +243,10 @@
enum ErrorCodes { EC_None = 0, EC_Args, EC_Bitcode, EC_Translation };
-/// Wrapper around std::error_code for allowing multiple errors to be
-/// folded into one. The current implementation keeps track of the
-/// first error, which is likely to be the most useful one, and this
-/// could be extended to e.g. collect a vector of errors.
+/// Wrapper around std::error_code for allowing multiple errors to be folded
+/// into one. The current implementation keeps track of the first error, which
+/// is likely to be the most useful one, and this could be extended to e.g.
+/// collect a vector of errors.
class ErrorCode : public std::error_code {
ErrorCode(const ErrorCode &) = delete;
ErrorCode &operator=(const ErrorCode &) = delete;
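
A stripped-down sketch of the "first error wins" folding described above,
using its own small standalone class rather than the real ErrorCode (which
derives from std::error_code):

#include <system_error>

// Collapses any number of reported errors into the first one seen.
class FirstErrorCode {
public:
  void assign(std::error_code EC) {
    if (!HasError) { // later errors are dropped; the first is the useful one
      HasError = true;
      First = EC;
    }
  }
  explicit operator bool() const { return HasError; }
  std::error_code value() const { return First; }

private:
  bool HasError = false;
  std::error_code First;
};

Collecting a vector of errors, as the comment suggests, would just mean
replacing First with a small container.
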
diff --git a/src/IceELFObjectWriter.cpp b/src/IceELFObjectWriter.cpp
index 9e1d44e..7456856 100644
--- a/src/IceELFObjectWriter.cpp
+++ b/src/IceELFObjectWriter.cpp
@@ -105,10 +105,9 @@
ELFRelocationSection *
ELFObjectWriter::createRelocationSection(const ELFSection *RelatedSection) {
- // Choice of RELA vs REL is actually separate from elf64 vs elf32,
- // but in practice we've only had .rela for elf64 (x86-64).
- // In the future, the two properties may need to be decoupled
- // and the ShEntSize can vary more.
+ // Choice of RELA vs REL is actually separate from elf64 vs elf32, but in
+ // practice we've only had .rela for elf64 (x86-64). In the future, the two
+ // properties may need to be decoupled and the ShEntSize can vary more.
const Elf64_Word ShType = ELF64 ? SHT_RELA : SHT_REL;
IceString RelPrefix = ELF64 ? ".rela" : ".rel";
IceString RelSectionName = RelPrefix + RelatedSection->getName();
@@ -158,8 +157,8 @@
}
void ELFObjectWriter::assignSectionNumbersInfo(SectionList &AllSections) {
- // Go through each section, assigning them section numbers and
- // and fill in the size for sections that aren't incrementally updated.
+  // Go through each section, assigning them section numbers and filling in
+ // the size for sections that aren't incrementally updated.
assert(!SectionNumbersAssigned);
SizeT CurSectionNumber = 0;
NullSection->setNumber(CurSectionNumber++);
@@ -233,8 +232,8 @@
RelSection = RelTextSections[0];
}
RelocOffsetT OffsetInSection = Section->getCurrentSize();
- // Function symbols are set to 0 size in the symbol table,
- // in contrast to data symbols which have a proper size.
+ // Function symbols are set to 0 size in the symbol table, in contrast to
+ // data symbols which have a proper size.
SizeT SymbolSize = 0;
Section->appendData(Str, Asm->getBufferView());
uint8_t SymbolType;
@@ -268,9 +267,8 @@
return ELFObjectWriter::BSS;
}
-// Partition the Vars list by SectionType into VarsBySection.
-// If TranslateOnly is non-empty, then only the TranslateOnly variable
-// is kept for emission.
+// Partition the Vars list by SectionType into VarsBySection. If TranslateOnly
+// is non-empty, then only the TranslateOnly variable is kept for emission.
void partitionGlobalsBySection(const VariableDeclarationList &Vars,
VariableDeclarationList VarsBySection[],
const IceString &TranslateOnly) {
@@ -440,8 +438,8 @@
void ELFObjectWriter::writeELFHeaderInternal(Elf64_Off SectionHeaderOffset,
SizeT SectHeaderStrIndex,
SizeT NumSections) {
- // Write the e_ident: magic number, class, etc.
- // The e_ident is byte order and ELF class independent.
+ // Write the e_ident: magic number, class, etc. The e_ident is byte order and
+ // ELF class independent.
Str.writeBytes(llvm::StringRef(ElfMagic, strlen(ElfMagic)));
Str.write8(IsELF64 ? ELFCLASS64 : ELFCLASS32);
Str.write8(ELFDATA2LSB);
@@ -451,21 +449,21 @@
Str.write8(ELF_ABIVersion);
Str.writeZeroPadding(EI_NIDENT - EI_PAD);
- // TODO(jvoung): Handle and test > 64K sections. See the generic ABI doc:
- // https://refspecs.linuxbase.org/elf/gabi4+/ch4.eheader.html
- // e_shnum should be 0 and then actual number of sections is
- // stored in the sh_size member of the 0th section.
+ // TODO(jvoung): Handle and test > 64K sections. See the generic ABI doc:
+ // https://refspecs.linuxbase.org/elf/gabi4+/ch4.eheader.html e_shnum should
+  // be 0, and then the actual number of sections is stored in the sh_size
+  // member of the 0th section.
assert(NumSections < SHN_LORESERVE);
assert(SectHeaderStrIndex < SHN_LORESERVE);
const TargetArch Arch = Ctx.getFlags().getTargetArch();
- // Write the rest of the file header, which does depend on byte order
- // and ELF class.
+ // Write the rest of the file header, which does depend on byte order and ELF
+ // class.
Str.writeLE16(ET_REL); // e_type
Str.writeLE16(getELFMachine(Ctx.getFlags().getTargetArch())); // e_machine
Str.writeELFWord<IsELF64>(1); // e_version
- // Since this is for a relocatable object, there is no entry point,
- // and no program headers.
+ // Since this is for a relocatable object, there is no entry point, and no
+ // program headers.
Str.writeAddrOrOffset<IsELF64>(0); // e_entry
Str.writeAddrOrOffset<IsELF64>(0); // e_phoff
Str.writeAddrOrOffset<IsELF64>(SectionHeaderOffset); // e_shoff
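
The e_ident bytes written above follow the standard ELF identification layout;
spelled out as a standalone sketch using the standard constant values rather
than Subzero's streamer calls:

#include <array>
#include <cstdint>

// The 16 identification bytes at the start of every ELF file.
std::array<uint8_t, 16> makeElfIdent(bool IsELF64) {
  std::array<uint8_t, 16> Ident = {};
  Ident[0] = 0x7f;            // EI_MAG0..EI_MAG3: "\x7fELF"
  Ident[1] = 'E';
  Ident[2] = 'L';
  Ident[3] = 'F';
  Ident[4] = IsELF64 ? 2 : 1; // EI_CLASS: ELFCLASS64 / ELFCLASS32
  Ident[5] = 1;               // EI_DATA: ELFDATA2LSB (little endian)
  Ident[6] = 1;               // EI_VERSION: EV_CURRENT
  // Ident[7..15]: EI_OSABI, EI_ABIVERSION, then zero padding up to EI_NIDENT.
  return Ident;
}
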
@@ -505,8 +503,8 @@
SecStrBuf.str(), SHT_PROGBITS, ShFlags, Align, WriteAmt);
RODataSections.push_back(Section);
SizeT OffsetInSection = 0;
- // The symbol table entry doesn't need to know the defined symbol's
- // size since this is in a section with a fixed Entry Size.
+ // The symbol table entry doesn't need to know the defined symbol's size
+ // since this is in a section with a fixed Entry Size.
const SizeT SymbolSize = 0;
Section->setFileOffset(alignFileOffset(Align));
@@ -541,11 +539,11 @@
Section->setSize(OffsetInSection);
}
-// Instantiate known needed versions of the template, since we are
-// defining the function in the .cpp file instead of the .h file.
-// We may need to instantiate constant pools for integers as well
-// if we do constant-pooling of large integers to remove them
-// from the instruction stream (fewer bytes controlled by an attacker).
+// Instantiate known needed versions of the template, since we are defining the
+// function in the .cpp file instead of the .h file. We may need to instantiate
+// constant pools for integers as well if we do constant-pooling of large
+// integers to remove them from the instruction stream (fewer bytes controlled
+// by an attacker).
template void ELFObjectWriter::writeConstantPool<ConstantFloat>(Type Ty);
template void ELFObjectWriter::writeConstantPool<ConstantDouble>(Type Ty);
diff --git a/src/IceELFObjectWriter.h b/src/IceELFObjectWriter.h
index c1bfb74..60ed60c 100644
--- a/src/IceELFObjectWriter.h
+++ b/src/IceELFObjectWriter.h
@@ -24,11 +24,11 @@
namespace Ice {
-/// Higher level ELF object writer. Manages section information and writes
-/// the final ELF object. The object writer will write to file the code
-/// and data as it is being defined (rather than keep a copy).
-/// After all definitions are written out, it will finalize the bookkeeping
-/// sections and write them out. Expected usage:
+/// Higher level ELF object writer. Manages section information and writes the
+/// final ELF object. The object writer will write to file the code and data as
+/// they are being defined (rather than keep a copy). After all definitions are
+/// written out, it will finalize the bookkeeping sections and write them out.
+/// Expected usage:
///
/// (1) writeInitialELFHeader (invoke once)
/// (2) writeDataSection (may be invoked multiple times, as long as
@@ -38,9 +38,9 @@
/// (5) setUndefinedSyms (invoke once)
/// (6) writeNonUserSections (invoke once)
///
-/// The requirement for writeDataSection to be invoked only once can
-/// be relaxed if using -fdata-sections. The requirement to invoke only once
-/// without -fdata-sections is so that variables that belong to each possible
+/// The requirement for writeDataSection to be invoked only once can be relaxed
+/// if using -fdata-sections. The requirement to invoke only once without
+/// -fdata-sections is so that variables that belong to each possible
/// SectionType are contiguous in the file. With -fdata-sections, each global
/// variable is in a separate section and therefore the sections will be
/// trivially contiguous.
@@ -53,27 +53,27 @@
ELFObjectWriter(GlobalContext &Ctx, ELFStreamer &Out);
/// Write the initial ELF header. This is just to reserve space in the ELF
- /// file. Reserving space allows the other functions to write text
- /// and data directly to the file and get the right file offsets.
+ /// file. Reserving space allows the other functions to write text and data
+ /// directly to the file and get the right file offsets.
void writeInitialELFHeader();
- /// Copy initializer data for globals to file and note the offset and size
- /// of each global's definition in the symbol table.
- /// Use the given target's RelocationKind for any relocations.
+ /// Copy initializer data for globals to file and note the offset and size of
+ /// each global's definition in the symbol table. Use the given target's
+ /// RelocationKind for any relocations.
void writeDataSection(const VariableDeclarationList &Vars,
FixupKind RelocationKind,
const IceString &SectionSuffix);
/// Copy data of a function's text section to file and note the offset of the
- /// symbol's definition in the symbol table.
- /// Copy the text fixups for use after all functions are written.
- /// The text buffer and fixups are extracted from the Assembler object.
+ /// symbol's definition in the symbol table. Copy the text fixups for use
+ /// after all functions are written. The text buffer and fixups are extracted
+ /// from the Assembler object.
void writeFunctionCode(const IceString &FuncName, bool IsInternal,
const Assembler *Asm);
- /// Queries the GlobalContext for constant pools of the given type
- /// and writes out read-only data sections for those constants. This also
- /// fills the symbol table with labels for each constant pool entry.
+ /// Queries the GlobalContext for constant pools of the given type and writes
+ /// out read-only data sections for those constants. This also fills the
+ /// symbol table with labels for each constant pool entry.
template <typename ConstType> void writeConstantPool(Type Ty);
/// Write a jump table and register fixups for the target addresses.
@@ -82,12 +82,12 @@
/// Populate the symbol table with a list of external/undefined symbols.
void setUndefinedSyms(const ConstantList &UndefSyms);
- /// Do final layout and write out the rest of the object file.
- /// Finally, patch up the initial ELF header with the final info.
+ /// Do final layout and write out the rest of the object file. Finally, patch
+ /// up the initial ELF header with the final info.
void writeNonUserSections();
- /// Which type of ELF section a global variable initializer belongs to.
- /// This is used as an array index so should start at 0 and be contiguous.
+ /// Which type of ELF section a global variable initializer belongs to. This
+ /// is used as an array index so should start at 0 and be contiguous.
enum SectionType { ROData = 0, Data, BSS, NumSectionTypes };
private:
@@ -120,25 +120,25 @@
Elf64_Xword ShFlags, Elf64_Xword ShAddralign,
Elf64_Xword ShEntsize);
- /// Create a relocation section, given the related section
- /// (e.g., .text, .data., .rodata).
+ /// Create a relocation section, given the related section (e.g., .text,
+  /// .data, .rodata).
ELFRelocationSection *
createRelocationSection(const ELFSection *RelatedSection);
- /// Align the file position before writing out a section's data,
- /// and return the position of the file.
+ /// Align the file position before writing out a section's data, and return
+ /// the position of the file.
Elf64_Off alignFileOffset(Elf64_Xword Align);
- /// Assign an ordering / section numbers to each section.
- /// Fill in other information that is only known near the end
- /// (such as the size, if it wasn't already incrementally updated).
- /// This then collects all sections in the decided order, into one vector,
- /// for conveniently writing out all of the section headers.
+ /// Assign an ordering / section numbers to each section. Fill in other
+ /// information that is only known near the end (such as the size, if it
+ /// wasn't already incrementally updated). This then collects all sections in
+ /// the decided order, into one vector, for conveniently writing out all of
+ /// the section headers.
void assignSectionNumbersInfo(SectionList &AllSections);
- /// This function assigns .foo and .rel.foo consecutive section numbers.
- /// It also sets the relocation section's sh_info field to the related
- /// section's number.
+ /// This function assigns .foo and .rel.foo consecutive section numbers. It
+ /// also sets the relocation section's sh_info field to the related section's
+ /// number.
template <typename UserSectionList>
void assignRelSectionNumInPairs(SizeT &CurSectionNumber,
UserSectionList &UserSections,
@@ -156,9 +156,9 @@
FixupKind RelocationKind,
const IceString &SectionSuffix);
- /// Write the final relocation sections given the final symbol table.
- /// May also be able to seek around the file and resolve function calls
- /// that are for functions within the same section.
+ /// Write the final relocation sections given the final symbol table. May also
+ /// be able to seek around the file and resolve function calls that are for
+ /// functions within the same section.
void writeAllRelocationSections();
void writeRelocationSections(RelSectionList &RelSections);
diff --git a/src/IceELFSection.cpp b/src/IceELFSection.cpp
index 7893354..3e33c99 100644
--- a/src/IceELFSection.cpp
+++ b/src/IceELFSection.cpp
@@ -82,8 +82,8 @@
// Symbol tables.
void ELFSymbolTableSection::createNullSymbol(ELFSection *NullSection) {
- // The first entry in the symbol table should be a NULL entry,
- // so make sure the map is still empty.
+ // The first entry in the symbol table should be a NULL entry, so make sure
+ // the map is still empty.
assert(LocalSymbols.empty());
const IceString NullSymName("");
createDefinedSym(NullSymName, STT_NOTYPE, STB_LOCAL, NullSection, 0, 0);
@@ -208,8 +208,8 @@
assert(StringIndex.second == UnknownIndex);
llvm::StringRef Cur = llvm::StringRef(StringIndex.first);
if (Prev.endswith(Cur)) {
- // Prev is already in the StringData, and Cur is shorter than Prev
- // based on the sort.
+ // Prev is already in the StringData, and Cur is shorter than Prev based
+ // on the sort.
StringIndex.second = StringData.size() - Cur.size() - 1;
continue;
}
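
The suffix-sharing trick described above (a string that is a suffix of the
previously emitted string reuses that string's tail) can be shown end to end
with a toy builder. The sort order below, descending by reversed contents, is
one way to make the "check only the previous string" rule sufficient; names
and types are illustrative, not the real ELFStringTableSection:

#include <algorithm>
#include <cstddef>
#include <map>
#include <string>
#include <vector>

// Build a string table, sharing bytes when one string is a suffix of another.
std::map<std::string, std::size_t>
buildStringTable(std::vector<std::string> Strings, std::string &Data) {
  // Sort descending by reversed contents so that any string lands right after
  // a string it is a suffix of, if one exists.
  std::sort(Strings.begin(), Strings.end(),
            [](const std::string &A, const std::string &B) {
              return std::lexicographical_compare(B.rbegin(), B.rend(),
                                                  A.rbegin(), A.rend());
            });
  std::map<std::string, std::size_t> Offsets;
  Data.assign(1, '\0'); // offset 0 is the empty string, as in ELF
  std::string Prev;
  for (const std::string &S : Strings) {
    if (!Prev.empty() && Prev.size() >= S.size() &&
        Prev.compare(Prev.size() - S.size(), S.size(), S) == 0) {
      // S is a suffix of the previously written string: reuse its tail.
      Offsets[S] = Data.size() - S.size() - 1;
    } else {
      Offsets[S] = Data.size();
      Data += S;
      Data += '\0';
      Prev = S;
    }
  }
  return Offsets;
}
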
diff --git a/src/IceELFSection.h b/src/IceELFSection.h
index 92dc02b..636f3b4 100644
--- a/src/IceELFSection.h
+++ b/src/IceELFSection.h
@@ -36,15 +36,15 @@
public:
virtual ~ELFSection() = default;
- /// Sentinel value for a section number/index for before the final
- /// section index is actually known. The dummy NULL section will be assigned
- /// number 0, and it is referenced by the dummy 0-th symbol in the symbol
- /// table, so use max() instead of 0.
+  /// Sentinel value for a section number/index, used before the final section
+ /// index is actually known. The dummy NULL section will be assigned number 0,
+ /// and it is referenced by the dummy 0-th symbol in the symbol table, so use
+ /// max() instead of 0.
enum { NoSectionNumber = std::numeric_limits<SizeT>::max() };
- /// Constructs an ELF section, filling in fields that will be known
- /// once the *type* of section is decided. Other fields may be updated
- /// incrementally or only after the program is completely defined.
+ /// Constructs an ELF section, filling in fields that will be known once the
+ /// *type* of section is decided. Other fields may be updated incrementally or
+ /// only after the program is completely defined.
ELFSection(const IceString &Name, Elf64_Word ShType, Elf64_Xword ShFlags,
Elf64_Xword ShAddralign, Elf64_Xword ShEntsize)
: Name(Name), Header() {
@@ -84,8 +84,8 @@
template <bool IsELF64> void writeHeader(ELFStreamer &Str);
protected:
- /// Name of the section in convenient string form (instead of a index
- /// into the Section Header String Table, which is not known till later).
+ /// Name of the section in convenient string form (instead of an index into
+ /// the Section Header String Table, which is not known till later).
const IceString Name;
// The fields of the header. May only be partially initialized, but should
@@ -96,8 +96,8 @@
SizeT Number = NoSectionNumber;
};
-/// Models text/code sections. Code is written out incrementally and the
-/// size of the section is then updated incrementally.
+/// Models text/code sections. Code is written out incrementally and the size of
+/// the section is then updated incrementally.
class ELFTextSection : public ELFSection {
ELFTextSection() = delete;
ELFTextSection(const ELFTextSection &) = delete;
@@ -109,9 +109,9 @@
void appendData(ELFStreamer &Str, const llvm::StringRef MoreData);
};
-/// Models data/rodata sections. Data is written out incrementally and the
-/// size of the section is then updated incrementally.
-/// Some rodata sections may have fixed entsize and duplicates may be mergeable.
+/// Models data/rodata sections. Data is written out incrementally and the size
+/// of the section is then updated incrementally. Some rodata sections may have
+/// fixed entsize and duplicates may be mergeable.
class ELFDataSection : public ELFSection {
ELFDataSection() = delete;
ELFDataSection(const ELFDataSection &) = delete;
@@ -128,8 +128,8 @@
RelocOffsetT RelocOffset);
/// Pad the next section offset for writing data elements to the requested
- /// alignment. If the section is NOBITS then do not actually write out
- /// the padding and only update the section size.
+ /// alignment. If the section is NOBITS then do not actually write out the
+ /// padding and only update the section size.
void padToAlignment(ELFStreamer &Str, Elf64_Xword Align);
};
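
The alignment handling described in the padToAlignment() comment above comes down to a standard power-of-two round-up. The sketch below is illustrative only (alignUp is not a Subzero function); the NOBITS behavior is noted in a comment:

```cpp
#include <cassert>
#include <cstdint>

// Round Offset up to the next multiple of Align, where Align is a power of 2.
uint64_t alignUp(uint64_t Offset, uint64_t Align) {
  assert(Align != 0 && (Align & (Align - 1)) == 0 && "Align must be 2^k");
  return (Offset + Align - 1) & ~(Align - 1);
}
// For a NOBITS (.bss-style) section, only the recorded size would be bumped to
// alignUp(Size, Align); no padding bytes are written to the output stream.
```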
@@ -141,8 +141,8 @@
ELFSection *Section;
SizeT Number;
- /// Sentinel value for symbols that haven't been assigned a number yet.
- /// The dummy 0-th symbol will be assigned number 0, so don't use that.
+ /// Sentinel value for symbols that haven't been assigned a number yet. The
+ /// dummy 0-th symbol will be assigned number 0, so don't use that.
enum { UnknownNumber = std::numeric_limits<SizeT>::max() };
void setNumber(SizeT N) {
@@ -170,16 +170,15 @@
: ELFSection(Name, ShType, ShFlags, ShAddralign, ShEntsize),
NullSymbol(nullptr) {}
- /// Create initial entry for a symbol when it is defined.
- /// Each entry should only be defined once.
- /// We might want to allow Name to be a dummy name initially, then
- /// get updated to the real thing, since Data initializers are read
- /// before the bitcode's symbol table is read.
+ /// Create initial entry for a symbol when it is defined. Each entry should
+ /// only be defined once. We might want to allow Name to be a dummy name
+ /// initially, then get updated to the real thing, since Data initializers are
+ /// read before the bitcode's symbol table is read.
void createDefinedSym(const IceString &Name, uint8_t Type, uint8_t Binding,
ELFSection *Section, RelocOffsetT Offset, SizeT Size);
- /// Note that a symbol table entry needs to be created for the given
- /// symbol because it is undefined.
+ /// Note that a symbol table entry needs to be created for the given symbol
+ /// because it is undefined.
void noteUndefinedSym(const IceString &Name, ELFSection *NullSection);
const ELFSym *findSymbol(const IceString &Name) const;
@@ -198,8 +197,8 @@
void writeData(ELFStreamer &Str, bool IsELF64);
private:
- // Map from symbol name to its symbol information.
- // This assumes symbols are unique across all sections.
+ // Map from symbol name to its symbol information. This assumes symbols are
+ // unique across all sections.
using SymtabKey = IceString;
using SymMap = std::map<SymtabKey, ELFSym>;
@@ -207,8 +206,8 @@
void writeSymbolMap(ELFStreamer &Str, const SymMap &Map);
const ELFSym *NullSymbol;
- // Keep Local and Global symbols separate, since the sh_info needs to
- // know the index of the last LOCAL.
+ // Keep Local and Global symbols separate, since the sh_info needs to know
+ // the index of the last LOCAL.
SymMap LocalSymbols;
SymMap GlobalSymbols;
};
@@ -231,8 +230,8 @@
RelatedSection = Section;
}
- /// Track additional relocations which start out relative to offset 0,
- /// but should be adjusted to be relative to BaseOff.
+ /// Track additional relocations which start out relative to offset 0, but
+ /// should be adjusted to be relative to BaseOff.
void addRelocations(RelocOffsetT BaseOff, const FixupRefList &FixupRefs);
/// Track a single additional relocation.
@@ -251,12 +250,11 @@
FixupList Fixups;
};
-/// Models a string table. The user will build the string table by
-/// adding strings incrementally. At some point, all strings should be
-/// known and doLayout() should be called. After that, no other
-/// strings may be added. However, the final offsets of the strings
-/// can be discovered and used to fill out section headers and symbol
-/// table entries.
+/// Models a string table. The user will build the string table by adding
+/// strings incrementally. At some point, all strings should be known and
+/// doLayout() should be called. After that, no other strings may be added.
+/// However, the final offsets of the strings can be discovered and used to fill
+/// out section headers and symbol table entries.
class ELFStringTableSection : public ELFSection {
ELFStringTableSection() = delete;
ELFStringTableSection(const ELFStringTableSection &) = delete;
@@ -271,12 +269,12 @@
/// Finalizes the layout of the string table and fills in the section Data.
void doLayout();
- /// The first byte of the string table should be \0, so it is an
- /// invalid index. Indices start out as unknown until layout is complete.
+ /// The first byte of the string table should be \0, so it is an invalid
+ /// index. Indices start out as unknown until layout is complete.
enum { UnknownIndex = 0 };
- /// Grabs the final index of a string after layout. Returns UnknownIndex
- /// if the string's index is not found.
+ /// Grabs the final index of a string after layout. Returns UnknownIndex if
+ /// the string's index is not found.
size_t getIndex(const IceString &Str) const;
llvm::StringRef getSectionData() const {
@@ -290,19 +288,19 @@
private:
bool isLaidOut() const { return !StringData.empty(); }
- /// Strings can share a string table entry if they share the same
- /// suffix. E.g., "pop" and "lollipop" can both use the characters
- /// in "lollipop", but "pops" cannot, and "unpop" cannot either.
- /// Though, "pop", "lollipop", and "unpop" share "pop" as the suffix,
- /// "pop" can only share the characters with one of them.
+ /// Strings can share a string table entry if they share the same suffix.
+ /// E.g., "pop" and "lollipop" can both use the characters in "lollipop", but
+ /// "pops" cannot, and "unpop" cannot either. Even though "pop", "lollipop",
+ /// and "unpop" share "pop" as the suffix, "pop" can only share the characters
+ /// with one of them.
struct SuffixComparator {
bool operator()(const IceString &StrA, const IceString &StrB) const;
};
using StringToIndexType = std::map<IceString, size_t, SuffixComparator>;
- /// Track strings to their index. Index will be UnknownIndex if not
- /// yet laid out.
+ /// Track strings to their index. Index will be UnknownIndex if not yet laid
+ /// out.
StringToIndexType StringToIndexMap;
using RawDataType = std::vector<uint8_t>;
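
As a rough illustration of the suffix-sharing layout described above (and of the `StringData.size() - Cur.size() - 1` arithmetic in IceELFSection.cpp), here is a simplified, self-contained sketch. It is not Subzero's ELFStringTableSection::doLayout(); layOutStrings and its ordering assumption are hypothetical:

```cpp
#include <cstddef>
#include <map>
#include <string>
#include <vector>

// Lay out strings so that a string which is a suffix of the previously
// laid-out string reuses that string's bytes (and its trailing NUL). Assumes
// Sorted places each string right after a longer string it is a suffix of,
// e.g. "lollipop" before "pop".
std::map<std::string, size_t> layOutStrings(const std::vector<std::string> &Sorted) {
  std::map<std::string, size_t> Index;
  std::string Data(1, '\0'); // Offset 0 holds the leading NUL (empty string).
  std::string Prev;
  for (const std::string &Cur : Sorted) {
    if (!Prev.empty() && Prev.size() >= Cur.size() &&
        Prev.compare(Prev.size() - Cur.size(), Cur.size(), Cur) == 0) {
      // Cur is a suffix of Prev: point into Prev's bytes. The "- 1" steps back
      // over the NUL that was appended after Prev.
      Index[Cur] = Data.size() - Cur.size() - 1;
    } else {
      Index[Cur] = Data.size(); // New entry starts at the current end.
      Data += Cur;
      Data += '\0';
      Prev = Cur;
    }
  }
  return Index; // Data would become the section contents.
}
```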
diff --git a/src/IceELFStreamer.h b/src/IceELFStreamer.h
index 93051b9..ab99891 100644
--- a/src/IceELFStreamer.h
+++ b/src/IceELFStreamer.h
@@ -20,8 +20,8 @@
namespace Ice {
-/// Low level writer that can that can handle ELFCLASS32/64.
-/// Little endian only for now.
+/// Low-level writer that can handle ELFCLASS32/64. Little endian only for
+/// now.
class ELFStreamer {
ELFStreamer() = delete;
ELFStreamer(const ELFStreamer &) = delete;
diff --git a/src/IceFixups.cpp b/src/IceFixups.cpp
index ff7916c..a86d985 100644
--- a/src/IceFixups.cpp
+++ b/src/IceFixups.cpp
@@ -40,8 +40,8 @@
else
Str << Ctx->mangleName(CR->getName());
} else {
- // NOTE: currently only float/doubles are put into constant pools.
- // In the future we may put integers as well.
+ // NOTE: currently only float/doubles are put into constant pools. In the
+ // future we may put integers as well.
assert(llvm::isa<ConstantFloat>(C) || llvm::isa<ConstantDouble>(C));
C->emitPoolLabel(Str);
}
diff --git a/src/IceGlobalContext.cpp b/src/IceGlobalContext.cpp
index b4da1b6..7b7183d 100644
--- a/src/IceGlobalContext.cpp
+++ b/src/IceGlobalContext.cpp
@@ -8,8 +8,8 @@
//===----------------------------------------------------------------------===//
///
/// \file
-/// This file defines aspects of the compilation that persist across
-/// multiple functions.
+/// This file defines aspects of the compilation that persist across multiple
+/// functions.
///
//===----------------------------------------------------------------------===//
@@ -48,9 +48,9 @@
namespace {
-// Define the key comparison function for the constant pool's
-// unordered_map, but only for key types of interest: integer types,
-// floating point types, and the special RelocatableTuple.
+// Define the key comparison function for the constant pool's unordered_map,
+// but only for key types of interest: integer types, floating point types, and
+// the special RelocatableTuple.
template <typename KeyType, class Enable = void> struct KeyCompare {};
template <typename KeyType>
@@ -70,9 +70,9 @@
}
};
-// Define a key comparison function for sorting the constant pool's
-// values after they are dumped to a vector. This covers integer
-// types, floating point types, and ConstantRelocatable values.
+// Define a key comparison function for sorting the constant pool's values
+// after they are dumped to a vector. This covers integer types, floating point
+// types, and ConstantRelocatable values.
template <typename ValueType, class Enable = void> struct KeyCompareLess {};
template <typename ValueType>
@@ -601,8 +601,8 @@
assert(OldName[OldPos - 1] == 'S');
assert(OldName[OldPos + Length] == '_');
if (AllZs) {
- // Replace N 'Z' characters with a '0' (if N=0) or '1' (if
- // N>0) followed by N '0' characters.
+ // Replace N 'Z' characters with a '0' (if N=0) or '1' (if N>0)
+ // followed by N '0' characters.
NewName[NewPos++] = (Length ? '1' : '0');
for (size_t i = 0; i < Length; ++i) {
NewName[NewPos++] = '0';
@@ -642,16 +642,15 @@
OldName = NewName;
}
-// In this context, name mangling means to rewrite a symbol using a
-// given prefix. For a C++ symbol, nest the original symbol inside
-// the "prefix" namespace. For other symbols, just prepend the
-// prefix.
+// In this context, name mangling means to rewrite a symbol using a given
+// prefix. For a C++ symbol, nest the original symbol inside the "prefix"
+// namespace. For other symbols, just prepend the prefix.
IceString GlobalContext::mangleName(const IceString &Name) const {
- // An already-nested name like foo::bar() gets pushed down one
- // level, making it equivalent to Prefix::foo::bar().
+ // An already-nested name like foo::bar() gets pushed down one level, making
+ // it equivalent to Prefix::foo::bar().
// _ZN3foo3barExyz ==> _ZN6Prefix3foo3barExyz
- // A non-nested but mangled name like bar() gets nested, making it
- // equivalent to Prefix::bar().
+ // A non-nested but mangled name like bar() gets nested, making it equivalent
+ // to Prefix::bar().
// _Z3barxyz ==> _ZN6Prefix3barExyz
// An unmangled, extern "C" style name gets a simple prefix:
// bar ==> Prefixbar
@@ -671,28 +670,27 @@
// (splice in "6Prefix") ^^^^^^^
snprintf(NewName.data(), BufLen, "_ZN%u%s%s", PrefixLength,
TestPrefix.c_str(), NameBase.data());
- // We ignore the snprintf return value (here and below). If we
- // somehow miscalculated the output buffer length, the output will
- // be truncated, but it will be truncated consistently for all
- // mangleName() calls on the same input string.
+ // We ignore the snprintf return value (here and below). If we somehow
+ // miscalculated the output buffer length, the output will be truncated,
+ // but it will be truncated consistently for all mangleName() calls on the
+ // same input string.
incrementSubstitutions(NewName);
return NewName.data();
}
- // Artificially limit BaseLength to 9 digits (less than 1 billion)
- // because sscanf behavior is undefined on integer overflow. If
- // there are more than 9 digits (which we test by looking at the
- // beginning of NameBase), then we consider this a failure to parse
- // a namespace mangling, and fall back to the simple prefixing.
+ // Artificially limit BaseLength to 9 digits (less than 1 billion) because
+ // sscanf behavior is undefined on integer overflow. If there are more than 9
+ // digits (which we test by looking at the beginning of NameBase), then we
+ // consider this a failure to parse a namespace mangling, and fall back to
+ // the simple prefixing.
ItemsParsed = sscanf(Name.c_str(), "_Z%9u%s", &BaseLength, NameBase.data());
if (ItemsParsed == 2 && BaseLength <= strlen(NameBase.data()) &&
!isdigit(NameBase[0])) {
// Transform _Z3barxyz ==> _ZN6Prefix3barExyz
// ^^^^^^^^ ^
- // (splice in "N6Prefix", and insert "E" after "3bar")
- // But an "I" after the identifier indicates a template argument
- // list terminated with "E"; insert the new "E" before/after the
- // old "E". E.g.:
+ // (splice in "N6Prefix", and insert "E" after "3bar")
+ // But an "I" after the identifier indicates a template argument list
+ // terminated with "E"; insert the new "E" before/after the old "E". E.g.:
// Transform _Z3barIabcExyz ==> _ZN6Prefix3barIabcEExyz
// ^^^^^^^^ ^
// (splice in "N6Prefix", and insert "E" after "3barIabcE")
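
To make the transforms above concrete, here is a heavily simplified sketch of the prefixing logic. It is not GlobalContext::mangleName() (it skips the substitution renumbering and the template-argument "I...E" handling shown above, and assumes well-formed input); prefixMangled is a hypothetical name:

```cpp
#include <cctype>
#include <string>

std::string prefixMangled(const std::string &Name, const std::string &Prefix) {
  const std::string NS = std::to_string(Prefix.size()) + Prefix; // e.g. "6Prefix"
  if (Name.compare(0, 3, "_ZN") == 0) {
    // Nested name: _ZN3foo3barExyz ==> _ZN6Prefix3foo3barExyz
    return "_ZN" + NS + Name.substr(3);
  }
  if (Name.size() > 2 && Name.compare(0, 2, "_Z") == 0 &&
      std::isdigit(static_cast<unsigned char>(Name[2]))) {
    // Non-nested mangled name: _Z3barxyz ==> _ZN6Prefix3barExyz
    size_t Pos = 2, Len = 0;
    while (Pos < Name.size() &&
           std::isdigit(static_cast<unsigned char>(Name[Pos])))
      Len = Len * 10 + (Name[Pos++] - '0');
    return "_ZN" + NS + Name.substr(2, Pos - 2 + Len) + "E" +
           Name.substr(Pos + Len);
  }
  // Unmangled, extern "C"-style name: bar ==> Prefixbar
  return Prefix + Name;
}
```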
@@ -730,8 +728,8 @@
}
}
-// TODO(stichnot): Consider adding thread-local caches of constant
-// pool entries to reduce contention.
+// TODO(stichnot): Consider adding thread-local caches of constant pool entries
+// to reduce contention.
// All locking is done by the getConstantInt[0-9]+() target function.
Constant *GlobalContext::getConstantInt(Type Ty, int64_t Value) {
@@ -875,8 +873,8 @@
JumpTableDataList GlobalContext::getJumpTables() {
JumpTableDataList JumpTables(*getJumpTableList());
- // Make order deterministic by sorting into functions and then ID of the
- // jump table within that function.
+ // Make order deterministic by sorting into functions and then ID of the jump
+ // table within that function.
std::sort(JumpTables.begin(), JumpTables.end(),
[](const JumpTableData &A, const JumpTableData &B) {
if (A.getFunctionName() != B.getFunctionName())
@@ -946,11 +944,10 @@
Timers->at(StackID).setName(NewName);
}
-// Note: optQueueBlockingPush and optQueueBlockingPop use unique_ptr
-// at the interface to take and transfer ownership, but they
-// internally store the raw Cfg pointer in the work queue. This
-// allows e.g. future queue optimizations such as the use of atomics
-// to modify queue elements.
+// Note: optQueueBlockingPush and optQueueBlockingPop use unique_ptr at the
+// interface to take and transfer ownership, but they internally store the raw
+// Cfg pointer in the work queue. This allows e.g. future queue optimizations
+// such as the use of atomics to modify queue elements.
void GlobalContext::optQueueBlockingPush(std::unique_ptr<Cfg> Func) {
assert(Func);
OptQ.blockingPush(Func.release());
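
The ownership convention described in the comment above (unique_ptr at the API boundary, raw pointers inside the queue) can be sketched as follows. BlockingQueueT, push, and pop are stand-ins rather than Subzero's types, and the end-of-work/nullptr handling mentioned elsewhere in this file is omitted:

```cpp
#include <condition_variable>
#include <deque>
#include <memory>
#include <mutex>

template <typename T> class BlockingQueueT {
public:
  void blockingPush(T *Item) {
    {
      std::lock_guard<std::mutex> L(M);
      Q.push_back(Item);
    }
    CV.notify_one();
  }
  T *blockingPop() {
    std::unique_lock<std::mutex> L(M);
    CV.wait(L, [this] { return !Q.empty(); });
    T *Item = Q.front();
    Q.pop_front();
    return Item;
  }

private:
  std::mutex M;
  std::condition_variable CV;
  std::deque<T *> Q;
};

struct Cfg {}; // Stand-in for Ice::Cfg.
BlockingQueueT<Cfg> OptQ;

// unique_ptr crosses the interface; the queue itself stores raw pointers.
void push(std::unique_ptr<Cfg> Func) { OptQ.blockingPush(Func.release()); }
std::unique_ptr<Cfg> pop() { return std::unique_ptr<Cfg>(OptQ.blockingPop()); }
```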
diff --git a/src/IceGlobalContext.h b/src/IceGlobalContext.h
index 8a747f8..baab9ca 100644
--- a/src/IceGlobalContext.h
+++ b/src/IceGlobalContext.h
@@ -8,8 +8,8 @@
//===----------------------------------------------------------------------===//
///
/// \file
-/// This file declares aspects of the compilation that persist across
-/// multiple functions.
+/// This file declares aspects of the compilation that persist across multiple
+/// functions.
///
//===----------------------------------------------------------------------===//
@@ -186,9 +186,10 @@
/// translators using the same bitcode as input.
IceString mangleName(const IceString &Name) const;
- // Manage Constants.
- // getConstant*() functions are not const because they might add
- // something to the constant pool.
+ /// \name Manage Constants.
+ /// @{
+ // getConstant*() functions are not const because they might add something to
+ // the constant pool.
Constant *getConstantInt(Type Ty, int64_t Value);
Constant *getConstantInt1(int8_t ConstantInt1);
Constant *getConstantInt8(int8_t ConstantInt8);
@@ -205,11 +206,12 @@
Constant *getConstantUndef(Type Ty);
/// Returns a zero value.
Constant *getConstantZero(Type Ty);
- /// getConstantPool() returns a copy of the constant pool for
- /// constants of a given type.
+ /// getConstantPool() returns a copy of the constant pool for constants of a
+ /// given type.
ConstantList getConstantPool(Type Ty);
/// Returns a copy of the list of external symbols.
ConstantList getConstantExternSyms();
+ /// @}
/// Return a locked pointer to the registered jump tables.
JumpTableDataList getJumpTables();
@@ -299,36 +301,35 @@
/// These are predefined TimerStackIdT values.
enum TimerStackKind { TSK_Default = 0, TSK_Funcs, TSK_Num };
- /// newTimerStackID() creates a new TimerStack in the global space.
- /// It does not affect any TimerStack objects in TLS.
+ /// newTimerStackID() creates a new TimerStack in the global space. It does
+ /// not affect any TimerStack objects in TLS.
TimerStackIdT newTimerStackID(const IceString &Name);
- /// dumpTimers() dumps the global timer data. As such, one probably
- /// wants to call mergeTimerStacks() as a prerequisite.
+ /// dumpTimers() dumps the global timer data. As such, one probably wants to
+ /// call mergeTimerStacks() as a prerequisite.
void dumpTimers(TimerStackIdT StackID = TSK_Default,
bool DumpCumulative = true);
- /// The following methods affect only the calling thread's TLS timer
- /// data.
+ /// The following methods affect only the calling thread's TLS timer data.
TimerIdT getTimerID(TimerStackIdT StackID, const IceString &Name);
void pushTimer(TimerIdT ID, TimerStackIdT StackID);
void popTimer(TimerIdT ID, TimerStackIdT StackID);
void resetTimer(TimerStackIdT StackID);
void setTimerName(TimerStackIdT StackID, const IceString &NewName);
- /// This is the first work item sequence number that the parser
- /// produces, and correspondingly the first sequence number that the
- /// emitter thread will wait for. Start numbering at 1 to leave room
- /// for a sentinel, in case e.g. we wish to inject items with a
- /// special sequence number that may be executed out of order.
+ /// This is the first work item sequence number that the parser produces, and
+ /// correspondingly the first sequence number that the emitter thread will
+ /// wait for. Start numbering at 1 to leave room for a sentinel, in case e.g.
+ /// we wish to inject items with a special sequence number that may be
+ /// executed out of order.
static uint32_t getFirstSequenceNumber() { return 1; }
- /// Adds a newly parsed and constructed function to the Cfg work
- /// queue. Notifies any idle workers that a new function is
- /// available for translating. May block if the work queue is too
- /// large, in order to control memory footprint.
+ /// Adds a newly parsed and constructed function to the Cfg work queue.
+ /// Notifies any idle workers that a new function is available for
+ /// translating. May block if the work queue is too large, in order to control
+ /// memory footprint.
void optQueueBlockingPush(std::unique_ptr<Cfg> Func);
- /// Takes a Cfg from the work queue for translating. May block if
- /// the work queue is currently empty. Returns nullptr if there is
- /// no more work - the queue is empty and either end() has been
- /// called or the Sequential flag was set.
+ /// Takes a Cfg from the work queue for translating. May block if the work
+ /// queue is currently empty. Returns nullptr if there is no more work - the
+ /// queue is empty and either end() has been called or the Sequential flag was
+ /// set.
std::unique_ptr<Cfg> optQueueBlockingPop();
/// Notifies that no more work will be added to the work queue.
void optQueueNotifyEnd() { OptQ.notifyEnd(); }
@@ -378,8 +379,8 @@
}
TranslationThreads.clear();
- // Only notify the emit queue to end after all the translation
- // threads have ended.
+ // Only notify the emit queue to end after all the translation threads have
+ // ended.
emitQueueNotifyEnd();
for (std::thread &Worker : EmitterThreads) {
Worker.join();
@@ -392,8 +393,8 @@
Timers->mergeFrom(TLS->Timers);
}
if (BuildDefs::dump()) {
- // Do a separate loop over AllThreadContexts to avoid holding
- // two locks at once.
+ // Do a separate loop over AllThreadContexts to avoid holding two locks
+ // at once.
auto Stats = getStatsCumulative();
for (ThreadContext *TLS : AllThreadContexts)
Stats->add(TLS->StatsCumulative);
@@ -413,8 +414,8 @@
ICE_TLS_SET_FIELD(TLS, MyTLS);
emitItems();
}
- /// Emit functions and global initializers from the emitter queue
- /// until the queue is empty.
+ /// Emit functions and global initializers from the emitter queue until the
+ /// queue is empty.
void emitItems();
/// Uses DataLowering to lower Globals. Side effects:
@@ -425,12 +426,11 @@
/// Lowers the profile information.
void lowerProfileData();
- /// Utility function to match a symbol name against a match string.
- /// This is used in a few cases where we want to take some action on
- /// a particular function or symbol based on a command-line argument,
- /// such as changing the verbose level for a particular function. An
- /// empty Match argument means match everything. Returns true if
- /// there is a match.
+ /// Utility function to match a symbol name against a match string. This is
+ /// used in a few cases where we want to take some action on a particular
+ /// function or symbol based on a command-line argument, such as changing the
+ /// verbose level for a particular function. An empty Match argument means
+ /// match everything. Returns true if there is a match.
static bool matchSymbolName(const IceString &SymbolName,
const IceString &Match) {
return Match.empty() || Match == SymbolName;
@@ -552,9 +552,9 @@
static void TlsInit() { ICE_TLS_INIT_FIELD(TLS); }
};
-/// Helper class to push and pop a timer marker. The constructor
-/// pushes a marker, and the destructor pops it. This is for
-/// convenient timing of regions of code.
+/// Helper class to push and pop a timer marker. The constructor pushes a
+/// marker, and the destructor pops it. This is for convenient timing of regions
+/// of code.
class TimerMarker {
TimerMarker() = delete;
TimerMarker(const TimerMarker &) = delete;
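
The push/pop pattern described in the TimerMarker comment above is plain RAII. A minimal stand-alone sketch (not the real TimerMarker, which pushes onto a TimerStack in GlobalContext) looks like this:

```cpp
#include <chrono>
#include <cstdio>
#include <string>
#include <utility>

// Constructor records the start time ("push"); destructor reports ("pop").
class ScopedTimer {
public:
  explicit ScopedTimer(std::string Name)
      : Name(std::move(Name)), Start(std::chrono::steady_clock::now()) {}
  ~ScopedTimer() {
    auto Us = std::chrono::duration_cast<std::chrono::microseconds>(
                  std::chrono::steady_clock::now() - Start)
                  .count();
    std::printf("%s: %lld us\n", Name.c_str(), static_cast<long long>(Us));
  }

private:
  std::string Name;
  std::chrono::steady_clock::time_point Start;
};

void lowerFunction() {
  ScopedTimer T("lowerFunction"); // marker pushed here ...
  // ... region of code being timed ...
} // ... and popped when T goes out of scope.
```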
@@ -589,8 +589,7 @@
bool Active = false;
};
-/// Helper class for locking the streams and then automatically
-/// unlocking them.
+/// Helper class for locking the streams and then automatically unlocking them.
class OstreamLocker {
private:
OstreamLocker() = delete;
diff --git a/src/IceGlobalInits.cpp b/src/IceGlobalInits.cpp
index c95456c..2c1460a 100644
--- a/src/IceGlobalInits.cpp
+++ b/src/IceGlobalInits.cpp
@@ -8,9 +8,8 @@
//===----------------------------------------------------------------------===//
///
/// \file
-/// This file implements the notion of function declarations, global
-/// variable declarations, and the corresponding variable initializers
-/// in Subzero.
+/// This file implements the notion of function declarations, global variable
+/// declarations, and the corresponding variable initializers in Subzero.
///
//===----------------------------------------------------------------------===//
@@ -152,9 +151,8 @@
return;
dumpType(Stream);
Stream << " c\"";
- // Code taken from PrintEscapedString() in AsmWriter.cpp. Keep
- // the strings in the same format as the .ll file for practical
- // diffing.
+ // Code taken from PrintEscapedString() in AsmWriter.cpp. Keep the strings in
+ // the same format as the .ll file for practical diffing.
for (uint8_t C : Contents) {
if (isprint(C) && C != '\\' && C != '"')
Stream << C;
diff --git a/src/IceGlobalInits.h b/src/IceGlobalInits.h
index c15aed0..8f51db2 100644
--- a/src/IceGlobalInits.h
+++ b/src/IceGlobalInits.h
@@ -8,10 +8,10 @@
//===----------------------------------------------------------------------===//
///
/// \file
-/// This file declares the representation of function declarations,
-/// global variable declarations, and the corresponding variable
-/// initializers in Subzero. Global variable initializers are
-/// represented as a sequence of simple initializers.
+/// This file declares the representation of function declarations, global
+/// variable declarations, and the corresponding variable initializers in
+/// Subzero. Global variable initializers are represented as a sequence of
+/// simple initializers.
///
//===----------------------------------------------------------------------===//
@@ -81,8 +81,8 @@
/// Returns true if when emitting names, we should suppress mangling.
virtual bool getSuppressMangling() const = 0;
- /// Mangles name for cross tests, unless external and not defined locally
- /// (so that relocations accross pnacl-sz and pnacl-llc will work).
+ /// Mangles name for cross tests, unless external and not defined locally (so
+ /// that relocations across pnacl-sz and pnacl-llc will work).
virtual IceString mangleName(GlobalContext *Ctx) const {
return getSuppressMangling() ? Name : Ctx->mangleName(Name);
}
@@ -97,8 +97,8 @@
llvm::GlobalValue::LinkageTypes Linkage;
};
-/// Models a function declaration. This includes the type signature of
-/// the function, its calling conventions, and its linkage.
+/// Models a function declaration. This includes the type signature of the
+/// function, its calling conventions, and its linkage.
class FunctionDeclaration : public GlobalDeclaration {
FunctionDeclaration() = delete;
FunctionDeclaration(const FunctionDeclaration &) = delete;
@@ -286,8 +286,7 @@
llvm::isa<ZeroInitializer>((*Initializers)[0].get()));
}
- /// Returns the number of bytes for the initializer of the global
- /// address.
+ /// Returns the number of bytes for the initializer of the global address.
SizeT getNumBytes() const {
SizeT Count = 0;
for (const std::unique_ptr<Initializer> &Init : *Initializers) {
@@ -296,19 +295,18 @@
return Count;
}
- /// Adds Initializer to the list of initializers. Takes ownership of
- /// the initializer.
+ /// Adds Initializer to the list of initializers. Takes ownership of the
+ /// initializer.
void addInitializer(std::unique_ptr<Initializer> Initializer) {
Initializers->emplace_back(std::move(Initializer));
HasInitializer = true;
}
- /// Prints out type for initializer associated with the declaration
- /// to Stream.
+ /// Prints out type for initializer associated with the declaration to Stream.
void dumpType(Ostream &Stream) const final;
- /// Prints out the definition of the global variable declaration
- /// (including initialization).
+ /// Prints out the definition of the global variable declaration (including
+ /// initialization).
void dump(GlobalContext *Ctx, Ostream &Stream) const final;
static bool classof(const GlobalDeclaration *Addr) {
diff --git a/src/IceInst.cpp b/src/IceInst.cpp
index b92e954..d9b91f0 100644
--- a/src/IceInst.cpp
+++ b/src/IceInst.cpp
@@ -8,8 +8,8 @@
//===----------------------------------------------------------------------===//
///
/// \file
-/// This file implements the Inst class, primarily the various
-/// subclass constructors and dump routines.
+/// This file implements the Inst class, primarily the various subclass
+/// constructors and dump routines.
///
//===----------------------------------------------------------------------===//
@@ -82,15 +82,15 @@
Number = isDeleted() ? NumberDeleted : Func->newInstNumber();
}
-// Delete the instruction if its tentative Dead flag is still set
-// after liveness analysis.
+// Delete the instruction if its tentative Dead flag is still set after
+// liveness analysis.
void Inst::deleteIfDead() {
if (Dead)
setDeleted();
}
-// If Src is a Variable, it returns true if this instruction ends
-// Src's live range. Otherwise, returns false.
+// If Src is a Variable, it returns true if this instruction ends Src's live
+// range. Otherwise, returns false.
bool Inst::isLastUse(const Operand *TestSrc) const {
if (LiveRangesEnded == 0)
return false; // early-exit optimization
@@ -116,17 +116,16 @@
// with SpliceAssn spliced in:
// d = [x,y]
//
-// Reconstruct the LiveRangesEnded bitmask in this instruction by
-// combining the LiveRangesEnded values of OrigInst and SpliceAssn.
-// If operands d and [x,y] contain a different number of variables,
-// then the bitmask position for e may be different in OrigInst and
-// the current instruction, requiring extra shifts and masks in the
-// computation. In the example above, OrigInst has variable e in bit
-// position 3, whereas the current instruction has e in bit position 4
+// Reconstruct the LiveRangesEnded bitmask in this instruction by combining the
+// LiveRangesEnded values of OrigInst and SpliceAssn. If operands d and [x,y]
+// contain a different number of variables, then the bitmask position for e may
+// be different in OrigInst and the current instruction, requiring extra shifts
+// and masks in the computation. In the example above, OrigInst has variable e
+// in bit position 3, whereas the current instruction has e in bit position 4
// because [x,y] consumes 2 bitmask slots while d only consumed 1.
//
-// Additionally, set HasSideEffects if either OrigInst or SpliceAssn
-// have HasSideEffects set.
+// Additionally, set HasSideEffects if either OrigInst or SpliceAssn have
+// HasSideEffects set.
void Inst::spliceLivenessInfo(Inst *OrigInst, Inst *SpliceAssn) {
HasSideEffects |= OrigInst->HasSideEffects;
HasSideEffects |= SpliceAssn->HasSideEffects;
@@ -184,8 +183,8 @@
}
if (Dead)
return false;
- // Phi arguments only get added to Live in the predecessor node, but
- // we still need to update LiveRangesEnded.
+ // Phi arguments only get added to Live in the predecessor node, but we still
+ // need to update LiveRangesEnded.
bool IsPhi = llvm::isa<InstPhi>(this);
resetLastUses();
FOREACH_VAR_IN_INST(Var, *this) {
@@ -195,20 +194,21 @@
if (!IsPhi) {
Live[VarNum] = true;
// For a variable in SSA form, its live range can end at most once in a
- // basic block. However, after lowering to two-address instructions, we
- // end up with sequences like "t=b;t+=c;a=t" where t's live range begins
- // and ends twice. ICE only allows a variable to have a single liveness
- // interval in a basic block (except for blocks where a variable is
- // live-in and live-out but there is a gap in the middle). Therefore,
- // this lowered sequence needs to represent a single conservative live
- // range for t. Since the instructions are being traversed backwards,
- // we make sure LiveEnd is only set once by setting it only when
- // LiveEnd[VarNum]==0 (sentinel value). Note that it's OK to set
- // LiveBegin multiple times because of the backwards traversal.
+ // basic block. However, after lowering to two-address instructions, we
+ // end up with sequences like "t=b;t+=c;a=t" where t's live range
+ // begins and ends twice. ICE only allows a variable to have a single
+ // liveness interval in a basic block (except for blocks where a
+ // variable is live-in and live-out but there is a gap in the middle).
+ // Therefore, this lowered sequence needs to represent a single
+ // conservative live range for t. Since the instructions are being
+ // traversed backwards, we make sure LiveEnd is only set once by
+ // setting it only when LiveEnd[VarNum]==0 (sentinel value). Note that
+ // it's OK to set LiveBegin multiple times because of the backwards
+ // traversal.
if (LiveEnd && Liveness->getRangeMask(Var->getIndex())) {
// Ideally, we would verify that VarNum wasn't already added in this
// block, but this can't be done very efficiently with LiveEnd as a
- // vector. Instead, livenessPostprocess() verifies this after the
+ // vector. Instead, livenessPostprocess() verifies this after the
// vector has been sorted.
LiveEnd->push_back(std::make_pair(VarNum, InstNumber));
}
@@ -249,9 +249,9 @@
addSource(Source);
}
-// If TargetTrue==TargetFalse, we turn it into an unconditional
-// branch. This ensures that, along with the 'switch' instruction
-// semantics, there is at most one edge from one node to another.
+// If TargetTrue==TargetFalse, we turn it into an unconditional branch. This
+// ensures that, along with the 'switch' instruction semantics, there is at
+// most one edge from one node to another.
InstBr::InstBr(Cfg *Func, Operand *Source, CfgNode *TargetTrue_,
CfgNode *TargetFalse_)
: InstHighLevel(Func, Inst::Br, 1, nullptr), TargetFalse(TargetFalse_),
@@ -334,18 +334,18 @@
Labels = Func->allocateArrayOf<CfgNode *>(MaxSrcs);
}
-// TODO: A Switch instruction (and maybe others) can add duplicate
-// edges. We may want to de-dup Phis and validate consistency (i.e.,
-// the source operands are the same for duplicate edges), though it
-// seems the current lowering code is OK with this situation.
+// TODO: A Switch instruction (and maybe others) can add duplicate edges. We
+// may want to de-dup Phis and validate consistency (i.e., the source operands
+// are the same for duplicate edges), though it seems the current lowering code
+// is OK with this situation.
void InstPhi::addArgument(Operand *Source, CfgNode *Label) {
Labels[getSrcSize()] = Label;
addSource(Source);
}
-// Find the source operand corresponding to the incoming edge for the
-// given node. TODO: This uses a linear-time search, which could be
-// improved if it becomes a problem.
+// Find the source operand corresponding to the incoming edge for the given
+// node. TODO: This uses a linear-time search, which could be improved if it
+// becomes a problem.
Operand *InstPhi::getOperandForTarget(CfgNode *Target) const {
for (SizeT I = 0; I < getSrcSize(); ++I) {
if (Labels[I] == Target)
@@ -355,9 +355,9 @@
return nullptr;
}
-// Updates liveness for a particular operand based on the given
-// predecessor edge. Doesn't mark the operand as live if the Phi
-// instruction is dead or deleted.
+// Updates liveness for a particular operand based on the given predecessor
+// edge. Doesn't mark the operand as live if the Phi instruction is dead or
+// deleted.
void InstPhi::livenessPhiOperand(LivenessBV &Live, CfgNode *Target,
Liveness *Liveness) {
if (isDeleted() || Dead)
@@ -377,8 +377,8 @@
llvm_unreachable("Phi operand not found for specified target node");
}
-// Change "a=phi(...)" to "a_phi=phi(...)" and return a new
-// instruction "a=a_phi".
+// Change "a=phi(...)" to "a_phi=phi(...)" and return a new instruction
+// "a=a_phi".
Inst *InstPhi::lower(Cfg *Func) {
Variable *Dest = getDest();
assert(Dest);
@@ -562,8 +562,8 @@
return;
Ostream &Str = Func->getContext()->getStrDump();
bool First = true;
- // Print "LIVEEND={a,b,c}" for all source operands whose live ranges
- // are known to end at this instruction.
+ // Print "LIVEEND={a,b,c}" for all source operands whose live ranges are
+ // known to end at this instruction.
if (Func->isVerbose(IceV_Liveness)) {
FOREACH_VAR_IN_INST(Var, *this) {
if (isLastUse(Var)) {
@@ -886,8 +886,7 @@
void InstFakeDef::emit(const Cfg *Func) const {
if (!BuildDefs::dump())
return;
- // Go ahead and "emit" these for now, since they are relatively
- // rare.
+ // Go ahead and "emit" these for now, since they are relatively rare.
Ostream &Str = Func->getContext()->getStrEmit();
Str << "\t# ";
getDest()->emit(Func);
@@ -948,9 +947,8 @@
if (!SrcVar)
return false;
if (Dest->hasReg() && Dest->getRegNum() == SrcVar->getRegNum()) {
- // TODO: On x86-64, instructions like "mov eax, eax" are used to
- // clear the upper 32 bits of rax. We need to recognize and
- // preserve these.
+ // TODO: On x86-64, instructions like "mov eax, eax" are used to clear the
+ // upper 32 bits of rax. We need to recognize and preserve these.
return true;
}
if (!Dest->hasReg() && !SrcVar->hasReg() &&
diff --git a/src/IceInst.def b/src/IceInst.def
index a9cadb2..d265213 100644
--- a/src/IceInst.def
+++ b/src/IceInst.def
@@ -7,8 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file defines properties of ICE instructions in the form of
-// x-macros.
+// This file defines properties of ICE instructions in the form of x-macros.
//
//===----------------------------------------------------------------------===//
diff --git a/src/IceInst.h b/src/IceInst.h
index 80b2bd2..a727683 100644
--- a/src/IceInst.h
+++ b/src/IceInst.h
@@ -25,9 +25,9 @@
// TODO: The Cfg structure, and instructions in particular, need to be
// validated for things like valid operand types, valid branch targets, proper
-// ordering of Phi and non-Phi instructions, etc. Most of the validity
-// checking will be done in the bitcode reader. We need a list of everything
-// that should be validated, and tests for each.
+// ordering of Phi and non-Phi instructions, etc. Most of the validity checking
+// will be done in the bitcode reader. We need a list of everything that should
+// be validated, and tests for each.
namespace Ice {
@@ -118,9 +118,9 @@
return NodeList();
}
virtual bool isUnconditionalBranch() const { return false; }
- /// If the instruction is a branch-type instruction with OldNode as a
- /// target, repoint it to NewNode and return true, otherwise return
- /// false. Repoint all instances of OldNode as a target.
+ /// If the instruction is a branch-type instruction with OldNode as a target,
+ /// repoint it to NewNode and return true; otherwise return false. Repoint all
+ /// instances of OldNode as a target.
virtual bool repointEdges(CfgNode *OldNode, CfgNode *NewNode) {
(void)OldNode;
(void)NewNode;
@@ -130,11 +130,11 @@
virtual bool isSimpleAssign() const { return false; }
void livenessLightweight(Cfg *Func, LivenessBV &Live);
- // Calculates liveness for this instruction. Returns true if this
- /// instruction is (tentatively) still live and should be retained, and false
- /// if this instruction is (tentatively) dead and should be deleted. The
- /// decision is tentative until the liveness dataflow algorithm has converged,
- /// and then a separate pass permanently deletes dead instructions.
+ /// Calculates liveness for this instruction. Returns true if this instruction
+ /// is (tentatively) still live and should be retained, and false if this
+ /// instruction is (tentatively) dead and should be deleted. The decision is
+ /// tentative until the liveness dataflow algorithm has converged, and then a
+ /// separate pass permanently deletes dead instructions.
bool liveness(InstNumberT InstNumber, LivenessBV &Live, Liveness *Liveness,
LiveBeginEndMap *LiveBegin, LiveBeginEndMap *LiveEnd);
@@ -143,13 +143,12 @@
/// instructions, and a target-specific instruction results in a single native
/// instruction.
virtual uint32_t getEmitInstCount() const { return 0; }
- // TODO(stichnot): Change Inst back to abstract once the g++ build
- // issue is fixed. llvm::ilist<Ice::Inst> doesn't work under g++
- // because the resize(size_t, Ice::Inst) method is incorrectly
- // declared and thus doesn't allow the abstract class Ice::Inst.
- // The method should be declared resize(size_t, const Ice::Inst &).
- // virtual void emit(const Cfg *Func) const = 0;
- // virtual void emitIAS(const Cfg *Func) const = 0;
+ // TODO(stichnot): Change Inst back to abstract once the g++ build issue is
+ // fixed. llvm::ilist<Ice::Inst> doesn't work under g++ because the
+ // resize(size_t, Ice::Inst) method is incorrectly declared and thus doesn't
+ // allow the abstract class Ice::Inst. The method should be declared
+ // resize(size_t, const Ice::Inst &).
+ // virtual void emit(const Cfg *Func) const = 0;
+ // virtual void emitIAS(const Cfg *Func) const = 0;
virtual void emit(const Cfg *) const {
llvm_unreachable("emit on abstract class");
}
@@ -179,8 +178,8 @@
LiveRangesEnded |= (((LREndedBits)1u) << VarIndex);
}
void resetLastUses() { LiveRangesEnded = 0; }
- /// The destroy() method lets the instruction cleanly release any
- /// memory that was allocated via the Cfg's allocator.
+ /// The destroy() method lets the instruction cleanly release any memory that
+ /// was allocated via the Cfg's allocator.
virtual void destroy(Cfg *Func) { Func->deallocateArrayOf<Operand *>(Srcs); }
const InstKind Kind;
@@ -188,17 +187,17 @@
InstNumberT Number;
/// Deleted means irrevocably deleted.
bool Deleted = false;
- /// Dead means one of two things depending on context: (1) pending
- /// deletion after liveness analysis converges, or (2) marked for
- /// deletion during lowering due to a folded bool operation.
+ /// Dead means one of two things depending on context: (1) pending deletion
+ /// after liveness analysis converges, or (2) marked for deletion during
+ /// lowering due to a folded bool operation.
bool Dead = false;
- /// HasSideEffects means the instruction is something like a function
- /// call or a volatile load that can't be removed even if its Dest
- /// variable is not live.
+ /// HasSideEffects means the instruction is something like a function call or
+ /// a volatile load that can't be removed even if its Dest variable is not
+ /// live.
bool HasSideEffects = false;
- /// IsDestNonKillable means that liveness analysis shouldn't consider
- /// this instruction to kill the Dest variable. This is used when
- /// lowering produces two assignments to the same variable.
+ /// IsDestNonKillable means that liveness analysis shouldn't consider this
+ /// instruction to kill the Dest variable. This is used when lowering produces
+ /// two assignments to the same variable.
bool IsDestNonKillable = false;
Variable *Dest;
@@ -207,13 +206,12 @@
Operand **Srcs;
/// LiveRangesEnded marks which Variables' live ranges end in this
- /// instruction. An instruction can have an arbitrary number of
- /// source operands (e.g. a call instruction), and each source
- /// operand can contain 0 or 1 Variable (and target-specific operands
- /// could contain more than 1 Variable). All the variables in an
- /// instruction are conceptually flattened and each variable is
- /// mapped to one bit position of the LiveRangesEnded bit vector.
- /// Only the first CHAR_BIT * sizeof(LREndedBits) variables are
+ /// instruction. An instruction can have an arbitrary number of source
+ /// operands (e.g. a call instruction), and each source operand can contain 0
+ /// or 1 Variable (and target-specific operands could contain more than 1
+ /// Variable). All the variables in an instruction are conceptually flattened
+ /// and each variable is mapped to one bit position of the LiveRangesEnded bit
+ /// vector. Only the first CHAR_BIT * sizeof(LREndedBits) variables are
/// tracked this way.
using LREndedBits = uint32_t; // only first 32 src operands tracked, sorry
LREndedBits LiveRangesEnded;
@@ -235,9 +233,9 @@
}
};
-/// Alloca instruction. This captures the size in bytes as getSrc(0),
-/// and the required alignment in bytes. The alignment must be either
-/// 0 (no alignment required) or a power of 2.
+/// Alloca instruction. This captures the size in bytes as getSrc(0), and the
+/// required alignment in bytes. The alignment must be either 0 (no alignment
+/// required) or a power of 2.
class InstAlloca : public InstHighLevel {
InstAlloca() = delete;
InstAlloca(const InstAlloca &) = delete;
@@ -261,8 +259,8 @@
const uint32_t AlignInBytes;
};
-/// Binary arithmetic instruction. The source operands are captured in
-/// getSrc(0) and getSrc(1).
+/// Binary arithmetic instruction. The source operands are captured in getSrc(0)
+/// and getSrc(1).
class InstArithmetic : public InstHighLevel {
InstArithmetic() = delete;
InstArithmetic(const InstArithmetic &) = delete;
@@ -296,12 +294,11 @@
const OpKind Op;
};
-/// Assignment instruction. The source operand is captured in
-/// getSrc(0). This is not part of the LLVM bitcode, but is a useful
-/// abstraction for some of the lowering. E.g., if Phi instruction
-/// lowering happens before target lowering, or for representing an
-/// Inttoptr instruction, or as an intermediate step for lowering a
-/// Load instruction.
+/// Assignment instruction. The source operand is captured in getSrc(0). This is
+/// not part of the LLVM bitcode, but is a useful abstraction for some of the
+/// lowering. E.g., if Phi instruction lowering happens before target lowering,
+/// or for representing an Inttoptr instruction, or as an intermediate step for
+/// lowering a Load instruction.
class InstAssign : public InstHighLevel {
InstAssign() = delete;
InstAssign(const InstAssign &) = delete;
@@ -319,16 +316,16 @@
InstAssign(Cfg *Func, Variable *Dest, Operand *Source);
};
-/// Branch instruction. This represents both conditional and
-/// unconditional branches.
+/// Branch instruction. This represents both conditional and unconditional
+/// branches.
class InstBr : public InstHighLevel {
InstBr() = delete;
InstBr(const InstBr &) = delete;
InstBr &operator=(const InstBr &) = delete;
public:
- /// Create a conditional branch. If TargetTrue==TargetFalse, it is
- /// optimized to an unconditional branch.
+ /// Create a conditional branch. If TargetTrue==TargetFalse, it is optimized
+ /// to an unconditional branch.
static InstBr *create(Cfg *Func, Operand *Source, CfgNode *TargetTrue,
CfgNode *TargetFalse) {
return new (Func->allocate<InstBr>())
@@ -365,8 +362,8 @@
CfgNode *TargetTrue; /// nullptr if unconditional branch
};
-/// Call instruction. The call target is captured as getSrc(0), and
-/// arg I is captured as getSrc(I+1).
+/// Call instruction. The call target is captured as getSrc(0), and arg I is
+/// captured as getSrc(I+1).
class InstCall : public InstHighLevel {
InstCall() = delete;
InstCall(const InstCall &) = delete;
@@ -376,8 +373,8 @@
static InstCall *create(Cfg *Func, SizeT NumArgs, Variable *Dest,
Operand *CallTarget, bool HasTailCall) {
/// Set HasSideEffects to true so that the call instruction can't be
- /// dead-code eliminated. IntrinsicCalls can override this if the
- /// particular intrinsic is deletable and has no side-effects.
+ /// dead-code eliminated. IntrinsicCalls can override this if the particular
+ /// intrinsic is deletable and has no side-effects.
const bool HasSideEffects = true;
const InstKind Kind = Inst::Call;
return new (Func->allocate<InstCall>()) InstCall(
@@ -458,8 +455,8 @@
Operand *Source2);
};
-/// Floating-point comparison instruction. The source operands are
-/// captured in getSrc(0) and getSrc(1).
+/// Floating-point comparison instruction. The source operands are captured in
+/// getSrc(0) and getSrc(1).
class InstFcmp : public InstHighLevel {
InstFcmp() = delete;
InstFcmp(const InstFcmp &) = delete;
@@ -489,8 +486,8 @@
const FCond Condition;
};
-/// Integer comparison instruction. The source operands are captured
-/// in getSrc(0) and getSrc(1).
+/// Integer comparison instruction. The source operands are captured in
+/// getSrc(0) and getSrc(1).
class InstIcmp : public InstHighLevel {
InstIcmp() = delete;
InstIcmp(const InstIcmp &) = delete;
@@ -543,8 +540,8 @@
Operand *Source2, Operand *Source3);
};
-/// Call to an intrinsic function. The call target is captured as getSrc(0),
-/// and arg I is captured as getSrc(I+1).
+/// Call to an intrinsic function. The call target is captured as getSrc(0), and
+/// arg I is captured as getSrc(I+1).
class InstIntrinsicCall : public InstCall {
InstIntrinsicCall() = delete;
InstIntrinsicCall(const InstIntrinsicCall &) = delete;
@@ -573,7 +570,7 @@
const Intrinsics::IntrinsicInfo Info;
};
-/// Load instruction. The source address is captured in getSrc(0).
+/// Load instruction. The source address is captured in getSrc(0).
class InstLoad : public InstHighLevel {
InstLoad() = delete;
InstLoad(const InstLoad &) = delete;
@@ -594,8 +591,8 @@
InstLoad(Cfg *Func, Variable *Dest, Operand *SourceAddr);
};
-/// Phi instruction. For incoming edge I, the node is Labels[I] and
-/// the Phi source operand is getSrc(I).
+/// Phi instruction. For incoming edge I, the node is Labels[I] and the Phi
+/// source operand is getSrc(I).
class InstPhi : public InstHighLevel {
InstPhi() = delete;
InstPhi(const InstPhi &) = delete;
@@ -621,15 +618,15 @@
Inst::destroy(Func);
}
- /// Labels[] duplicates the InEdges[] information in the enclosing
- /// CfgNode, but the Phi instruction is created before InEdges[]
- /// is available, so it's more complicated to share the list.
+ /// Labels[] duplicates the InEdges[] information in the enclosing CfgNode,
+ /// but the Phi instruction is created before InEdges[] is available, so it's
+ /// more complicated to share the list.
CfgNode **Labels;
};
-/// Ret instruction. The return value is captured in getSrc(0), but if
-/// there is no return value (void-type function), then
-/// getSrcSize()==0 and hasRetValue()==false.
+/// Ret instruction. The return value is captured in getSrc(0), but if there is
+/// no return value (void-type function), then getSrcSize()==0 and
+/// hasRetValue()==false.
class InstRet : public InstHighLevel {
InstRet() = delete;
InstRet(const InstRet &) = delete;
@@ -675,8 +672,8 @@
Operand *Source2);
};
-/// Store instruction. The address operand is captured, along with the
-/// data operand to be stored into the address.
+/// Store instruction. The address operand is captured, along with the data
+/// operand to be stored into the address.
class InstStore : public InstHighLevel {
InstStore() = delete;
InstStore(const InstStore &) = delete;
@@ -700,8 +697,7 @@
InstStore(Cfg *Func, Operand *Data, Operand *Addr);
};
-/// Switch instruction. The single source operand is captured as
-/// getSrc(0).
+/// Switch instruction. The single source operand is captured as getSrc(0).
class InstSwitch : public InstHighLevel {
InstSwitch() = delete;
InstSwitch(const InstSwitch &) = delete;
@@ -744,8 +740,7 @@
CfgNode **Labels; /// size is NumCases
};
-/// Unreachable instruction. This is a terminator instruction with no
-/// operands.
+/// Unreachable instruction. This is a terminator instruction with no operands.
class InstUnreachable : public InstHighLevel {
InstUnreachable() = delete;
InstUnreachable(const InstUnreachable &) = delete;
@@ -765,7 +760,7 @@
explicit InstUnreachable(Cfg *Func);
};
-/// BundleLock instruction. There are no operands. Contains an option
+/// BundleLock instruction. There are no operands. Contains an option
/// indicating whether align_to_end is specified.
class InstBundleLock : public InstHighLevel {
InstBundleLock() = delete;
@@ -791,7 +786,7 @@
InstBundleLock(Cfg *Func, Option BundleOption);
};
-/// BundleUnlock instruction. There are no operands.
+/// BundleUnlock instruction. There are no operands.
class InstBundleUnlock : public InstHighLevel {
InstBundleUnlock() = delete;
InstBundleUnlock(const InstBundleUnlock &) = delete;
@@ -812,18 +807,17 @@
explicit InstBundleUnlock(Cfg *Func);
};
-/// FakeDef instruction. This creates a fake definition of a variable,
-/// which is how we represent the case when an instruction produces
-/// multiple results. This doesn't happen with high-level ICE
-/// instructions, but might with lowered instructions. For example,
-/// this would be a way to represent condition flags being modified by
-/// an instruction.
+/// FakeDef instruction. This creates a fake definition of a variable, which is
+/// how we represent the case when an instruction produces multiple results.
+/// This doesn't happen with high-level ICE instructions, but might with lowered
+/// instructions. For example, this would be a way to represent condition flags
+/// being modified by an instruction.
///
-/// It's generally useful to set the optional source operand to be the
-/// dest variable of the instruction that actually produces the FakeDef
-/// dest. Otherwise, the original instruction could be dead-code
-/// eliminated if its dest operand is unused, and therefore the FakeDef
-/// dest wouldn't be properly initialized.
+/// It's generally useful to set the optional source operand to be the dest
+/// variable of the instruction that actually produces the FakeDef dest.
+/// Otherwise, the original instruction could be dead-code eliminated if its
+/// dest operand is unused, and therefore the FakeDef dest wouldn't be properly
+/// initialized.
class InstFakeDef : public InstHighLevel {
InstFakeDef() = delete;
InstFakeDef(const InstFakeDef &) = delete;
@@ -843,11 +837,10 @@
InstFakeDef(Cfg *Func, Variable *Dest, Variable *Src);
};
-/// FakeUse instruction. This creates a fake use of a variable, to
-/// keep the instruction that produces that variable from being
-/// dead-code eliminated. This is useful in a variety of lowering
-/// situations. The FakeUse instruction has no dest, so it can itself
-/// never be dead-code eliminated.
+/// FakeUse instruction. This creates a fake use of a variable, to keep the
+/// instruction that produces that variable from being dead-code eliminated.
+/// This is useful in a variety of lowering situations. The FakeUse instruction
+/// has no dest, so it can itself never be dead-code eliminated.
class InstFakeUse : public InstHighLevel {
InstFakeUse() = delete;
InstFakeUse(const InstFakeUse &) = delete;
@@ -866,16 +859,15 @@
InstFakeUse(Cfg *Func, Variable *Src);
};
-/// FakeKill instruction. This "kills" a set of variables by modeling
-/// a trivial live range at this instruction for each (implicit)
-/// variable. The primary use is to indicate that scratch registers
-/// are killed after a call, so that the register allocator won't
-/// assign a scratch register to a variable whose live range spans a
-/// call.
+/// FakeKill instruction. This "kills" a set of variables by modeling a trivial
+/// live range at this instruction for each (implicit) variable. The primary use
+/// is to indicate that scratch registers are killed after a call, so that the
+/// register allocator won't assign a scratch register to a variable whose live
+/// range spans a call.
///
-/// The FakeKill instruction also holds a pointer to the instruction
-/// that kills the set of variables, so that if that linked instruction
-/// gets dead-code eliminated, the FakeKill instruction will as well.
+/// The FakeKill instruction also holds a pointer to the instruction that kills
+/// the set of variables, so that if that linked instruction gets dead-code
+/// eliminated, the FakeKill instruction will as well.
class InstFakeKill : public InstHighLevel {
InstFakeKill() = delete;
InstFakeKill(const InstFakeKill &) = delete;
@@ -898,10 +890,9 @@
const Inst *Linked;
};
-/// JumpTable instruction. This represents a jump table that will be
-/// stored in the .rodata section. This is used to track and repoint
-/// the target CfgNodes which may change, for example due to
-/// splitting for phi lowering.
+/// JumpTable instruction. This represents a jump table that will be stored in
+/// the .rodata section. This is used to track and repoint the target CfgNodes
+/// which may change, for example due to splitting for phi lowering.
class InstJumpTable : public InstHighLevel {
InstJumpTable() = delete;
InstJumpTable(const InstJumpTable &) = delete;
@@ -968,8 +959,8 @@
namespace llvm {
-/// Override the default ilist traits so that Inst's private ctor and
-/// deleted dtor aren't invoked.
+/// Override the default ilist traits so that Inst's private ctor and deleted
+/// dtor aren't invoked.
template <>
struct ilist_traits<Ice::Inst> : public ilist_default_traits<Ice::Inst> {
Ice::Inst *createSentinel() const {
diff --git a/src/IceInstARM32.cpp b/src/IceInstARM32.cpp
index 9a68115..d449641 100644
--- a/src/IceInstARM32.cpp
+++ b/src/IceInstARM32.cpp
@@ -8,8 +8,8 @@
//===----------------------------------------------------------------------===//
///
/// \file
-/// This file implements the InstARM32 and OperandARM32 classes,
-/// primarily the constructors and the dump()/emit() methods.
+/// This file implements the InstARM32 and OperandARM32 classes, primarily the
+/// constructors and the dump()/emit() methods.
///
//===----------------------------------------------------------------------===//
@@ -271,16 +271,14 @@
TargetTrue(TargetTrue), TargetFalse(TargetFalse), Label(Label) {}
bool InstARM32Br::optimizeBranch(const CfgNode *NextNode) {
- // If there is no next block, then there can be no fallthrough to
- // optimize.
+ // If there is no next block, then there can be no fallthrough to optimize.
if (NextNode == nullptr)
return false;
// Intra-block conditional branches can't be optimized.
if (Label)
return false;
- // If there is no fallthrough node, such as a non-default case label
- // for a switch instruction, then there is no opportunity to
- // optimize.
+ // If there is no fallthrough node, such as a non-default case label for a
+ // switch instruction, then there is no opportunity to optimize.
if (getTargetFalse() == nullptr)
return false;
@@ -290,15 +288,15 @@
setDeleted();
return true;
}
- // If the fallthrough is to the next node, set fallthrough to nullptr
- // to indicate.
+ // If the fallthrough is to the next node, set fallthrough to nullptr to
+  // indicate this.
if (getTargetFalse() == NextNode) {
TargetFalse = nullptr;
return true;
}
- // If TargetTrue is the next node, and TargetFalse is not nullptr
- // (which was already tested above), then invert the branch
- // condition, swap the targets, and set new fallthrough to nullptr.
+ // If TargetTrue is the next node, and TargetFalse is not nullptr (which was
+ // already tested above), then invert the branch condition, swap the targets,
+ // and set new fallthrough to nullptr.
if (getTargetTrue() == NextNode) {
assert(Predicate != CondARM32::AL);
setPredicate(getOppositeCondition(getPredicate()));
@@ -338,10 +336,10 @@
InstARM32Pop::InstARM32Pop(Cfg *Func, const VarList &Dests)
: InstARM32(Func, InstARM32::Pop, 0, nullptr), Dests(Dests) {
- // Track modifications to Dests separately via FakeDefs.
- // Also, a pop instruction affects the stack pointer and so it should not
- // be allowed to be automatically dead-code eliminated. This is automatic
- // since we leave the Dest as nullptr.
+ // Track modifications to Dests separately via FakeDefs. Also, a pop
+ // instruction affects the stack pointer and so it should not be allowed to
+ // be automatically dead-code eliminated. This is automatic since we leave
+ // the Dest as nullptr.
}
InstARM32Push::InstARM32Push(Cfg *Func, const VarList &Srcs)
@@ -450,8 +448,8 @@
Operand *Src0 = getSrc(0);
if (const auto *Src0V = llvm::dyn_cast<Variable>(Src0)) {
if (!Src0V->hasReg()) {
- // Always use the whole stack slot. A 32-bit load has a larger range
- // of offsets than 16-bit, etc.
+ // Always use the whole stack slot. A 32-bit load has a larger range of
+ // offsets than 16-bit, etc.
ActualOpcode = IceString("ldr");
}
} else {
@@ -662,13 +660,13 @@
Ostream &Str = Func->getContext()->getStrEmit();
assert(getSrcSize() == 1);
if (llvm::isa<ConstantInteger32>(getCallTarget())) {
- // This shouldn't happen (typically have to copy the full 32-bits
- // to a register and do an indirect jump).
+    // This shouldn't happen (typically one has to copy the full 32 bits to a
+ // register and do an indirect jump).
llvm::report_fatal_error("ARM32Call to ConstantInteger32");
} else if (const auto CallTarget =
llvm::dyn_cast<ConstantRelocatable>(getCallTarget())) {
- // Calls only have 24-bits, but the linker should insert veneers to
- // extend the range if needed.
+    // Calls only have 24 bits, but the linker should insert veneers to extend
+ // the range if needed.
Str << "\t"
<< "bl"
<< "\t";
diff --git a/src/IceInstARM32.def b/src/IceInstARM32.def
index 1836667..4e34cbf 100644
--- a/src/IceInstARM32.def
+++ b/src/IceInstARM32.def
@@ -17,20 +17,20 @@
// NOTE: PC and SP are not considered isInt, to avoid register allocating.
//
// For the NaCl sandbox we also need r9 for TLS, so just always reserve it.
-// TODO(jvoung): Allow r9 to be isInt when sandboxing is turned off
-// (native mode).
+// TODO(jvoung): Allow r9 to be isInt when sandboxing is turned off (native
+// mode).
//
// IP is not considered isInt to reserve it as a scratch register. A scratch
// register is useful for expanding instructions post-register allocation.
//
-// LR is not considered isInt to avoid being allocated as a register.
-// It is technically preserved, but save/restore is handled separately,
-// based on whether or not the function MaybeLeafFunc.
+// LR is not considered isInt to avoid being allocated as a register. It is
+// technically preserved, but save/restore is handled separately, based on
+// whether or not the function MaybeLeafFunc.
// ALIASESn is a family of macros that we use to define register aliasing in
// ARM32. n indicates how many aliases are being provided to the macro. It
-// assumes the parameters are register names declared in a namespace/class named
-// RegARM32.
+// assumes the parameters are register names declared in a namespace/class
+// named RegARM32.
#define ALIASES1(r0) \
{RegARM32::r0}
#define ALIASES2(r0, r1) \
@@ -152,12 +152,12 @@
//#define X(val, encode, name, scratch, preserved, stackptr, frameptr,
// isInt, isFP32, isFP64, isVec128, aliases_init)
-// D registers 0-7 are scratch, 8-15 are preserved, and 16-31
-// are also scratch (if supported by the D32 feature vs D16).
-// D registers are defined in reverse order so that, during register allocation,
-// Subzero will prefer higher D registers. In processors supporting the D32
-// feature this will effectively cause double allocation to bias towards
-// allocating "high" D registers, which do not alias any S registers.
+// D registers 0-7 are scratch, 8-15 are preserved, and 16-31 are also scratch
+// (if supported by the D32 feature vs D16). D registers are defined in reverse
+// order so that, during register allocation, Subzero will prefer higher D
+// registers. In processors supporting the D32 feature this will effectively
+// cause double allocation to bias towards allocating "high" D registers, which
+// do not alias any S registers.
//
// Regenerate this with the following python script:
// def print_dregs():
@@ -251,9 +251,9 @@
//#define X(val, encode, name, scratch, preserved, stackptr, frameptr,
// isInt, isFP32, isFP64, isVec128, aliases_init)
-// Q registers 0-3 are scratch, 4-7 are preserved, and 8-15
-// are also scratch (if supported by the D32 feature).
-// Q registers are defined in reverse order for the same reason as D registers.
+// Q registers 0-3 are scratch, 4-7 are preserved, and 8-15 are also scratch
+// (if supported by the D32 feature). Q registers are defined in reverse order
+// for the same reason as D registers.
//
// Regenerate this with the following python script:
// def print_qregs():
@@ -320,10 +320,10 @@
// isInt, isFP32, isFP64, isVec128, alias_init)
#undef ALIASES
-// We also provide a combined table, so that there is a namespace where
-// all of the registers are considered and have distinct numberings.
-// This is in contrast to the above, where the "encode" is based on how
-// the register numbers will be encoded in binaries and values can overlap.
+// We also provide a combined table, so that there is a namespace where all of
+// the registers are considered and have distinct numberings. This is in
+// contrast to the above, where the "encode" is based on how the register
+// numbers will be encoded in binaries and values can overlap.
#define REGARM32_TABLE \
/* val, encode, name, scratch, preserved, stackptr, frameptr, isInt, \
isFP32, isFP64, isVec128, alias_init */ \
@@ -347,8 +347,8 @@
// define X(val, init)
// Load/Store instruction width suffixes and FP/Vector element size suffixes
-// the # of offset bits allowed as part of an addressing mode (for sign or
-// zero extending load/stores).
+// and the # of offset bits allowed as part of an addressing mode (for sign or
+// zero extending load/stores).
#define ICETYPEARM32_TABLE \
/* tag, element type, int_width, vec_width, addr bits sext, zext */ \
X(IceType_void, IceType_void, "" , "" , 0 , 0) \
@@ -378,9 +378,9 @@
X(RRX, "rrx")
//#define X(tag, emit)
-// Attributes for the condition code 4-bit encoding (that is independent
-// of the APSR's NZCV fields). For example, EQ is 0, but corresponds to
-// Z = 1, and NE is 1, but corresponds to Z = 0.
+// Attributes for the condition code 4-bit encoding (that is independent of the
+// APSR's NZCV fields). For example, EQ is 0, but corresponds to Z = 1, and NE
+// is 1, but corresponds to Z = 0.
#define ICEINSTARM32COND_TABLE \
/* enum value, encoding, opposite, emit */ \
X(EQ, 0 , NE, "eq") /* equal */ \
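The condition-code table excerpted above, like the register tables earlier in
this file, follows the usual X-macro pattern: each consumer defines X to pick
out the columns it needs and then expands the table. A minimal sketch of that
pattern (the enum and array names here are illustrative, not taken from the
tree):

  // Build an enum from the tag/encoding columns and an emit-string array from
  // the last column of ICEINSTARM32COND_TABLE.
  enum CondSketch {
  #define X(tag, encode, opp, emit) tag = encode,
    ICEINSTARM32COND_TABLE
  #undef X
  };
  static const char *CondEmitStringSketch[] = {
  #define X(tag, encode, opp, emit) emit,
    ICEINSTARM32COND_TABLE
  #undef X
  };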
diff --git a/src/IceInstARM32.h b/src/IceInstARM32.h
index 8c2ea6f..89e5655 100644
--- a/src/IceInstARM32.h
+++ b/src/IceInstARM32.h
@@ -8,9 +8,9 @@
//===----------------------------------------------------------------------===//
///
/// \file
-/// This file declares the InstARM32 and OperandARM32 classes and
-/// their subclasses. This represents the machine instructions and
-/// operands used for ARM32 code selection.
+/// This file declares the InstARM32 and OperandARM32 classes and their
+/// subclasses. This represents the machine instructions and operands used for
+/// ARM32 code selection.
///
//===----------------------------------------------------------------------===//
@@ -27,7 +27,7 @@
class TargetARM32;
-/// OperandARM32 extends the Operand hierarchy. Its subclasses are
+/// OperandARM32 extends the Operand hierarchy. Its subclasses are
/// OperandARM32Mem and OperandARM32Flex.
class OperandARM32 : public Operand {
OperandARM32() = delete;
@@ -87,17 +87,17 @@
/// NOTE: The Variable-typed operands have to be registers.
///
/// (1) Reg + Imm. The Immediate actually has a limited number of bits
- /// for encoding, so check canHoldOffset first. It cannot handle
- /// general Constant operands like ConstantRelocatable, since a relocatable
- /// can potentially take up too many bits.
+ /// for encoding, so check canHoldOffset first. It cannot handle general
+ /// Constant operands like ConstantRelocatable, since a relocatable can
+ /// potentially take up too many bits.
static OperandARM32Mem *create(Cfg *Func, Type Ty, Variable *Base,
ConstantInteger32 *ImmOffset,
AddrMode Mode = Offset) {
return new (Func->allocate<OperandARM32Mem>())
OperandARM32Mem(Func, Ty, Base, ImmOffset, Mode);
}
- /// (2) Reg +/- Reg with an optional shift of some kind and amount.
- /// Note that this mode is disallowed in the NaCl sandbox.
+ /// (2) Reg +/- Reg with an optional shift of some kind and amount. Note that
+ /// this mode is disallowed in the NaCl sandbox.
static OperandARM32Mem *create(Cfg *Func, Type Ty, Variable *Base,
Variable *Index, ShiftKind ShiftOp = kNoShift,
uint16_t ShiftAmt = 0,
@@ -130,10 +130,10 @@
return Operand->getKind() == static_cast<OperandKind>(kMem);
}
- /// Return true if a load/store instruction for an element of type Ty
- /// can encode the Offset directly in the immediate field of the 32-bit
- /// ARM instruction. For some types, if the load is Sign extending, then
- /// the range is reduced.
+ /// Return true if a load/store instruction for an element of type Ty can
+ /// encode the Offset directly in the immediate field of the 32-bit ARM
+ /// instruction. For some types, if the load is Sign extending, then the range
+ /// is reduced.
static bool canHoldOffset(Type Ty, bool SignExt, int32_t Offset);
private:
@@ -150,10 +150,9 @@
AddrMode Mode;
};
-/// OperandARM32Flex represent the "flexible second operand" for
-/// data-processing instructions. It can be a rotatable 8-bit constant, or
-/// a register with an optional shift operand. The shift amount can even be
-/// a third register.
+/// OperandARM32Flex represents the "flexible second operand" of data-processing
+/// instructions. It can be a rotatable 8-bit constant, or a register with an
+/// optional shift operand. The shift amount can even be a third register.
class OperandARM32Flex : public OperandARM32 {
OperandARM32Flex() = delete;
OperandARM32Flex(const OperandARM32Flex &) = delete;
@@ -191,8 +190,8 @@
return Operand->getKind() == static_cast<OperandKind>(kFlexImm);
}
- /// Return true if the Immediate can fit in the ARM flexible operand.
- /// Fills in the out-params RotateAmt and Immed_8 if Immediate fits.
+ /// Return true if the Immediate can fit in the ARM flexible operand. Fills in
+ /// the out-params RotateAmt and Immed_8 if Immediate fits.
static bool canHoldImm(uint32_t Immediate, uint32_t *RotateAmt,
uint32_t *Immed_8);
@@ -244,9 +243,9 @@
/// StackVariable represents a Var that isn't assigned a register (stack-only).
/// It is assigned a stack slot, but the slot's offset may be too large to
-/// represent in the native addressing mode, and so it has a separate
-/// base register from SP/FP, where the offset from that base register is
-/// then in range.
+/// represent in the native addressing mode, and so it has a separate base
+/// register from SP/FP, where the offset from that base register is then in
+/// range.
class StackVariable final : public Variable {
StackVariable() = delete;
StackVariable(const StackVariable &) = delete;
@@ -272,8 +271,8 @@
};
/// Base class for ARM instructions. While most ARM instructions can be
-/// conditionally executed, a few of them are not predicable (halt,
-/// memory barriers, etc.).
+/// conditionally executed, a few of them are not predicable (halt, memory
+/// barriers, etc.).
class InstARM32 : public InstTarget {
InstARM32() = delete;
InstARM32(const InstARM32 &) = delete;
@@ -525,8 +524,8 @@
static const char *Opcode;
};
-/// Base class for assignment instructions.
-/// These can be tested for redundancy (and elided if redundant).
+/// Base class for assignment instructions. These can be tested for redundancy
+/// (and elided if redundant).
template <InstARM32::InstKindARM32 K>
class InstARM32Movlike : public InstARM32Pred {
InstARM32Movlike() = delete;
@@ -576,8 +575,8 @@
InstARM32ThreeAddrGPR &operator=(const InstARM32ThreeAddrGPR &) = delete;
public:
- /// Create an ordinary binary-op instruction like add, and sub.
- /// Dest and Src1 must be registers.
+  /// Create an ordinary binary-op instruction like add and sub. Dest and Src1
+ /// must be registers.
static InstARM32ThreeAddrGPR *create(Cfg *Func, Variable *Dest,
Variable *Src0, Operand *Src1,
CondARM32::Cond Predicate,
@@ -618,10 +617,10 @@
bool SetFlags;
};
-/// Instructions of the form x := y op z, for vector/FP. We leave these as
+/// Instructions of the form x := y op z, for vector/FP. We leave these as
/// unconditional: "ARM deprecates the conditional execution of any instruction
/// encoding provided by the Advanced SIMD Extension that is not also provided
-/// by the Floating-point (VFP) extension". They do not set flags.
+/// by the Floating-point (VFP) extension". They do not set flags.
template <InstARM32::InstKindARM32 K>
class InstARM32ThreeAddrFP : public InstARM32 {
InstARM32ThreeAddrFP() = delete;
@@ -629,8 +628,8 @@
InstARM32ThreeAddrFP &operator=(const InstARM32ThreeAddrFP &) = delete;
public:
- /// Create a vector/FP binary-op instruction like vadd, and vsub.
- /// Everything must be a register.
+  /// Create a vector/FP binary-op instruction like vadd and vsub. Everything
+ /// must be a register.
static InstARM32ThreeAddrFP *create(Cfg *Func, Variable *Dest, Variable *Src0,
Variable *Src1) {
return new (Func->allocate<InstARM32ThreeAddrFP>())
@@ -779,24 +778,24 @@
using InstARM32Vmul = InstARM32ThreeAddrFP<InstARM32::Vmul>;
using InstARM32Vsub = InstARM32ThreeAddrFP<InstARM32::Vsub>;
using InstARM32Ldr = InstARM32Movlike<InstARM32::Ldr>;
-/// Move instruction (variable <- flex). This is more of a pseudo-inst.
-/// If var is a register, then we use "mov". If var is stack, then we use
-/// "str" to store to the stack.
+/// Move instruction (variable <- flex). This is more of a pseudo-inst. If var
+/// is a register, then we use "mov". If var is stack, then we use "str" to
+/// store to the stack.
using InstARM32Mov = InstARM32Movlike<InstARM32::Mov>;
/// Represents various vector mov instruction forms (simple single source,
/// single dest forms only, not the 2 GPR <-> 1 D reg forms, etc.).
using InstARM32Vldr = InstARM32Movlike<InstARM32::Vldr>;
-/// MovT leaves the bottom bits alone so dest is also a source.
-/// This helps indicate that a previous MovW setting dest is not dead code.
+/// MovT leaves the bottom bits alone so dest is also a source. This helps
+/// indicate that a previous MovW setting dest is not dead code.
using InstARM32Movt = InstARM32TwoAddrGPR<InstARM32::Movt>;
using InstARM32Movw = InstARM32UnaryopGPR<InstARM32::Movw, false>;
using InstARM32Clz = InstARM32UnaryopGPR<InstARM32::Clz, false>;
using InstARM32Mvn = InstARM32UnaryopGPR<InstARM32::Mvn, false>;
using InstARM32Rbit = InstARM32UnaryopGPR<InstARM32::Rbit, false>;
using InstARM32Rev = InstARM32UnaryopGPR<InstARM32::Rev, false>;
-// Technically, the uxt{b,h} and sxt{b,h} instructions have a rotation
-// operand as well (rotate source by 8, 16, 24 bits prior to extending),
-// but we aren't using that for now, so just model as a Unaryop.
+// Technically, the uxt{b,h} and sxt{b,h} instructions have a rotation operand
+// as well (rotate source by 8, 16, 24 bits prior to extending), but we aren't
+// using that for now, so just model as a Unaryop.
using InstARM32Sxt = InstARM32UnaryopGPR<InstARM32::Sxt, true>;
using InstARM32Uxt = InstARM32UnaryopGPR<InstARM32::Uxt, true>;
using InstARM32Vsqrt = InstARM32UnaryopFP<InstARM32::Vsqrt>;
@@ -805,9 +804,9 @@
using InstARM32Cmp = InstARM32CmpLike<InstARM32::Cmp>;
using InstARM32Tst = InstARM32CmpLike<InstARM32::Tst>;
-// InstARM32Label represents an intra-block label that is the target
-// of an intra-block branch. The offset between the label and the
-// branch must be fit in the instruction immediate (considered "near").
+// InstARM32Label represents an intra-block label that is the target of an
+// intra-block branch. The offset between the label and the branch must fit
+// in the instruction immediate (considered "near").
class InstARM32Label : public InstARM32 {
InstARM32Label() = delete;
InstARM32Label(const InstARM32Label &) = delete;
@@ -852,9 +851,9 @@
return new (Func->allocate<InstARM32Br>())
InstARM32Br(Func, NoCondTarget, Target, NoLabel, CondARM32::AL);
}
- /// Create a non-terminator conditional branch to a node, with a
- /// fallthrough to the next instruction in the current node. This is
- /// used for switch lowering.
+ /// Create a non-terminator conditional branch to a node, with a fallthrough
+ /// to the next instruction in the current node. This is used for switch
+ /// lowering.
static InstARM32Br *create(Cfg *Func, CfgNode *Target,
CondARM32::Cond Predicate) {
assert(Predicate != CondARM32::AL);
@@ -903,18 +902,18 @@
const InstARM32Label *Label; // Intra-block branch target
};
-/// AdjustStack instruction - subtracts SP by the given amount and
-/// updates the stack offset during code emission.
+/// AdjustStack instruction - subtracts SP by the given amount and updates the
+/// stack offset during code emission.
class InstARM32AdjustStack : public InstARM32 {
InstARM32AdjustStack() = delete;
InstARM32AdjustStack(const InstARM32AdjustStack &) = delete;
InstARM32AdjustStack &operator=(const InstARM32AdjustStack &) = delete;
public:
- /// Note: We need both Amount and SrcAmount. If Amount is too large then
- /// it needs to be copied to a register (so SrcAmount could be a register).
- /// However, we also need the numeric Amount for bookkeeping, and it's
- /// hard to pull that from the generic SrcAmount operand.
+ /// Note: We need both Amount and SrcAmount. If Amount is too large then it
+ /// needs to be copied to a register (so SrcAmount could be a register).
+ /// However, we also need the numeric Amount for bookkeeping, and it's hard to
+ /// pull that from the generic SrcAmount operand.
static InstARM32AdjustStack *create(Cfg *Func, Variable *SP, SizeT Amount,
Operand *SrcAmount) {
return new (Func->allocate<InstARM32AdjustStack>())
@@ -932,7 +931,7 @@
const SizeT Amount;
};
-/// Call instruction (bl/blx). Arguments should have already been pushed.
+/// Call instruction (bl/blx). Arguments should have already been pushed.
/// Technically bl and the register form of blx can be predicated, but we'll
/// leave that out until needed.
class InstARM32Call : public InstARM32 {
@@ -977,8 +976,8 @@
VarList Dests;
};
-/// Push a list of GPRs. Technically this can be predicated, but we don't
-/// need that functionality.
+/// Push a list of GPRs. Technically this can be predicated, but we don't need
+/// that functionality.
class InstARM32Push : public InstARM32 {
InstARM32Push() = delete;
InstARM32Push(const InstARM32Push &) = delete;
@@ -997,11 +996,11 @@
InstARM32Push(Cfg *Func, const VarList &Srcs);
};
-/// Ret pseudo-instruction. This is actually a "bx" instruction with
-/// an "lr" register operand, but epilogue lowering will search for a Ret
-/// instead of a generic "bx". This instruction also takes a Source
-/// operand (for non-void returning functions) for liveness analysis, though
-/// a FakeUse before the ret would do just as well.
+/// Ret pseudo-instruction. This is actually a "bx" instruction with an "lr"
+/// register operand, but epilogue lowering will search for a Ret instead of a
+/// generic "bx". This instruction also takes a Source operand (for non-void
+/// returning functions) for liveness analysis, though a FakeUse before the ret
+/// would do just as well.
///
/// NOTE: Even though "bx" can be predicated, for now leave out the predication
/// since it's not yet known to be useful for Ret. That may complicate finding
@@ -1025,8 +1024,8 @@
InstARM32Ret(Cfg *Func, Variable *LR, Variable *Source);
};
-/// Store instruction. It's important for liveness that there is no Dest
-/// operand (OperandARM32Mem instead of Dest Variable).
+/// Store instruction. It's important for liveness that there is no Dest operand
+/// (OperandARM32Mem instead of Dest Variable).
class InstARM32Str : public InstARM32Pred {
InstARM32Str() = delete;
InstARM32Str(const InstARM32Str &) = delete;
@@ -1205,9 +1204,9 @@
Variable *Dest1 = nullptr;
};
-// Declare partial template specializations of emit() methods that
-// already have default implementations. Without this, there is the
-// possibility of ODR violations and link errors.
+// Declare partial template specializations of emit() methods that already have
+// default implementations. Without this, there is the possibility of ODR
+// violations and link errors.
template <> void InstARM32Ldr::emit(const Cfg *Func) const;
template <> void InstARM32Mov::emit(const Cfg *Func) const;
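OperandARM32FlexImm::canHoldImm(), declared earlier in this header's diff,
checks the classic ARM "modified immediate" form: a value is encodable as a
flex immediate iff it is an 8-bit constant rotated right by an even amount. The
following is a self-contained sketch of that architectural rule, not the
implementation in this tree, and the out-param convention (4-bit rotate field
vs. raw rotation) is an assumption:

  #include <cstdint>
  // Returns true iff Immediate == ror(Immed_8, 2 * RotateAmt) for some
  // Immed_8 <= 0xFF and RotateAmt in [0, 15].
  static bool canHoldImmSketch(uint32_t Immediate, uint32_t *RotateAmt,
                               uint32_t *Immed_8) {
    for (uint32_t Rot = 0; Rot < 32; Rot += 2) {
      // Rotate left by Rot to undo a rotate-right-by-Rot encoding. Rot == 0 is
      // special-cased to avoid an undefined shift by 32.
      uint32_t Undone = (Rot == 0)
                            ? Immediate
                            : ((Immediate << Rot) | (Immediate >> (32 - Rot)));
      if (Undone <= 0xFF) {
        *RotateAmt = Rot / 2; // 4-bit field; applied as ror by 2 * field
        *Immed_8 = Undone;
        return true;
      }
    }
    return false;
  }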
diff --git a/src/IceInstMIPS32.cpp b/src/IceInstMIPS32.cpp
index e386806..7773272 100644
--- a/src/IceInstMIPS32.cpp
+++ b/src/IceInstMIPS32.cpp
@@ -8,8 +8,8 @@
//===----------------------------------------------------------------------===//
//
/// \file
-/// This file implements the InstMips32 and OperandMips32 classes,
-/// primarily the constructors and the dump()/emit() methods.
+/// This file implements the InstMIPS32 and OperandMIPS32 classes, primarily the
+/// constructors and the dump()/emit() methods.
///
//===----------------------------------------------------------------------===//
diff --git a/src/IceInstMIPS32.h b/src/IceInstMIPS32.h
index e426598..1c4863f 100644
--- a/src/IceInstMIPS32.h
+++ b/src/IceInstMIPS32.h
@@ -8,9 +8,9 @@
//===----------------------------------------------------------------------===//
///
/// \file
-/// This file declares the InstMIPS32 and OperandMIPS32 classes and
-/// their subclasses. This represents the machine instructions and
-/// operands used for MIPS32 code selection.
+/// This file declares the InstMIPS32 and OperandMIPS32 classes and their
+/// subclasses. This represents the machine instructions and operands used for
+/// MIPS32 code selection.
///
//===----------------------------------------------------------------------===//
@@ -47,13 +47,13 @@
}
};
-/// Ret pseudo-instruction. This is actually a "jr" instruction with
-/// an "ra" register operand, but epilogue lowering will search for a Ret
-/// instead of a generic "jr". This instruction also takes a Source
-/// operand (for non-void returning functions) for liveness analysis, though
-/// a FakeUse before the ret would do just as well.
-/// TODO(reed kotler): This needs was take from the ARM port and needs to be
-/// scrubbed in the future.
+/// Ret pseudo-instruction. This is actually a "jr" instruction with an "ra"
+/// register operand, but epilogue lowering will search for a Ret instead of a
+/// generic "jr". This instruction also takes a Source operand (for non-void
+/// returning functions) for liveness analysis, though a FakeUse before the ret
+/// would do just as well.
+// TODO(reed kotler): This was taken from the ARM port and needs to be scrubbed
+// in the future.
class InstMIPS32Ret : public InstMIPS32 {
InstMIPS32Ret() = delete;
diff --git a/src/IceInstX8632.cpp b/src/IceInstX8632.cpp
index cfd7fb3..3a8c57c 100644
--- a/src/IceInstX8632.cpp
+++ b/src/IceInstX8632.cpp
@@ -106,9 +106,8 @@
assert(SegmentReg >= 0 && SegmentReg < SegReg_NUM);
Str << "%" << X8632::Traits::InstSegmentRegNames[SegmentReg] << ":";
}
- // Emit as Offset(Base,Index,1<<Shift).
- // Offset is emitted without the leading '$'.
- // Omit the (Base,Index,1<<Shift) part if Base==nullptr.
+ // Emit as Offset(Base,Index,1<<Shift). Offset is emitted without the leading
+ // '$'. Omit the (Base,Index,1<<Shift) part if Base==nullptr.
if (!Offset) {
// No offset, emit nothing.
} else if (const auto CI = llvm::dyn_cast<ConstantInteger32>(Offset)) {
diff --git a/src/IceInstX8664.cpp b/src/IceInstX8664.cpp
index 6d9ccd6..cb765ae 100644
--- a/src/IceInstX8664.cpp
+++ b/src/IceInstX8664.cpp
@@ -92,9 +92,8 @@
if (!BuildDefs::dump())
return;
Ostream &Str = Func->getContext()->getStrEmit();
- // Emit as Offset(Base,Index,1<<Shift).
- // Offset is emitted without the leading '$'.
- // Omit the (Base,Index,1<<Shift) part if Base==nullptr.
+ // Emit as Offset(Base,Index,1<<Shift). Offset is emitted without the leading
+ // '$'. Omit the (Base,Index,1<<Shift) part if Base==nullptr.
if (!Offset) {
// No offset, emit nothing.
} else if (const auto CI = llvm::dyn_cast<ConstantInteger32>(Offset)) {
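The two hunks above describe the AT&T-syntax addressing-mode text being
emitted. As a concrete illustration (assumed typical output, not taken from a
test in this change), an operand with Offset 12, Base ebx, Index esi, and
Shift 2 prints as

  12(%ebx,%esi,4)

since the scale is 1<<Shift and the offset carries no '$' prefix; with
Base == nullptr only the bare offset (here, 12) is printed.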
diff --git a/src/IceInstX86Base.h b/src/IceInstX86Base.h
index 8883902..5ca9422 100644
--- a/src/IceInstX86Base.h
+++ b/src/IceInstX86Base.h
@@ -146,9 +146,8 @@
getOppositeCondition(typename Traits::Cond::BrCond Cond);
void dump(const Cfg *Func) const override;
- // Shared emit routines for common forms of instructions.
- // See the definition of emitTwoAddress() for a description of
- // ShiftHack.
+ // Shared emit routines for common forms of instructions. See the definition
+ // of emitTwoAddress() for a description of ShiftHack.
static void emitTwoAddress(const char *Opcode, const Inst *Inst,
const Cfg *Func, bool ShiftHack = false);
@@ -165,16 +164,15 @@
static bool isClassof(const Inst *Inst, InstKindX86 MyKind) {
return Inst->getKind() == static_cast<InstKind>(MyKind);
}
- // Most instructions that operate on vector arguments require vector
- // memory operands to be fully aligned (16-byte alignment for PNaCl
- // vector types). The stack frame layout and call ABI ensure proper
- // alignment for stack operands, but memory operands (originating
- // from load/store bitcode instructions) only have element-size
- // alignment guarantees. This function validates that none of the
- // operands is a memory operand of vector type, calling
- // report_fatal_error() if one is found. This function should be
- // called during emission, and maybe also in the ctor (as long as
- // that fits the lowering style).
+ // Most instructions that operate on vector arguments require vector memory
+ // operands to be fully aligned (16-byte alignment for PNaCl vector types).
+ // The stack frame layout and call ABI ensure proper alignment for stack
+ // operands, but memory operands (originating from load/store bitcode
+ // instructions) only have element-size alignment guarantees. This function
+ // validates that none of the operands is a memory operand of vector type,
+ // calling report_fatal_error() if one is found. This function should be
+ // called during emission, and maybe also in the ctor (as long as that fits
+ // the lowering style).
void validateVectorAddrMode() const {
if (this->getDest())
this->validateVectorAddrModeOpnd(this->getDest());
@@ -193,8 +191,8 @@
};
/// InstX86FakeRMW represents a non-atomic read-modify-write operation on a
-/// memory location. An InstX86FakeRMW is a "fake" instruction in that it
-/// still needs to be lowered to some actual RMW instruction.
+/// memory location. An InstX86FakeRMW is a "fake" instruction in that it still
+/// needs to be lowered to some actual RMW instruction.
///
/// If A is some memory address, D is some data value to apply, and OP is an
/// arithmetic operator, the instruction operates as: (*A) = (*A) OP D
@@ -228,17 +226,16 @@
InstArithmetic::OpKind Op, Variable *Beacon);
};
-/// InstX86Label represents an intra-block label that is the target
-/// of an intra-block branch. The offset between the label and the
-/// branch must be fit into one byte (considered "near"). These are
-/// used for lowering i1 calculations, Select instructions, and 64-bit
-/// compares on a 32-bit architecture, without basic block splitting.
-/// Basic block splitting is not so desirable for several reasons, one
-/// of which is the impact on decisions based on whether a variable's
-/// live range spans multiple basic blocks.
+/// InstX86Label represents an intra-block label that is the target of an
+/// intra-block branch. The offset between the label and the branch must fit
+/// into one byte (considered "near"). These are used for lowering i1
+/// calculations, Select instructions, and 64-bit compares on a 32-bit
+/// architecture, without basic block splitting. Basic block splitting is not so
+/// desirable for several reasons, one of which is the impact on decisions based
+/// on whether a variable's live range spans multiple basic blocks.
///
-/// Intra-block control flow must be used with caution. Consider the
-/// sequence for "c = (a >= b ? x : y)".
+/// Intra-block control flow must be used with caution. Consider the sequence
+/// for "c = (a >= b ? x : y)".
/// cmp a, b
/// br lt, L1
/// mov c, x
@@ -247,11 +244,10 @@
/// mov c, y
/// L2:
///
-/// Labels L1 and L2 are intra-block labels. Without knowledge of the
-/// intra-block control flow, liveness analysis will determine the "mov
-/// c, x" instruction to be dead. One way to prevent this is to insert
-/// a "FakeUse(c)" instruction anywhere between the two "mov c, ..."
-/// instructions, e.g.:
+/// Labels L1 and L2 are intra-block labels. Without knowledge of the
+/// intra-block control flow, liveness analysis will determine the "mov c, x"
+/// instruction to be dead. One way to prevent this is to insert a "FakeUse(c)"
+/// instruction anywhere between the two "mov c, ..." instructions, e.g.:
///
/// cmp a, b
/// br lt, L1
@@ -262,10 +258,9 @@
/// mov c, y
/// L2:
///
-/// The down-side is that "mov c, x" can never be dead-code eliminated
-/// even if there are no uses of c. As unlikely as this situation is,
-/// it may be prevented by running dead code elimination before
-/// lowering.
+/// The down-side is that "mov c, x" can never be dead-code eliminated even if
+/// there are no uses of c. As unlikely as this situation is, it may be
+/// prevented by running dead code elimination before lowering.
template <class Machine>
class InstX86Label final : public InstX86Base<Machine> {
InstX86Label() = delete;
@@ -319,9 +314,9 @@
InstX86Br(Func, NoCondTarget, Target, NoLabel,
InstX86Base<Machine>::Traits::Cond::Br_None, Kind);
}
- /// Create a non-terminator conditional branch to a node, with a
- /// fallthrough to the next instruction in the current node. This is
- /// used for switch lowering.
+ /// Create a non-terminator conditional branch to a node, with a fallthrough
+ /// to the next instruction in the current node. This is used for switch
+ /// lowering.
static InstX86Br *
create(Cfg *Func, CfgNode *Target,
typename InstX86Base<Machine>::Traits::Cond::BrCond Condition,
@@ -381,9 +376,9 @@
const Mode Kind;
};
-/// Jump to a target outside this function, such as tailcall, nacljump,
-/// naclret, unreachable. This is different from a Branch instruction
-/// in that there is no intra-function control flow to represent.
+/// Jump to a target outside this function, such as tailcall, nacljump, naclret,
+/// unreachable. This is different from a Branch instruction in that there is no
+/// intra-function control flow to represent.
template <class Machine> class InstX86Jmp final : public InstX86Base<Machine> {
InstX86Jmp() = delete;
InstX86Jmp(const InstX86Jmp &) = delete;
@@ -405,8 +400,8 @@
InstX86Jmp(Cfg *Func, Operand *Target);
};
-/// AdjustStack instruction - subtracts esp by the given amount and
-/// updates the stack offset during code emission.
+/// AdjustStack instruction - subtracts esp by the given amount and updates the
+/// stack offset during code emission.
template <class Machine>
class InstX86AdjustStack final : public InstX86Base<Machine> {
InstX86AdjustStack() = delete;
@@ -431,7 +426,7 @@
SizeT Amount;
};
-/// Call instruction. Arguments should have already been pushed.
+/// Call instruction. Arguments should have already been pushed.
template <class Machine> class InstX86Call final : public InstX86Base<Machine> {
InstX86Call() = delete;
InstX86Call(const InstX86Call &) = delete;
@@ -514,8 +509,8 @@
Emitter;
};
-/// Emit a two-operand (GPR) instruction, where the dest operand is a
-/// Variable that's guaranteed to be a register.
+/// Emit a two-operand (GPR) instruction, where the dest operand is a Variable
+/// that's guaranteed to be a register.
template <class Machine, bool VarCanBeByte = true, bool SrcCanBeByte = true>
void emitIASRegOpTyGPR(
const Cfg *Func, Type Ty, const Variable *Dst, const Operand *Src,
@@ -540,9 +535,9 @@
Type SrcTy = this->getSrc(0)->getType();
Type DestTy = this->getDest()->getType();
Str << "\t" << Opcode << this->getWidthString(SrcTy);
- // Movsx and movzx need both the source and dest type width letter
- // to define the operation. The other unary operations have the
- // same source and dest type and as a result need only one letter.
+ // Movsx and movzx need both the source and dest type width letter to
+ // define the operation. The other unary operations have the same source
+ // and dest type and as a result need only one letter.
if (SrcTy != DestTy)
Str << this->getWidthString(DestTy);
Str << "\t";
@@ -1181,8 +1176,8 @@
Source) {}
};
-/// Move packed - copy 128 bit values between XMM registers, or mem128
-/// and XMM registers.
+/// Move packed - copy 128-bit values between XMM registers, or mem128 and XMM
+/// registers.
template <class Machine>
class InstX86Movp
: public InstX86BaseMovlike<Machine, InstX86Base<Machine>::Movp> {
@@ -1865,13 +1860,12 @@
Func, Dest, Source) {}
};
-/// movss is only a binary operation when the source and dest
-/// operands are both registers (the high bits of dest are left untouched).
-/// In other cases, it behaves like a copy (mov-like) operation (and the
-/// high bits of dest are cleared).
-/// InstX86Movss will assert that both its source and dest operands are
-/// registers, so the lowering code should use _mov instead of _movss
-/// in cases where a copy operation is intended.
+/// movss is only a binary operation when the source and dest operands are both
+/// registers (the high bits of dest are left untouched). In other cases, it
+/// behaves like a copy (mov-like) operation (and the high bits of dest are
+/// cleared). InstX86Movss will assert that both its source and dest operands
+/// are registers, so the lowering code should use _mov instead of _movss in
+/// cases where a copy operation is intended.
template <class Machine>
class InstX86MovssRegs
: public InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::MovssRegs,
@@ -2072,8 +2066,8 @@
typename InstX86Base<Machine>::InstKindX86 Kind,
SizeT Maxsrcs, Variable *Dest, bool Locked)
: InstX86Base<Machine>(Func, Kind, Maxsrcs, Dest), Locked(Locked) {
- // Assume that such instructions are used for Atomics and be careful
- // with optimizations.
+ // Assume that such instructions are used for Atomics and be careful with
+ // optimizations.
this->HasSideEffects = Locked;
}
};
@@ -2174,8 +2168,7 @@
typename InstX86Base<Machine>::Traits::Cond::BrCond Condition;
};
-/// Cmpps instruction - compare packed singled-precision floating point
-/// values
+/// Cmpps instruction - compare packed single-precision floating point values
template <class Machine>
class InstX86Cmpps final : public InstX86Base<Machine> {
InstX86Cmpps() = delete;
@@ -2204,10 +2197,10 @@
};
/// Cmpxchg instruction - cmpxchg <dest>, <desired> will compare if <dest>
-/// equals eax. If so, the ZF is set and <desired> is stored in <dest>.
-/// If not, ZF is cleared and <dest> is copied to eax (or subregister).
-/// <dest> can be a register or memory, while <desired> must be a register.
-/// It is the user's responsiblity to mark eax with a FakeDef.
+/// equals eax. If so, the ZF is set and <desired> is stored in <dest>. If not,
+/// ZF is cleared and <dest> is copied to eax (or subregister). <dest> can be a
+/// register or memory, while <desired> must be a register. It is the user's
+/// responsibility to mark eax with a FakeDef.
template <class Machine>
class InstX86Cmpxchg final : public InstX86BaseLockable<Machine> {
InstX86Cmpxchg() = delete;
@@ -2232,12 +2225,11 @@
Variable *Desired, bool Locked);
};
-/// Cmpxchg8b instruction - cmpxchg8b <m64> will compare if <m64>
-/// equals edx:eax. If so, the ZF is set and ecx:ebx is stored in <m64>.
-/// If not, ZF is cleared and <m64> is copied to edx:eax.
-/// The caller is responsible for inserting FakeDefs to mark edx
-/// and eax as modified.
-/// <m64> must be a memory operand.
+/// Cmpxchg8b instruction - cmpxchg8b <m64> will compare if <m64> equals
+/// edx:eax. If so, the ZF is set and ecx:ebx is stored in <m64>. If not, ZF is
+/// cleared and <m64> is copied to edx:eax. The caller is responsible for
+/// inserting FakeDefs to mark edx and eax as modified. <m64> must be a memory
+/// operand.
template <class Machine>
class InstX86Cmpxchg8b final : public InstX86BaseLockable<Machine> {
InstX86Cmpxchg8b() = delete;
@@ -2267,10 +2259,10 @@
bool Locked);
};
-/// Cvt instruction - wrapper for cvtsX2sY where X and Y are in {s,d,i}
-/// as appropriate. s=float, d=double, i=int. X and Y are determined
-/// from dest/src types. Sign and zero extension on the integer
-/// operand needs to be done separately.
+/// Cvt instruction - wrapper for cvtsX2sY where X and Y are in {s,d,i} as
+/// appropriate. s=float, d=double, i=int. X and Y are determined from dest/src
+/// types. Sign and zero extension on the integer operand needs to be done
+/// separately.
template <class Machine> class InstX86Cvt final : public InstX86Base<Machine> {
InstX86Cvt() = delete;
InstX86Cvt(const InstX86Cvt &) = delete;
@@ -2406,9 +2398,8 @@
};
/// This is essentially a "mov" instruction with an
-/// InstX86Base<Machine>::Traits::X86OperandMem
-/// operand instead of Variable as the destination. It's important
-/// for liveness that there is no Dest operand.
+/// InstX86Base<Machine>::Traits::X86OperandMem operand instead of Variable as
+/// the destination. It's important for liveness that there is no Dest operand.
template <class Machine>
class InstX86Store final : public InstX86Base<Machine> {
InstX86Store() = delete;
@@ -2434,10 +2425,9 @@
};
/// This is essentially a vector "mov" instruction with an typename
-/// InstX86Base<Machine>::Traits::X86OperandMem
-/// operand instead of Variable as the destination. It's important
-/// for liveness that there is no Dest operand. The source must be an
-/// Xmm register, since Dest is mem.
+/// InstX86Base<Machine>::Traits::X86OperandMem operand instead of Variable as
+/// the destination. It's important for liveness that there is no Dest operand.
+/// The source must be an Xmm register, since Dest is mem.
template <class Machine>
class InstX86StoreP final : public InstX86Base<Machine> {
InstX86StoreP() = delete;
@@ -2596,10 +2586,10 @@
InstX86Push(Cfg *Func, Variable *Source);
};
-/// Ret instruction. Currently only supports the "ret" version that
-/// does not pop arguments. This instruction takes a Source operand
-/// (for non-void returning functions) for liveness analysis, though
-/// a FakeUse before the ret would do just as well.
+/// Ret instruction. Currently only supports the "ret" version that does not pop
+/// arguments. This instruction takes a Source operand (for non-void returning
+/// functions) for liveness analysis, though a FakeUse before the ret would do
+/// just as well.
template <class Machine> class InstX86Ret final : public InstX86Base<Machine> {
InstX86Ret() = delete;
InstX86Ret(const InstX86Ret &) = delete;
@@ -2647,10 +2637,10 @@
const typename InstX86Base<Machine>::Traits::Cond::BrCond Condition;
};
-/// Exchanging Add instruction. Exchanges the first operand (destination
-/// operand) with the second operand (source operand), then loads the sum
-/// of the two values into the destination operand. The destination may be
-/// a register or memory, while the source must be a register.
+/// Exchanging Add instruction. Exchanges the first operand (destination
+/// operand) with the second operand (source operand), then loads the sum of the
+/// two values into the destination operand. The destination may be a register
+/// or memory, while the source must be a register.
///
/// Both the dest and source are updated. The caller should then insert a
/// FakeDef to reflect the second update.
@@ -2677,12 +2667,11 @@
InstX86Xadd(Cfg *Func, Operand *Dest, Variable *Source, bool Locked);
};
-/// Exchange instruction. Exchanges the first operand (destination
-/// operand) with the second operand (source operand). At least one of
-/// the operands must be a register (and the other can be reg or mem).
-/// Both the Dest and Source are updated. If there is a memory operand,
-/// then the instruction is automatically "locked" without the need for
-/// a lock prefix.
+/// Exchange instruction. Exchanges the first operand (destination operand) with
+/// the second operand (source operand). At least one of the operands must be a
+/// register (and the other can be reg or mem). Both the Dest and Source are
+/// updated. If there is a memory operand, then the instruction is automatically
+/// "locked" without the need for a lock prefix.
template <class Machine> class InstX86Xchg final : public InstX86Base<Machine> {
InstX86Xchg() = delete;
InstX86Xchg(const InstX86Xchg &) = delete;
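The Cmpxchg and Cmpxchg8b comments above spell out the architectural semantics
that the lowering and its FakeDef bookkeeping depend on. A minimal C++ model of
the 32-bit form, written only to restate those semantics (names are
illustrative, and atomicity/lock-prefix behavior is ignored):

  #include <cstdint>
  struct X86StateSketch { uint32_t eax; bool zf; };
  // Models: cmpxchg <dest>, <desired>
  static void cmpxchgSketch(X86StateSketch &S, uint32_t &Dest,
                            uint32_t Desired) {
    if (Dest == S.eax) {
      S.zf = true;  // ZF set and <desired> stored into <dest>
      Dest = Desired;
    } else {
      S.zf = false; // ZF cleared and <dest> copied into eax
      S.eax = Dest;
    }
  }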
diff --git a/src/IceInstX86BaseImpl.h b/src/IceInstX86BaseImpl.h
index 336e268..677a1d3 100644
--- a/src/IceInstX86BaseImpl.h
+++ b/src/IceInstX86BaseImpl.h
@@ -112,16 +112,14 @@
template <class Machine>
bool InstX86Br<Machine>::optimizeBranch(const CfgNode *NextNode) {
- // If there is no next block, then there can be no fallthrough to
- // optimize.
+ // If there is no next block, then there can be no fallthrough to optimize.
if (NextNode == nullptr)
return false;
// Intra-block conditional branches can't be optimized.
if (Label)
return false;
- // If there is no fallthrough node, such as a non-default case label
- // for a switch instruction, then there is no opportunity to
- // optimize.
+ // If there is no fallthrough node, such as a non-default case label for a
+ // switch instruction, then there is no opportunity to optimize.
if (getTargetFalse() == nullptr)
return false;
@@ -132,15 +130,15 @@
this->setDeleted();
return true;
}
- // If the fallthrough is to the next node, set fallthrough to nullptr
- // to indicate.
+ // If the fallthrough is to the next node, set fallthrough to nullptr to
+  // indicate this.
if (getTargetFalse() == NextNode) {
TargetFalse = nullptr;
return true;
}
- // If TargetTrue is the next node, and TargetFalse is not nullptr
- // (which was already tested above), then invert the branch
- // condition, swap the targets, and set new fallthrough to nullptr.
+ // If TargetTrue is the next node, and TargetFalse is not nullptr (which was
+ // already tested above), then invert the branch condition, swap the targets,
+ // and set new fallthrough to nullptr.
if (getTargetTrue() == NextNode) {
assert(Condition != InstX86Base<Machine>::Traits::Cond::Br_None);
Condition = this->getOppositeCondition(Condition);
@@ -185,8 +183,8 @@
typename InstX86Base<Machine>::Traits::Cond::BrCond Condition)
: InstX86Base<Machine>(Func, InstX86Base<Machine>::Cmov, 2, Dest),
Condition(Condition) {
- // The final result is either the original Dest, or Source, so mark
- // both as sources.
+ // The final result is either the original Dest, or Source, so mark both as
+ // sources.
this->addSource(Dest);
this->addSource(Source);
}
@@ -320,12 +318,11 @@
template <class Machine>
InstX86Pop<Machine>::InstX86Pop(Cfg *Func, Variable *Dest)
: InstX86Base<Machine>(Func, InstX86Base<Machine>::Pop, 0, Dest) {
- // A pop instruction affects the stack pointer and so it should not
- // be allowed to be automatically dead-code eliminated. (The
- // corresponding push instruction doesn't need this treatment
- // because it has no dest variable and therefore won't be dead-code
- // eliminated.) This is needed for late-stage liveness analysis
- // (e.g. asm-verbose mode).
+ // A pop instruction affects the stack pointer and so it should not be
+ // allowed to be automatically dead-code eliminated. (The corresponding push
+ // instruction doesn't need this treatment because it has no dest variable
+ // and therefore won't be dead-code eliminated.) This is needed for
+ // late-stage liveness analysis (e.g. asm-verbose mode).
this->HasSideEffects = true;
}
@@ -529,11 +526,10 @@
Asm->jmp(InstX86Base<Machine>::Traits::RegisterSet::getEncodedGPR(
Var->getRegNum()));
} else {
- // The jmp instruction with a memory operand should be possible
- // to encode, but it isn't a valid sandboxed instruction, and
- // there shouldn't be a register allocation issue to jump
- // through a scratch register, so we don't really need to bother
- // implementing it.
+ // The jmp instruction with a memory operand should be possible to
+ // encode, but it isn't a valid sandboxed instruction, and there
+ // shouldn't be a register allocation issue to jump through a scratch
+ // register, so we don't really need to bother implementing it.
llvm::report_fatal_error("Assembler can't jmp to memory operand");
}
} else if (const auto Mem = llvm::dyn_cast<
@@ -548,11 +544,10 @@
Asm->jmp(CR);
} else if (const auto Imm = llvm::dyn_cast<ConstantInteger32>(Target)) {
// NaCl trampoline calls refer to an address within the sandbox directly.
- // This is usually only needed for non-IRT builds and otherwise not
- // very portable or stable. Usually this is only done for "calls"
- // and not jumps.
- // TODO(jvoung): Support this when there is a lowering that
- // actually triggers this case.
+ // This is usually only needed for non-IRT builds and otherwise not very
+ // portable or stable. Usually this is only done for "calls" and not jumps.
+ // TODO(jvoung): Support this when there is a lowering that actually
+ // triggers this case.
(void)Imm;
llvm::report_fatal_error("Unexpected jmp to absolute address");
} else {
@@ -633,10 +628,9 @@
getCallTarget()->dump(Func);
}
-// The ShiftHack parameter is used to emit "cl" instead of "ecx" for
-// shift instructions, in order to be syntactically valid. The
-// this->Opcode parameter needs to be char* and not IceString because of
-// template issues.
+// The ShiftHack parameter is used to emit "cl" instead of "ecx" for shift
+// instructions, in order to be syntactically valid. The this->Opcode parameter
+// needs to be char* and not IceString because of template issues.
template <class Machine>
void InstX86Base<Machine>::emitTwoAddress(const char *Opcode, const Inst *Inst,
const Cfg *Func, bool ShiftHack) {
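The ShiftHack comment above exists because a variable shift amount in x86 must
be given in the cl subregister; assuming typical AT&T output (an illustration,
not taken from this change), the emitter needs

  shll %cl, %eax

rather than the invalid "shll %ecx, %eax", so an ecx shift-amount operand is
printed as "cl".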
@@ -802,15 +796,14 @@
&Emitter) {
typename InstX86Base<Machine>::Traits::Assembler *Asm =
Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>();
- // Technically, the Dest Var can be mem as well, but we only use Reg.
- // We can extend this to check Dest if we decide to use that form.
+ // Technically, the Dest Var can be mem as well, but we only use Reg. We can
+ // extend this to check Dest if we decide to use that form.
assert(Var->hasReg());
// We cheat a little and use GPRRegister even for byte operations.
typename InstX86Base<Machine>::Traits::RegisterSet::GPRRegister VarReg =
InstX86Base<Machine>::Traits::RegisterSet::getEncodedByteRegOrGPR(
Ty, Var->getRegNum());
- // Src must be reg == ECX or an Imm8.
- // This is asserted by the assembler.
+ // Src must be reg == ECX or an Imm8. This is asserted by the assembler.
if (const auto SrcVar = llvm::dyn_cast<Variable>(Src)) {
assert(SrcVar->hasReg());
typename InstX86Base<Machine>::Traits::RegisterSet::GPRRegister SrcReg =
@@ -1337,8 +1330,8 @@
&InstX86Base<Machine>::Traits::Assembler::imul};
emitIASOpTyGPR<Machine>(Func, Ty, this->getSrc(1), Emitter);
} else {
- // We only use imul as a two-address instruction even though
- // there is a 3 operand version when one of the operands is a constant.
+    // We only use imul as a two-address instruction even though there is a
+    // 3-operand version when one of the operands is a constant.
assert(Var == this->getSrc(0));
static const typename InstX86Base<
Machine>::Traits::Assembler::GPREmitterRegOp Emitter = {
@@ -1678,8 +1671,8 @@
Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>();
assert(this->getSrcSize() == 2);
assert(Condition < InstX86Base<Machine>::Traits::Cond::Cmpps_Invalid);
- // Assuming there isn't any load folding for cmpps, and vector constants
- // are not allowed in PNaCl.
+ // Assuming there isn't any load folding for cmpps, and vector constants are
+ // not allowed in PNaCl.
assert(llvm::isa<Variable>(this->getSrc(1)));
const auto SrcVar = llvm::cast<Variable>(this->getSrc(1));
if (SrcVar->hasReg()) {
@@ -1988,8 +1981,8 @@
template <class Machine>
void InstX86Ucomiss<Machine>::emitIAS(const Cfg *Func) const {
assert(this->getSrcSize() == 2);
- // Currently src0 is always a variable by convention, to avoid having
- // two memory operands.
+ // Currently src0 is always a variable by convention, to avoid having two
+ // memory operands.
assert(llvm::isa<Variable>(this->getSrc(0)));
const auto Src0Var = llvm::cast<Variable>(this->getSrc(0));
Type Ty = Src0Var->getType();
@@ -2291,16 +2284,16 @@
: InstX86Base<Machine>::Traits::TypeAttributes[DestTy]
.SdSsString) << "\t";
}
- // For an integer truncation operation, src is wider than dest.
- // Ideally, we use a mov instruction whose data width matches the
- // narrower dest. This is a problem if e.g. src is a register like
- // esi or si where there is no 8-bit version of the register. To be
- // safe, we instead widen the dest to match src. This works even
- // for stack-allocated dest variables because typeWidthOnStack()
- // pads to a 4-byte boundary even if only a lower portion is used.
- // TODO: This assert disallows usages such as copying a floating point
- // value between a vector and a scalar (which movss is used for).
- // Clean this up.
+ // For an integer truncation operation, src is wider than dest. Ideally, we
+ // use a mov instruction whose data width matches the narrower dest. This is
+ // a problem if e.g. src is a register like esi or si where there is no 8-bit
+ // version of the register. To be safe, we instead widen the dest to match
+ // src. This works even for stack-allocated dest variables because
+ // typeWidthOnStack() pads to a 4-byte boundary even if only a lower portion
+ // is used.
+ // TODO: This assert disallows usages such as copying a floating
+ // point value between a vector and a scalar (which movss is used for). Clean
+ // this up.
assert(Func->getTarget()->typeWidthInBytesOnStack(DestTy) ==
Func->getTarget()->typeWidthInBytesOnStack(SrcTy));
Src->emit(Func);
@@ -2316,12 +2309,11 @@
Type DestTy = Dest->getType();
Type SrcTy = Src->getType();
// Mov can be used for GPRs or XMM registers. Also, the type does not
- // necessarily match (Mov can be used for bitcasts). However, when
- // the type does not match, one of the operands must be a register.
- // Thus, the strategy is to find out if Src or Dest are a register,
- // then use that register's type to decide on which emitter set to use.
- // The emitter set will include reg-reg movs, but that case should
- // be unused when the types don't match.
+ // necessarily match (Mov can be used for bitcasts). However, when the type
+ // does not match, one of the operands must be a register. Thus, the strategy
+  // is to find out if Src or Dest is a register, then use that register's
+ // type to decide on which emitter set to use. The emitter set will include
+ // reg-reg movs, but that case should be unused when the types don't match.
static const typename InstX86Base<Machine>::Traits::Assembler::XmmEmitterRegOp
XmmRegEmitter = {&InstX86Base<Machine>::Traits::Assembler::movss,
&InstX86Base<Machine>::Traits::Assembler::movss};
@@ -2333,16 +2325,16 @@
Machine>::Traits::Assembler::GPREmitterAddrOp GPRAddrEmitter = {
&InstX86Base<Machine>::Traits::Assembler::mov,
&InstX86Base<Machine>::Traits::Assembler::mov};
- // For an integer truncation operation, src is wider than dest.
- // Ideally, we use a mov instruction whose data width matches the
- // narrower dest. This is a problem if e.g. src is a register like
- // esi or si where there is no 8-bit version of the register. To be
- // safe, we instead widen the dest to match src. This works even
- // for stack-allocated dest variables because typeWidthOnStack()
- // pads to a 4-byte boundary even if only a lower portion is used.
- // TODO: This assert disallows usages such as copying a floating point
- // value between a vector and a scalar (which movss is used for).
- // Clean this up.
+ // For an integer truncation operation, src is wider than dest. Ideally, we
+ // use a mov instruction whose data width matches the narrower dest. This is
+ // a problem if e.g. src is a register like esi or si where there is no 8-bit
+ // version of the register. To be safe, we instead widen the dest to match
+ // src. This works even for stack-allocated dest variables because
+ // typeWidthOnStack() pads to a 4-byte boundary even if only a lower portion
+ // is used.
+ // TODO: This assert disallows usages such as copying a floating
+ // point value between a vector and a scalar (which movss is used for). Clean
+ // this up.
assert(
Func->getTarget()->typeWidthInBytesOnStack(this->getDest()->getType()) ==
Func->getTarget()->typeWidthInBytesOnStack(Src->getType()));
@@ -2375,8 +2367,8 @@
return;
}
} else {
- // Dest must be Stack and Src *could* be a register. Use Src's type
- // to decide on the emitters.
+ // Dest must be Stack and Src *could* be a register. Use Src's type to
+ // decide on the emitters.
typename InstX86Base<Machine>::Traits::Address StackAddr(
static_cast<typename InstX86Base<Machine>::Traits::TargetLowering *>(
Func->getTarget())
@@ -2409,8 +2401,8 @@
assert(this->getSrcSize() == 1);
const Variable *Dest = this->getDest();
const auto SrcVar = llvm::cast<Variable>(this->getSrc(0));
- // For insert/extract element (one of Src/Dest is an Xmm vector and
- // the other is an int type).
+ // For insert/extract element (one of Src/Dest is an Xmm vector and the other
+ // is an int type).
if (SrcVar->getType() == IceType_i32 ||
(InstX86Base<Machine>::Traits::Is64Bit &&
SrcVar->getType() == IceType_i64)) {
@@ -2464,10 +2456,9 @@
void InstX86Movp<Machine>::emit(const Cfg *Func) const {
if (!BuildDefs::dump())
return;
- // TODO(wala,stichnot): movups works with all vector operands, but
- // there exist other instructions (movaps, movdqa, movdqu) that may
- // perform better, depending on the data type and alignment of the
- // operands.
+ // TODO(wala,stichnot): movups works with all vector operands, but there
+ // exist other instructions (movaps, movdqa, movdqu) that may perform better,
+ // depending on the data type and alignment of the operands.
Ostream &Str = Func->getContext()->getStrEmit();
assert(this->getSrcSize() == 1);
Str << "\tmovups\t";
@@ -2521,8 +2512,8 @@
template <class Machine>
void InstX86MovssRegs<Machine>::emitIAS(const Cfg *Func) const {
+  // This Binop variant is only intended to be used for reg-reg moves where
- // where part of the Dest register is untouched.
+ // This is Binop variant is only intended to be used for reg-reg moves where
+ // part of the Dest register is untouched.
assert(this->getSrcSize() == 2);
const Variable *Dest = this->getDest();
assert(Dest == this->getSrc(0));
@@ -2542,9 +2533,9 @@
assert(this->getSrcSize() == 1);
const Variable *Dest = this->getDest();
const Operand *Src = this->getSrc(0);
- // Dest must be a > 8-bit register, but Src can be 8-bit. In practice
- // we just use the full register for Dest to avoid having an
- // OperandSizeOverride prefix. It also allows us to only dispatch on SrcTy.
+ // Dest must be a > 8-bit register, but Src can be 8-bit. In practice we just
+ // use the full register for Dest to avoid having an OperandSizeOverride
+ // prefix. It also allows us to only dispatch on SrcTy.
Type SrcTy = Src->getType();
assert(typeWidthInBytes(Dest->getType()) > 1);
assert(typeWidthInBytes(Dest->getType()) > typeWidthInBytes(SrcTy));
@@ -2596,8 +2587,8 @@
SizeT Width = typeWidthInBytes(Ty);
const auto Var = llvm::dyn_cast<Variable>(this->getSrc(0));
if (Var && Var->hasReg()) {
- // This is a physical xmm register, so we need to spill it to a
- // temporary stack slot.
+ // This is a physical xmm register, so we need to spill it to a temporary
+ // stack slot.
Str << "\tsubl\t$" << Width << ", %esp"
<< "\n";
Str << "\tmov"
@@ -2622,8 +2613,8 @@
Type Ty = Src->getType();
if (const auto Var = llvm::dyn_cast<Variable>(Src)) {
if (Var->hasReg()) {
- // This is a physical xmm register, so we need to spill it to a
- // temporary stack slot.
+ // This is a physical xmm register, so we need to spill it to a temporary
+ // stack slot.
Immediate Width(typeWidthInBytes(Ty));
Asm->sub(IceType_i32,
InstX86Base<Machine>::Traits::RegisterSet::Encoded_Reg_esp,
@@ -2672,9 +2663,8 @@
Ostream &Str = Func->getContext()->getStrEmit();
assert(this->getSrcSize() == 0);
// TODO(jvoung,stichnot): Utilize this by setting Dest to nullptr to
- // "partially" delete the fstp if the Dest is unused.
- // Even if Dest is unused, the fstp should be kept for the SideEffects
- // of popping the stack.
+ // "partially" delete the fstp if the Dest is unused. Even if Dest is unused,
+ // the fstp should be kept for the SideEffects of popping the stack.
if (!this->getDest()) {
Str << "\tfstp\tst(0)";
return;
@@ -2686,10 +2676,9 @@
this->getDest()->emit(Func);
return;
}
- // Dest is a physical (xmm) register, so st(0) needs to go through
- // memory. Hack this by creating a temporary stack slot, spilling
- // st(0) there, loading it into the xmm register, and deallocating
- // the stack slot.
+ // Dest is a physical (xmm) register, so st(0) needs to go through memory.
+ // Hack this by creating a temporary stack slot, spilling st(0) there,
+ // loading it into the xmm register, and deallocating the stack slot.
Str << "\tsubl\t$" << Width << ", %esp\n";
Str << "\tfstp" << this->getFldString(Ty) << "\t"
<< "(%esp)\n";
@@ -2708,9 +2697,8 @@
assert(this->getSrcSize() == 0);
const Variable *Dest = this->getDest();
// TODO(jvoung,stichnot): Utilize this by setting Dest to nullptr to
- // "partially" delete the fstp if the Dest is unused.
- // Even if Dest is unused, the fstp should be kept for the SideEffects
- // of popping the stack.
+ // "partially" delete the fstp if the Dest is unused. Even if Dest is unused,
+ // the fstp should be kept for the SideEffects of popping the stack.
if (!Dest) {
Asm->fstp(InstX86Base<Machine>::Traits::RegisterSet::getEncodedSTReg(0));
return;
@@ -2723,10 +2711,9 @@
->stackVarToAsmOperand(Dest));
Asm->fstp(Ty, StackAddr);
} else {
- // Dest is a physical (xmm) register, so st(0) needs to go through
- // memory. Hack this by creating a temporary stack slot, spilling
- // st(0) there, loading it into the xmm register, and deallocating
- // the stack slot.
+ // Dest is a physical (xmm) register, so st(0) needs to go through memory.
+ // Hack this by creating a temporary stack slot, spilling st(0) there,
+ // loading it into the xmm register, and deallocating the stack slot.
Immediate Width(typeWidthInBytes(Ty));
Asm->sub(IceType_i32,
InstX86Base<Machine>::Traits::RegisterSet::Encoded_Reg_esp, Width);
@@ -2796,9 +2783,9 @@
this->getSrc(0)->emit(Func);
Str << ", ";
Variable *Dest = this->getDest();
- // pextrw must take a register dest. There is an SSE4.1 version that takes
- // a memory dest, but we aren't using it. For uniformity, just restrict
- // them all to have a register dest for now.
+ // pextrw must take a register dest. There is an SSE4.1 version that takes a
+ // memory dest, but we aren't using it. For uniformity, just restrict them
+ // all to have a register dest for now.
assert(Dest->hasReg());
Dest->asType(IceType_i32)->emit(Func);
}
@@ -2813,9 +2800,9 @@
static_cast<typename InstX86Base<Machine>::Traits::TargetLowering *>(
Func->getTarget())
->getInstructionSet() >= InstX86Base<Machine>::Traits::SSE4_1);
- // pextrw must take a register dest. There is an SSE4.1 version that takes
- // a memory dest, but we aren't using it. For uniformity, just restrict
- // them all to have a register dest for now.
+ // pextrw must take a register dest. There is an SSE4.1 version that takes a
+ // memory dest, but we aren't using it. For uniformity, just restrict them
+ // all to have a register dest for now.
assert(Dest->hasReg());
// pextrw's Src(0) must be a register (both SSE4.1 and SSE2).
assert(llvm::cast<Variable>(this->getSrc(0))->hasReg());
@@ -2876,10 +2863,9 @@
static_cast<typename InstX86Base<Machine>::Traits::TargetLowering *>(
Func->getTarget())
->getInstructionSet() >= InstX86Base<Machine>::Traits::SSE4_1);
- // If src1 is a register, it should always be r32 (this should fall out
- // from the encodings for ByteRegs overlapping the encodings for r32),
- // but we have to trust the regalloc to not choose "ah", where it
- // doesn't overlap.
+ // If src1 is a register, it should always be r32 (this should fall out from
+ // the encodings for ByteRegs overlapping the encodings for r32), but we have
+ // to trust the regalloc to not choose "ah", where it doesn't overlap.
static const typename InstX86Base<Machine>::Traits::Assembler::
template ThreeOpImmEmitter<
typename InstX86Base<Machine>::Traits::RegisterSet::XmmRegister,
diff --git a/src/IceIntrinsics.cpp b/src/IceIntrinsics.cpp
index 1dc25cc..bbbf086 100644
--- a/src/IceIntrinsics.cpp
+++ b/src/IceIntrinsics.cpp
@@ -8,8 +8,8 @@
//===----------------------------------------------------------------------===//
///
/// \file
-/// This file implements the Intrinsics utilities for matching and
-/// then dispatching by name.
+/// This file implements the Intrinsics utilities for matching and then
+/// dispatching by name.
///
//===----------------------------------------------------------------------===//
@@ -33,8 +33,8 @@
#define INTRIN(ID, SE, RT) \
{ Intrinsics::ID, Intrinsics::SE, Intrinsics::RT }
-// Build list of intrinsics with their attributes and expected prototypes.
-// List is sorted alphabetically.
+// Build list of intrinsics with their attributes and expected prototypes. List
+// is sorted alphabetically.
const struct IceIntrinsicsEntry_ {
Intrinsics::FullIntrinsicInfo Info;
const char *IntrinsicName;
@@ -279,8 +279,8 @@
case AtomicRMW:
return true;
case AtomicCmpxchg:
- // Reject orderings that are disallowed by C++11 as invalid
- // combinations for cmpxchg.
+ // Reject orderings that are disallowed by C++11 as invalid combinations
+ // for cmpxchg.
switch (OrderOther) {
case MemoryOrderRelaxed:
case MemoryOrderConsume:
diff --git a/src/IceIntrinsics.h b/src/IceIntrinsics.h
index 9270aa4..208c3c1 100644
--- a/src/IceIntrinsics.h
+++ b/src/IceIntrinsics.h
@@ -62,11 +62,10 @@
Trap
};
- /// Operations that can be represented by the AtomicRMW
- /// intrinsic.
+ /// Operations that can be represented by the AtomicRMW intrinsic.
///
- /// Do not reorder these values: their order offers forward
- /// compatibility of bitcode targeted to PNaCl.
+ /// Do not reorder these values: their order offers forward compatibility of
+ /// bitcode targeted to PNaCl.
enum AtomicRMWOperation {
AtomicInvalid = 0, // Invalid, keep first.
AtomicAdd,
@@ -80,8 +79,8 @@
/// Memory orderings supported by PNaCl IR.
///
- /// Do not reorder these values: their order offers forward
- /// compatibility of bitcode targeted to PNaCl.
+ /// Do not reorder these values: their order offers forward compatibility of
+ /// bitcode targeted to PNaCl.
enum MemoryOrder {
MemoryOrderInvalid = 0, // Invalid, keep first.
MemoryOrderRelaxed,
@@ -93,11 +92,11 @@
MemoryOrderNum // Invalid, keep last.
};
- /// Verify memory ordering rules for atomic intrinsics. For
- /// AtomicCmpxchg, Order is the "success" ordering and OrderOther is
- /// the "failure" ordering. Returns true if valid, false if invalid.
- // TODO(stichnot,kschimpf): Perform memory order validation in the
- // bitcode reader/parser, allowing LLVM and Subzero to share. See
+ /// Verify memory ordering rules for atomic intrinsics. For AtomicCmpxchg,
+ /// Order is the "success" ordering and OrderOther is the "failure" ordering.
+ /// Returns true if valid, false if invalid.
+ // TODO(stichnot,kschimpf): Perform memory order validation in the bitcode
+ // reader/parser, allowing LLVM and Subzero to share. See
// https://code.google.com/p/nativeclient/issues/detail?id=4126 .
static bool isMemoryOrderValid(IntrinsicID ID, uint64_t Order,
uint64_t OrderOther = MemoryOrderInvalid);
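  /// A minimal sketch of the C++11 constraint mentioned above, written against
  /// std::atomic orderings purely for illustration (the actual check operates
  /// on the PNaCl MemoryOrder values and is not reproduced here): the failure
  /// ordering of a cmpxchg may not be release or acq_rel, and (in C++11) may
  /// be no stronger than the success ordering.
  ///   bool isValidCmpxchgFailureOrder(std::memory_order Failure) {
  ///     return Failure != std::memory_order_release &&
  ///            Failure != std::memory_order_acq_rel;
  ///   }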
@@ -106,10 +105,10 @@
enum ReturnsTwice { ReturnsTwice_F = 0, ReturnsTwice_T = 1 };
- /// Basic attributes related to each intrinsic, that are relevant to
- /// code generation. Perhaps the attributes representation can be shared
- /// with general function calls, but PNaCl currently strips all
- /// attributes from functions.
+ /// Basic attributes related to each intrinsic that are relevant to code
+ /// generation. Perhaps the attributes representation can be shared with
+ /// general function calls, but PNaCl currently strips all attributes from
+ /// functions.
struct IntrinsicInfo {
enum IntrinsicID ID : 30;
enum SideEffects HasSideEffects : 1;
@@ -132,9 +131,9 @@
Type Signature[kMaxIntrinsicParameters];
uint8_t NumTypes;
- /// Validates that type signature of call matches intrinsic.
- /// If WrongArgumentType is returned, ArgIndex is set to corresponding
- /// argument index.
+ /// Validates that the type signature of the call matches the intrinsic. If
+ /// WrongArgumentType is returned, ArgIndex is set to the corresponding
+ /// argument index.
ValidateCallValue validateCall(const Ice::InstCall *Call,
SizeT &ArgIndex) const;
@@ -154,11 +153,11 @@
Type getArgType(SizeT Index) const;
};
- /// Find the information about a given intrinsic, based on function name. If
+ /// Find the information about a given intrinsic, based on function name. If
/// the function name does not have the common "llvm." prefix, nullptr is
- /// returned and Error is set to false. Otherwise, tries to find a reference
- /// to a FullIntrinsicInfo entry (valid for the lifetime of the map). If
- /// found, sets Error to false and returns the reference. If not found, sets
+ /// returned and Error is set to false. Otherwise, tries to find a reference
+ /// to a FullIntrinsicInfo entry (valid for the lifetime of the map). If
+ /// found, sets Error to false and returns the reference. If not found, sets
/// Error to true and returns nullptr (indicating an unknown "llvm.foo"
/// intrinsic).
const FullIntrinsicInfo *find(const IceString &Name, bool &Error) const;
diff --git a/src/IceLiveness.cpp b/src/IceLiveness.cpp
index 15877b2..25cfd09 100644
--- a/src/IceLiveness.cpp
+++ b/src/IceLiveness.cpp
@@ -8,15 +8,14 @@
//===----------------------------------------------------------------------===//
///
/// \file
-/// This file provides some of the support for the Liveness class. In
-/// particular, it handles the sparsity representation of the mapping
-/// between Variables and CfgNodes. The idea is that since most
-/// variables are used only within a single basic block, we can
-/// partition the variables into "local" and "global" sets. Instead of
-/// sizing and indexing vectors according to Variable::Number, we
-/// create a mapping such that global variables are mapped to low
-/// indexes that are common across nodes, and local variables are
-/// mapped to a higher index space that is shared across nodes.
+/// This file provides some of the support for the Liveness class. In
+/// particular, it handles the sparsity representation of the mapping between
+/// Variables and CfgNodes. The idea is that since most variables are used only
+/// within a single basic block, we can partition the variables into "local" and
+/// "global" sets. Instead of sizing and indexing vectors according to
+/// Variable::Number, we create a mapping such that global variables are mapped
+/// to low indexes that are common across nodes, and local variables are mapped
+/// to a higher index space that is shared across nodes.
///
//===----------------------------------------------------------------------===//
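// An illustrative sketch of the mapping described above (the helper name is
// hypothetical, not part of this file): a variable's liveness index is looked
// up via VarToLiveMap; indexes below NumGlobals denote global variables and
// are common across nodes, while higher indexes denote block-local variables
// and that upper index space is reused by every node.
//   SizeT liveIndexSketch(SizeT VarNumber) const {
//     const SizeT Index = VarToLiveMap[VarNumber];
//     // Index < NumGlobals  -> global, same index in every node
//     // Index >= NumGlobals -> local, index space shared across nodes
//     return Index;
//   }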
@@ -32,7 +31,7 @@
// Initializes the basic liveness-related data structures for full liveness
// analysis (IsFullInit=true), or for incremental update after phi lowering
-// (IsFullInit=false). In the latter case, FirstNode points to the first node
+// (IsFullInit=false). In the latter case, FirstNode points to the first node
// added since starting phi lowering, and FirstVar points to the first Variable
// added since starting phi lowering.
void Liveness::initInternal(NodeList::const_iterator FirstNode,
@@ -60,7 +59,7 @@
else
assert(TmpNumGlobals == 0);
- // Resize each LivenessNode::LiveToVarMap, and the global LiveToVarMap. Reset
+ // Resize each LivenessNode::LiveToVarMap, and the global LiveToVarMap. Reset
// the counts to 0.
for (auto I = FirstNode, E = Func->getNodes().end(); I != E; ++I) {
LivenessNode &N = Nodes[(*I)->getIndex()];
@@ -75,7 +74,7 @@
RangeMask.resize(NumVars);
RangeMask.set(0, NumVars); // Track all variables by default.
- // Sort each variable into the appropriate LiveToVarMap. Set VarToLiveMap.
+ // Sort each variable into the appropriate LiveToVarMap. Set VarToLiveMap.
// Set RangeMask correctly for each variable.
TmpNumGlobals = 0;
for (auto I = FirstVar, E = Func->getVariables().end(); I != E; ++I) {
@@ -112,8 +111,7 @@
// NumLocals, LiveToVarMap already initialized
Node.LiveIn.resize(NumGlobals);
Node.LiveOut.resize(NumGlobals);
- // LiveBegin and LiveEnd are reinitialized before each pass over
- // the block.
+ // LiveBegin and LiveEnd are reinitialized before each pass over the block.
}
}
diff --git a/src/IceLiveness.h b/src/IceLiveness.h
index 895138d..bd739d3 100644
--- a/src/IceLiveness.h
+++ b/src/IceLiveness.h
@@ -8,12 +8,11 @@
//===----------------------------------------------------------------------===//
///
/// \file
-/// This file declares the Liveness and LivenessNode classes,
-/// which are used for liveness analysis. The node-specific
-/// information tracked for each Variable includes whether it is
-/// live on entry, whether it is live on exit, the instruction number
-/// that starts its live range, and the instruction number that ends
-/// its live range. At the Cfg level, the actual live intervals are
+/// This file declares the Liveness and LivenessNode classes, which are used for
+/// liveness analysis. The node-specific information tracked for each Variable
+/// includes whether it is live on entry, whether it is live on exit, the
+/// instruction number that starts its live range, and the instruction number
+/// that ends its live range. At the Cfg level, the actual live intervals are
/// recorded.
///
//===----------------------------------------------------------------------===//
@@ -41,22 +40,20 @@
/// NumLocals is the number of Variables local to this block.
SizeT NumLocals = 0;
/// NumNonDeadPhis tracks the number of Phi instructions that
- /// Inst::liveness() identified as tentatively live. If
- /// NumNonDeadPhis changes from the last liveness pass, then liveness
- /// has not yet converged.
+ /// Inst::liveness() identified as tentatively live. If NumNonDeadPhis
+ /// changes from the last liveness pass, then liveness has not yet
+ /// converged.
SizeT NumNonDeadPhis = 0;
- // LiveToVarMap maps a liveness bitvector index to a Variable. This
- // is generally just for printing/dumping. The index should be less
- // than NumLocals + Liveness::NumGlobals.
+ // LiveToVarMap maps a liveness bitvector index to a Variable. This is
+ // generally just for printing/dumping. The index should be less than
+ // NumLocals + Liveness::NumGlobals.
std::vector<Variable *> LiveToVarMap;
// LiveIn and LiveOut track the in- and out-liveness of the global
- // variables. The size of each vector is
- // LivenessNode::NumGlobals.
+ // variables. The size of each vector is LivenessNode::NumGlobals.
LivenessBV LiveIn, LiveOut;
- // LiveBegin and LiveEnd track the instruction numbers of the start
- // and end of each variable's live range within this block. The
- // index/key of each element is less than NumLocals +
- // Liveness::NumGlobals.
+ // LiveBegin and LiveEnd track the instruction numbers of the start and end
+ // of each variable's live range within this block. The index/key of each
+ // element is less than NumLocals + Liveness::NumGlobals.
LiveBeginEndMap LiveBegin, LiveEnd;
};
@@ -111,11 +108,11 @@
SizeT NumGlobals = 0;
/// Size of Nodes is Cfg::Nodes.size().
std::vector<LivenessNode> Nodes;
- /// VarToLiveMap maps a Variable's Variable::Number to its live index
- /// within its basic block.
+ /// VarToLiveMap maps a Variable's Variable::Number to its live index within
+ /// its basic block.
std::vector<SizeT> VarToLiveMap;
- /// LiveToVarMap is analogous to LivenessNode::LiveToVarMap, but for
- /// non-local variables.
+ /// LiveToVarMap is analogous to LivenessNode::LiveToVarMap, but for non-local
+ /// variables.
std::vector<Variable *> LiveToVarMap;
/// RangeMask[Variable::Number] indicates whether we want to track that
/// Variable's live range.
diff --git a/src/IceLoopAnalyzer.cpp b/src/IceLoopAnalyzer.cpp
index e2f7487..4e1b549 100644
--- a/src/IceLoopAnalyzer.cpp
+++ b/src/IceLoopAnalyzer.cpp
@@ -121,9 +121,9 @@
return nullptr;
}
- // Reaching here means a loop has been found! It consists of the nodes on
- // the top of the stack, down until the current node being processed, Node,
- // is found.
+ // Reaching here means a loop has been found! It consists of the nodes on the
+ // top of the stack, down until the current node being processed, Node, is
+ // found.
for (auto It = LoopStack.rbegin(); It != LoopStack.rend(); ++It) {
(*It)->setOnStack(false);
(*It)->incrementLoopNestDepth();
diff --git a/src/IceLoopAnalyzer.h b/src/IceLoopAnalyzer.h
index 5991798..19d38d1 100644
--- a/src/IceLoopAnalyzer.h
+++ b/src/IceLoopAnalyzer.h
@@ -35,6 +35,10 @@
///
/// This only computes the loop nest depth within the function and does not
/// take into account whether the function was called from within a loop.
+ // TODO(ascull): this currently uses an extension of Tarjan's algorithm which
+ // is bounded linear. ncbray suggests another algorithm which is linear in
+ // practice but not bounded linear. I think it also finds dominators.
+ // http://lenx.100871.net/papers/loop-SAS.pdf
void computeLoopNestDepth();
private:
@@ -88,11 +92,11 @@
using LoopNodePtrList =
std::vector<LoopNode *, CfgLocalAllocator<LoopNode *>>;
- /// Process the node as part as part of Tarjan's algorithm and return either
- /// a node to recurse into or nullptr when the node has been fully processed.
+ /// Process the node as part of Tarjan's algorithm and return either a node
+ /// to recurse into or nullptr when the node has been fully processed.
LoopNode *processNode(LoopNode &Node);
- /// The fuction to analyze for loops.
+ /// The function to analyze for loops.
Cfg *const Func;
/// A list of decorated nodes in the same order as Func->getNodes() which
/// means the node's index will also be valid in this list.
diff --git a/src/IceOperand.cpp b/src/IceOperand.cpp
index 2013dcf..125c692 100644
--- a/src/IceOperand.cpp
+++ b/src/IceOperand.cpp
@@ -48,10 +48,9 @@
Range.push_back(RangeElementType(Start, End));
}
-// Returns true if this live range ends before Other's live range
-// starts. This means that the highest instruction number in this
-// live range is less than or equal to the lowest instruction number
-// of the Other live range.
+// Returns true if this live range ends before Other's live range starts. This
+// means that the highest instruction number in this live range is less than or
+// equal to the lowest instruction number of the Other live range.
bool LiveRange::endsBefore(const LiveRange &Other) const {
// Neither range should be empty, but let's be graceful.
if (Range.empty() || Other.Range.empty())
@@ -94,10 +93,10 @@
break;
}
}
- // This is an equivalent but less inefficient implementation. It's
- // expensive enough that we wouldn't want to run it under any build,
- // but it could be enabled if e.g. the LiveRange implementation
- // changes and extra testing is needed.
+ // This is an equivalent but less efficient implementation. It's expensive
+ // enough that we wouldn't want to run it under any build, but it could be
+ // enabled if e.g. the LiveRange implementation changes and extra testing is
+ // needed.
if (BuildDefs::extraValidation()) {
LiveRange Temp;
Temp.addSegment(OtherBegin, OtherBegin + 1);
@@ -108,11 +107,10 @@
return Result;
}
-// Returns true if the live range contains the given instruction
-// number. This is only used for validating the live range
-// calculation. The IsDest argument indicates whether the Variable
-// being tested is used in the Dest position (as opposed to a Src
-// position).
+// Returns true if the live range contains the given instruction number. This
+// is only used for validating the live range calculation. The IsDest argument
+// indicates whether the Variable being tested is used in the Dest position (as
+// opposed to a Src position).
bool LiveRange::containsValue(InstNumberT Value, bool IsDest) const {
for (const RangeElementType &I : Range) {
if (I.first <= Value &&
@@ -134,8 +132,8 @@
}
Variable *Variable::asType(Type Ty) {
- // Note: This returns a Variable, even if the "this" object is a
- // subclass of Variable.
+ // Note: This returns a Variable, even if the "this" object is a subclass of
+ // Variable.
if (!BuildDefs::dump() || getType() == Ty)
return this;
Variable *V = new (getCurrentCfgAllocator()->Allocate<Variable>())
@@ -171,20 +169,19 @@
if (MultiBlock == MBS_MultiBlock)
return;
- // TODO(stichnot): If the use occurs as a source operand in the
- // first instruction of the block, and its definition is in this
- // block's only predecessor, we might consider not marking this as a
- // separate use. This may also apply if it's the first instruction
- // of the block that actually uses a Variable.
+ // TODO(stichnot): If the use occurs as a source operand in the first
+ // instruction of the block, and its definition is in this block's only
+ // predecessor, we might consider not marking this as a separate use. This
+ // may also apply if it's the first instruction of the block that actually
+ // uses a Variable.
assert(Node);
bool MakeMulti = false;
if (IsImplicit)
MakeMulti = true;
- // A phi source variable conservatively needs to be marked as
- // multi-block, even if its definition is in the same block. This
- // is because there can be additional control flow before branching
- // back to this node, and the variable is live throughout those
- // nodes.
+ // A phi source variable conservatively needs to be marked as multi-block,
+ // even if its definition is in the same block. This is because there can be
+ // additional control flow before branching back to this node, and the
+ // variable is live throughout those nodes.
if (Instr && llvm::isa<InstPhi>(Instr))
MakeMulti = true;
@@ -211,10 +208,10 @@
void VariableTracking::markDef(MetadataKind TrackingKind, const Inst *Instr,
CfgNode *Node) {
- // TODO(stichnot): If the definition occurs in the last instruction
- // of the block, consider not marking this as a separate use. But
- // be careful not to omit all uses of the variable if markDef() and
- // markUse() both use this optimization.
+ // TODO(stichnot): If the definition occurs in the last instruction of the
+ // block, consider not marking this as a separate use. But be careful not to
+ // omit all uses of the variable if markDef() and markUse() both use this
+ // optimization.
assert(Node);
// Verify that instructions are added in increasing order.
#ifndef NDEBUG
@@ -517,8 +514,7 @@
// =========== Immediate Randomization and Pooling routines ==============
// Specialization of the template member function for ConstantInteger32
-// TODO(stichnot): try to move this specialization into a target-specific
-// file.
+// TODO(stichnot): try to move this specialization into a target-specific file.
template <>
bool ConstantInteger32::shouldBeRandomizedOrPooled(const GlobalContext *Ctx) {
uint32_t Threshold = Ctx->getFlags().getRandomizeAndPoolImmediatesThreshold();
diff --git a/src/IceOperand.h b/src/IceOperand.h
index 8bec48e..b4e06be 100644
--- a/src/IceOperand.h
+++ b/src/IceOperand.h
@@ -45,11 +45,11 @@
kVariable,
kVariable_Target, // leave space for target-specific variable kinds
kVariable_Max = kVariable_Target + MaxTargetKinds,
- // Target-specific operand classes use kTarget as the starting
- // point for their Kind enum space. Note that the value-spaces are shared
- // across targets. To avoid confusion over the definition of shared
- // values, an object specific to one target should never be passed
- // to a different target.
+ // Target-specific operand classes use kTarget as the starting point for
+ // their Kind enum space. Note that the value-spaces are shared across
+ // targets. To avoid confusion over the definition of shared values, an
+ // object specific to one target should never be passed to a different
+ // target.
kTarget,
kTarget_Max = std::numeric_limits<uint8_t>::max(),
};
@@ -70,8 +70,8 @@
/// \name Dumping functions.
/// @{
- /// The dump(Func,Str) implementation must be sure to handle the
- /// situation where Func==nullptr.
+ /// The dump(Func,Str) implementation must be sure to handle the situation
+ /// where Func==nullptr.
virtual void dump(const Cfg *Func, Ostream &Str) const = 0;
void dump(const Cfg *Func) const {
if (!BuildDefs::dump())
@@ -105,8 +105,8 @@
return Str;
}
-/// Constant is the abstract base class for constants. All
-/// constants are allocated from a global arena and are pooled.
+/// Constant is the abstract base class for constants. All constants are
+/// allocated from a global arena and are pooled.
class Constant : public Operand {
Constant() = delete;
Constant(const Constant &) = delete;
@@ -124,9 +124,9 @@
return Kind >= kConst_Base && Kind <= kConst_Max;
}
- /// Judge if this given immediate should be randomized or pooled
- /// By default should return false, only constant integers should
- /// truly go through this method.
+ /// Judge if this given immediate should be randomized or pooled. By default
+ /// this should return false; only constant integers should truly go through
+ /// this method.
virtual bool shouldBeRandomizedOrPooled(const GlobalContext *Ctx) {
(void)Ctx;
return false;
@@ -142,9 +142,9 @@
Vars = nullptr;
NumVars = 0;
}
- /// PoolEntryID is an integer that uniquely identifies the constant
- /// within its constant pool. It is used for building the constant
- /// pool in the object code and for referencing its entries.
+ /// PoolEntryID is an integer that uniquely identifies the constant within its
+ /// constant pool. It is used for building the constant pool in the object
+ /// code and for referencing its entries.
const uint32_t PoolEntryID;
/// Whether we should pool this constant. Usually Float/Double and pooled
/// Integers should be flagged true.
@@ -219,10 +219,9 @@
Str << static_cast<int64_t>(getValue());
}
-/// RelocatableTuple bundles the parameters that are used to
-/// construct an ConstantRelocatable. It is done this way so that
-/// ConstantRelocatable can fit into the global constant pool
-/// template mechanism.
+ /// RelocatableTuple bundles the parameters that are used to construct a
+/// ConstantRelocatable. It is done this way so that ConstantRelocatable can fit
+/// into the global constant pool template mechanism.
class RelocatableTuple {
RelocatableTuple() = delete;
RelocatableTuple &operator=(const RelocatableTuple &) = delete;
@@ -240,8 +239,8 @@
bool operator==(const RelocatableTuple &A, const RelocatableTuple &B);
-/// ConstantRelocatable represents a symbolic constant combined with
-/// a fixed offset.
+/// ConstantRelocatable represents a symbolic constant combined with a fixed
+/// offset.
class ConstantRelocatable : public Constant {
ConstantRelocatable() = delete;
ConstantRelocatable(const ConstantRelocatable &) = delete;
@@ -282,9 +281,9 @@
bool SuppressMangling;
};
-/// ConstantUndef represents an unspecified bit pattern. Although it is
-/// legal to lower ConstantUndef to any value, backends should try to
-/// make code generation deterministic by lowering ConstantUndefs to 0.
+/// ConstantUndef represents an unspecified bit pattern. Although it is legal to
+/// lower ConstantUndef to any value, backends should try to make code
+/// generation deterministic by lowering ConstantUndefs to 0.
class ConstantUndef : public Constant {
ConstantUndef() = delete;
ConstantUndef(const ConstantUndef &) = delete;
@@ -315,9 +314,9 @@
: Constant(kConstUndef, Ty, PoolEntryID) {}
};
-/// RegWeight is a wrapper for a uint32_t weight value, with a
-/// special value that represents infinite weight, and an addWeight()
-/// method that ensures that W+infinity=infinity.
+/// RegWeight is a wrapper for a uint32_t weight value, with a special value
+/// that represents infinite weight, and an addWeight() method that ensures that
+/// W+infinity=infinity.
class RegWeight {
public:
RegWeight() = default;
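  /// A minimal sketch of the saturating behavior described above (the Weight
  /// and Inf names are assumptions for illustration, not necessarily the
  /// actual members):
  ///   void addWeight(uint32_t Delta) {
  ///     if (Delta == Inf)
  ///       Weight = Inf;
  ///     else if (Weight != Inf)
  ///       Weight += Delta;
  ///   }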
@@ -346,15 +345,15 @@
bool operator<=(const RegWeight &A, const RegWeight &B);
bool operator==(const RegWeight &A, const RegWeight &B);
-/// LiveRange is a set of instruction number intervals representing
-/// a variable's live range. Generally there is one interval per basic
-/// block where the variable is live, but adjacent intervals get
-/// coalesced into a single interval.
+/// LiveRange is a set of instruction number intervals representing a variable's
+/// live range. Generally there is one interval per basic block where the
+/// variable is live, but adjacent intervals get coalesced into a single
+/// interval.
class LiveRange {
public:
LiveRange() = default;
- /// Special constructor for building a kill set. The advantage is
- /// that we can reserve the right amount of space in advance.
+ /// Special constructor for building a kill set. The advantage is that we can
+ /// reserve the right amount of space in advance.
explicit LiveRange(const std::vector<InstNumberT> &Kills) {
Range.reserve(Kills.size());
for (InstNumberT I : Kills)
@@ -392,22 +391,21 @@
using RangeType =
std::vector<RangeElementType, CfgLocalAllocator<RangeElementType>>;
RangeType Range;
- /// TrimmedBegin is an optimization for the overlaps() computation.
- /// Since the linear-scan algorithm always calls it as overlaps(Cur)
- /// and Cur advances monotonically according to live range start, we
- /// can optimize overlaps() by ignoring all segments that end before
- /// the start of Cur's range. The linear-scan code enables this by
- /// calling trim() on the ranges of interest as Cur advances. Note
- /// that linear-scan also has to initialize TrimmedBegin at the
- /// beginning by calling untrim().
+ /// TrimmedBegin is an optimization for the overlaps() computation. Since the
+ /// linear-scan algorithm always calls it as overlaps(Cur) and Cur advances
+ /// monotonically according to live range start, we can optimize overlaps() by
+ /// ignoring all segments that end before the start of Cur's range. The
+ /// linear-scan code enables this by calling trim() on the ranges of interest
+ /// as Cur advances. Note that linear-scan also has to initialize TrimmedBegin
+ /// at the beginning by calling untrim().
RangeType::const_iterator TrimmedBegin;
};
Ostream &operator<<(Ostream &Str, const LiveRange &L);
/// Variable represents an operand that is register-allocated or
-/// stack-allocated. If it is register-allocated, it will ultimately
-/// have a non-negative RegNum field.
+/// stack-allocated. If it is register-allocated, it will ultimately have a
+/// non-negative RegNum field.
class Variable : public Operand {
Variable() = delete;
Variable(const Variable &) = delete;
@@ -495,11 +493,11 @@
LoVar = Lo;
HiVar = Hi;
}
- /// Creates a temporary copy of the variable with a different type.
- /// Used primarily for syntactic correctness of textual assembly
- /// emission. Note that only basic information is copied, in
- /// particular not IsArgument, IsImplicitArgument, IgnoreLiveness,
- /// RegNumTmp, Live, LoVar, HiVar, VarsReal.
+ /// Creates a temporary copy of the variable with a different type. Used
+ /// primarily for syntactic correctness of textual assembly emission. Note
+ /// that only basic information is copied, in particular not IsArgument,
+ /// IsImplicitArgument, IgnoreLiveness, RegNumTmp, Live, LoVar, HiVar,
+ /// VarsReal.
Variable *asType(Type Ty);
void emit(const Cfg *Func) const override;
@@ -521,18 +519,18 @@
Vars[0] = this;
NumVars = 1;
}
- /// Number is unique across all variables, and is used as a
- /// (bit)vector index for liveness analysis.
+ /// Number is unique across all variables, and is used as a (bit)vector index
+ /// for liveness analysis.
const SizeT Number;
Cfg::IdentifierIndexType NameIndex = Cfg::IdentifierIndexInvalid;
bool IsArgument = false;
bool IsImplicitArgument = false;
- /// IgnoreLiveness means that the variable should be ignored when
- /// constructing and validating live ranges. This is usually
- /// reserved for the stack pointer.
+ /// IgnoreLiveness means that the variable should be ignored when constructing
+ /// and validating live ranges. This is usually reserved for the stack
+ /// pointer.
bool IgnoreLiveness = false;
- /// StackOffset is the canonical location on stack (only if
- /// RegNum==NoRegister || IsArgument).
+ /// StackOffset is the canonical location on stack (only if RegNum==NoRegister
+ /// || IsArgument).
int32_t StackOffset = 0;
/// RegNum is the allocated register, or NoRegister if it isn't
/// register-allocated.
@@ -541,17 +539,15 @@
int32_t RegNumTmp = NoRegister;
RegRequirement RegRequirement = RR_MayHaveRegister;
LiveRange Live;
- // LoVar and HiVar are needed for lowering from 64 to 32 bits. When
- // lowering from I64 to I32 on a 32-bit architecture, we split the
- // variable into two machine-size pieces. LoVar is the low-order
- // machine-size portion, and HiVar is the remaining high-order
- // portion. TODO: It's wasteful to penalize all variables on all
- // targets this way; use a sparser representation. It's also
- // wasteful for a 64-bit target.
+ // LoVar and HiVar are needed for lowering from 64 to 32 bits. When lowering
+ // from I64 to I32 on a 32-bit architecture, we split the variable into two
+ // machine-size pieces. LoVar is the low-order machine-size portion, and
+ // HiVar is the remaining high-order portion.
+ // TODO: It's wasteful to penalize all variables on all targets this way; use
+ // a sparser representation. It's also wasteful for a 64-bit target.
Variable *LoVar = nullptr;
Variable *HiVar = nullptr;
- /// VarsReal (and Operand::Vars) are set up such that Vars[0] ==
- /// this.
+ /// VarsReal (and Operand::Vars) are set up such that Vars[0] == this.
Variable *VarsReal[1];
};
@@ -611,13 +607,12 @@
public:
explicit VariablesMetadata(const Cfg *Func) : Func(Func) {}
- /// Initialize the state by traversing all instructions/variables in
- /// the CFG.
+ /// Initialize the state by traversing all instructions/variables in the CFG.
void init(MetadataKind TrackingKind);
- /// Add a single node. This is called by init(), and can be called
+ /// Add a single node. This is called by init(), and can be called
/// incrementally from elsewhere, e.g. after edge-splitting.
void addNode(CfgNode *Node);
- /// Returns whether the given Variable is tracked in this object. It should
+ /// Returns whether the given Variable is tracked in this object. It should
/// only return false if changes were made to the CFG after running init(), in
/// which case the state is stale and the results shouldn't be trusted (but it
/// may be OK e.g. for dumping).
@@ -627,29 +622,27 @@
/// Returns whether the given Variable has multiple definitions.
bool isMultiDef(const Variable *Var) const;
- /// Returns the first definition instruction of the given Variable. This is
+ /// Returns the first definition instruction of the given Variable. This is
/// only valid for variables whose definitions are all within the same block,
/// e.g. T after the lowered sequence "T=B; T+=C; A=T", for which
- /// getFirstDefinition(T) would return the "T=B" instruction. For variables
+ /// getFirstDefinition(T) would return the "T=B" instruction. For variables
    /// with definitions that span multiple blocks, nullptr is returned.
const Inst *getFirstDefinition(const Variable *Var) const;
- /// Returns the definition instruction of the given Variable, when
- /// the variable has exactly one definition. Otherwise, nullptr is
- /// returned.
+ /// Returns the definition instruction of the given Variable, when the
+ /// variable has exactly one definition. Otherwise, nullptr is returned.
const Inst *getSingleDefinition(const Variable *Var) const;
/// Returns the list of all definition instructions of the given Variable.
const InstDefList &getLatterDefinitions(const Variable *Var) const;
- /// Returns whether the given Variable is live across multiple
- /// blocks. Mainly, this is used to partition Variables into
- /// single-block versus multi-block sets for leveraging sparsity in
- /// liveness analysis, and for implementing simple stack slot
- /// coalescing. As a special case, function arguments are always
- /// considered multi-block because they are live coming into the
- /// entry block.
+ /// Returns whether the given Variable is live across multiple blocks. Mainly,
+ /// this is used to partition Variables into single-block versus multi-block
+ /// sets for leveraging sparsity in liveness analysis, and for implementing
+ /// simple stack slot coalescing. As a special case, function arguments are
+ /// always considered multi-block because they are live coming into the entry
+ /// block.
bool isMultiBlock(const Variable *Var) const;
/// Returns the node that the given Variable is used in, assuming
- /// isMultiBlock() returns false. Otherwise, nullptr is returned.
+ /// isMultiBlock() returns false. Otherwise, nullptr is returned.
CfgNode *getLocalUseNode(const Variable *Var) const;
/// Returns the total use weight computed as the sum of uses multiplied by a
diff --git a/src/IcePhiLoweringImpl.h b/src/IcePhiLoweringImpl.h
index cf932d6..1957645 100644
--- a/src/IcePhiLoweringImpl.h
+++ b/src/IcePhiLoweringImpl.h
@@ -24,11 +24,11 @@
namespace Ice {
namespace PhiLowering {
-// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to
-// preserve integrity of liveness analysis. This is needed for 32-bit
-// targets. This assumes the 32-bit target has loOperand, hiOperand,
-// and legalizeUndef methods. Undef values are also legalized, since
-// loOperand() and hiOperand() don't expect Undef input.
+/// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to preserve
+/// integrity of liveness analysis. This is needed for 32-bit targets. This
+/// assumes the 32-bit target has loOperand, hiOperand, and legalizeUndef
+/// methods. Undef values are also legalized, since loOperand() and hiOperand()
+/// don't expect Undef input.
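/// A schematic sketch of the transformation described above (operand spellings
/// are illustrative, not exact Subzero IR):
///   A = phi i64 [B, %pred1], [C, %pred2]
/// becomes
///   A.lo = phi i32 [loOperand(B), %pred1], [loOperand(C), %pred2]
///   A.hi = phi i32 [hiOperand(B), %pred1], [hiOperand(C), %pred2]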
template <class TargetT>
void prelowerPhis32Bit(TargetT *Target, CfgNode *Node, Cfg *Func) {
for (Inst &I : Node->getPhis()) {
diff --git a/src/IceRNG.cpp b/src/IceRNG.cpp
index 89b1893..987d1a4 100644
--- a/src/IceRNG.cpp
+++ b/src/IceRNG.cpp
@@ -25,9 +25,9 @@
// TODO(wala,stichnot): Switch to RNG implementation from LLVM or C++11.
//
// TODO(wala,stichnot): Make it possible to replay the RNG sequence in a
-// subsequent run, for reproducing a bug. Print the seed in a comment
-// in the asm output. Embed the seed in the binary via metadata that an
-// attacker can't introspect.
+// subsequent run, for reproducing a bug. Print the seed in a comment in the
+// asm output. Embed the seed in the binary via metadata that an attacker can't
+// introspect.
RandomNumberGenerator::RandomNumberGenerator(uint64_t Seed, llvm::StringRef)
: State(Seed) {}
diff --git a/src/IceRNG.h b/src/IceRNG.h
index 4eeefa6..7ee2e39 100644
--- a/src/IceRNG.h
+++ b/src/IceRNG.h
@@ -52,9 +52,9 @@
uint64_t State;
};
-/// This class adds additional random number generator utilities. The
-/// reason for the wrapper class is that we want to keep the
-/// RandomNumberGenerator interface identical to LLVM's.
+/// This class adds additional random number generator utilities. The reason for
+/// the wrapper class is that we want to keep the RandomNumberGenerator
+/// interface identical to LLVM's.
class RandomNumberGeneratorWrapper {
RandomNumberGeneratorWrapper() = delete;
RandomNumberGeneratorWrapper(const RandomNumberGeneratorWrapper &) = delete;
@@ -71,9 +71,9 @@
RandomNumberGenerator &RNG;
};
-/// RandomShuffle is an implementation of std::random_shuffle() that
-/// doesn't change across stdlib implementations. Adapted from a
-/// sample implementation at cppreference.com.
+/// RandomShuffle is an implementation of std::random_shuffle() that doesn't
+/// change across stdlib implementations. Adapted from a sample implementation
+/// at cppreference.com.
template <class RandomIt, class RandomFunc>
void RandomShuffle(RandomIt First, RandomIt Last, RandomFunc &&RNG) {
for (auto i = Last - First - 1; i > 0; --i)
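    // Sketch of the loop body elided by the diff context here (the actual line
    // may differ): the cppreference-style Fisher-Yates step, where RNG(N) is
    // assumed to return a uniform value in [0, N).
    //   std::swap(First[i], First[RNG(i + 1)]);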
diff --git a/src/IceRegAlloc.cpp b/src/IceRegAlloc.cpp
index ad5c2b6..304ac37 100644
--- a/src/IceRegAlloc.cpp
+++ b/src/IceRegAlloc.cpp
@@ -82,14 +82,14 @@
: Func(Func), Ctx(Func->getContext()), Target(Func->getTarget()),
Verbose(BuildDefs::dump() && Func->isVerbose(IceV_LinearScan)) {}
-// Prepare for full register allocation of all variables. We depend on
-// liveness analysis to have calculated live ranges.
+// Prepare for full register allocation of all variables. We depend on liveness
+// analysis to have calculated live ranges.
void LinearScan::initForGlobal() {
TimerMarker T(TimerStack::TT_initUnhandled, Func);
FindPreference = true;
// For full register allocation, normally we want to enable FindOverlap
// (meaning we look for opportunities for two overlapping live ranges to
- // safely share the same register). However, we disable it for phi-lowering
+ // safely share the same register). However, we disable it for phi-lowering
// register allocation since no overlap opportunities should be available and
// it's more expensive to look for opportunities.
FindOverlap = (Kind != RAK_Phi);
@@ -262,7 +262,7 @@
}
// This is called when Cur must be allocated a register but no registers are
-// available across Cur's live range. To handle this, we find a register that
+// available across Cur's live range. To handle this, we find a register that
// is not explicitly used during Cur's live range, spill that register to a
// stack location right before Cur's live range begins, and fill (reload) the
// register from the stack location right after Cur's live range ends.
@@ -297,9 +297,9 @@
if (I->getNumber() == End)
FillPoint = I;
if (SpillPoint != E) {
- // Remove from RegMask any physical registers referenced during Cur's live
- // range. Start looking after SpillPoint gets set, i.e. once Cur's live
- // range begins.
+ // Remove from RegMask any physical registers referenced during Cur's
+ // live range. Start looking after SpillPoint gets set, i.e. once Cur's
+ // live range begins.
FOREACH_VAR_IN_INST(Var, *I) {
if (!Var->hasRegTmp())
continue;
@@ -319,8 +319,9 @@
assert(RegNum != -1);
Iter.Cur->setRegNumTmp(RegNum);
Variable *Preg = Target->getPhysicalRegister(RegNum, Iter.Cur->getType());
- // TODO(stichnot): Add SpillLoc to VariablesMetadata tracking so that SpillLoc
- // is correctly identified as !isMultiBlock(), reducing stack frame size.
+ // TODO(stichnot): Add SpillLoc to VariablesMetadata tracking so that
+ // SpillLoc is correctly identified as !isMultiBlock(), reducing stack frame
+ // size.
Variable *SpillLoc = Func->makeVariable(Iter.Cur->getType());
// Add "reg=FakeDef;spill=reg" before SpillPoint
Target->lowerInst(Node, SpillPoint, InstFakeDef::create(Func, Preg));
@@ -413,8 +414,8 @@
if (Variable *SrcVar = llvm::dyn_cast<Variable>(DefInst->getSrc(i))) {
int32_t SrcReg = SrcVar->getRegNumTmp();
// Only consider source variables that have (so far) been assigned a
- // register. That register must be one in the RegMask set, e.g.
- // don't try to prefer the stack pointer as a result of the stacksave
+ // register. That register must be one in the RegMask set, e.g. don't
+ // try to prefer the stack pointer as a result of the stacksave
// intrinsic.
if (SrcVar->hasRegTmp() && Iter.RegMask[SrcReg]) {
if (FindOverlap && !Iter.Free[SrcReg]) {
@@ -469,7 +470,7 @@
// Remove registers from the Free[] list where an Unhandled pre-colored range
// overlaps with the current range, and set those registers to infinite weight
-// so that they aren't candidates for eviction. Cur->rangeEndsBefore(Item) is
+// so that they aren't candidates for eviction. Cur->rangeEndsBefore(Item) is
// an early exit check that turns a guaranteed O(N^2) algorithm into expected
// linear complexity.
void LinearScan::filterFreeWithPrecoloredRanges(IterationState &Iter) {
@@ -610,9 +611,9 @@
const SizeT Index = I - 1;
Variable *Item = Inactive[Index];
// Note: The Item->rangeOverlaps(Cur) clause is not part of the
- // description of AssignMemLoc() in the original paper. But there
- // doesn't seem to be any need to evict an inactive live range that
- // doesn't overlap with the live range currently being considered. It's
+ // description of AssignMemLoc() in the original paper. But there doesn't
+ // seem to be any need to evict an inactive live range that doesn't
+ // overlap with the live range currently being considered. It's
// especially bad if we would end up evicting an infinite-weight but
// currently-inactive live range. The most common situation for this
// would be a scratch register kill set for call instructions.
@@ -644,9 +645,9 @@
if (Randomized) {
// Create a random number generator for regalloc randomization. Merge
// function's sequence and Kind value as the Salt. Because regAlloc() is
- // called twice under O2, the second time with RAK_Phi, we check
- // Kind == RAK_Phi to determine the lowest-order bit to make sure the Salt
- // is different.
+ // called twice under O2, the second time with RAK_Phi, we check Kind ==
+ // RAK_Phi to determine the lowest-order bit to make sure the Salt is
+ // different.
uint64_t Salt =
(Func->getSequenceNumber() << 1) ^ (Kind == RAK_Phi ? 0u : 1u);
Target->makeRandomRegisterPermutation(
diff --git a/src/IceRegAlloc.h b/src/IceRegAlloc.h
index b3986a3..ec37aa0 100644
--- a/src/IceRegAlloc.h
+++ b/src/IceRegAlloc.h
@@ -60,10 +60,10 @@
void initForGlobal();
void initForInfOnly();
- /// Move an item from the From set to the To set. From[Index] is pushed onto
+ /// Move an item from the From set to the To set. From[Index] is pushed onto
/// the end of To[], then the item is efficiently removed from From[] by
/// effectively swapping it with the last item in From[] and then popping it
- /// from the back. As such, the caller is best off iterating over From[] in
+ /// from the back. As such, the caller is best off iterating over From[] in
/// reverse order to avoid the need for special handling of the iterator.
void moveItem(UnorderedRanges &From, SizeT Index, UnorderedRanges &To) {
To.push_back(From[Index]);
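    // The remainder of the body is elided by the diff context; per the
    // description above it amounts to a swap-with-back-and-pop, e.g. (sketch):
    //   From[Index] = From.back();
    //   From.pop_back();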
@@ -109,8 +109,8 @@
/// currently assigned to. It can be greater than 1 as a result of
/// AllowOverlap inference.
llvm::SmallVector<int32_t, REGS_SIZE> RegUses;
- // TODO(jpp): for some architectures a SmallBitVector might not be big enough.
- // Evaluate what the performance impact on those architectures is.
+ // TODO(jpp): for some architectures a SmallBitVector might not be big
+ // enough. Evaluate what the performance impact on those architectures is.
llvm::SmallVector<const llvm::SmallBitVector *, REGS_SIZE> RegAliases;
bool FindPreference = false;
bool FindOverlap = false;
diff --git a/src/IceRegistersARM32.h b/src/IceRegistersARM32.h
index a80b9b2..eafed3a 100644
--- a/src/IceRegistersARM32.h
+++ b/src/IceRegistersARM32.h
@@ -23,8 +23,8 @@
class RegARM32 {
public:
- /// An enum of every register. The enum value may not match the encoding
- /// used to binary encode register operands in instructions.
+ /// An enum of every register. The enum value may not match the encoding used
+ /// to binary encode register operands in instructions.
enum AllRegisters {
#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt, \
isFP32, isFP64, isVec128, alias_init) \
@@ -37,8 +37,8 @@
#undef X
};
- /// An enum of GPR Registers. The enum value does match the encoding used
- /// to binary encode register operands in instructions.
+ /// An enum of GPR Registers. The enum value does match the encoding used to
+ /// binary encode register operands in instructions.
enum GPRRegister {
#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt, \
isFP32, isFP64, isVec128, alias_init) \
diff --git a/src/IceRegistersX8632.h b/src/IceRegistersX8632.h
index b0d22bb..73492ef 100644
--- a/src/IceRegistersX8632.h
+++ b/src/IceRegistersX8632.h
@@ -23,8 +23,8 @@
class RegX8632 {
public:
- /// An enum of every register. The enum value may not match the encoding
- /// used to binary encode register operands in instructions.
+ /// An enum of every register. The enum value may not match the encoding used
+ /// to binary encode register operands in instructions.
enum AllRegisters {
#define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \
frameptr, isI8, isInt, isFP) \
@@ -37,8 +37,8 @@
#undef X
};
- /// An enum of GPR Registers. The enum value does match the encoding used
- /// to binary encode register operands in instructions.
+ /// An enum of GPR Registers. The enum value does match the encoding used to
+ /// binary encode register operands in instructions.
enum GPRRegister {
#define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \
frameptr, isI8, isInt, isFP) \
@@ -48,8 +48,8 @@
Encoded_Not_GPR = -1
};
- /// An enum of XMM Registers. The enum value does match the encoding used
- /// to binary encode register operands in instructions.
+ /// An enum of XMM Registers. The enum value does match the encoding used to
+ /// binary encode register operands in instructions.
enum XmmRegister {
#define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \
frameptr, isI8, isInt, isFP) \
@@ -59,8 +59,8 @@
Encoded_Not_Xmm = -1
};
- /// An enum of Byte Registers. The enum value does match the encoding used
- /// to binary encode register operands in instructions.
+ /// An enum of Byte Registers. The enum value does match the encoding used to
+ /// binary encode register operands in instructions.
enum ByteRegister {
#define X(val, encode) Encoded_##val encode,
REGX8632_BYTEREG_TABLE
diff --git a/src/IceRegistersX8664.h b/src/IceRegistersX8664.h
index bc448b2..3a10f00 100644
--- a/src/IceRegistersX8664.h
+++ b/src/IceRegistersX8664.h
@@ -23,8 +23,8 @@
class RegX8664 {
public:
- /// An enum of every register. The enum value may not match the encoding
- /// used to binary encode register operands in instructions.
+ /// An enum of every register. The enum value may not match the encoding used
+ /// to binary encode register operands in instructions.
enum AllRegisters {
#define X(val, encode, name64, name, name16, name8, scratch, preserved, \
stackptr, frameptr, isInt, isFP) \
@@ -37,8 +37,8 @@
#undef X
};
- /// An enum of GPR Registers. The enum value does match the encoding used
- /// to binary encode register operands in instructions.
+ /// An enum of GPR Registers. The enum value does match the encoding used to
+ /// binary encode register operands in instructions.
enum GPRRegister {
#define X(val, encode, name64, name, name16, name8, scratch, preserved, \
stackptr, frameptr, isInt, isFP) \
@@ -48,8 +48,8 @@
Encoded_Not_GPR = -1
};
- /// An enum of XMM Registers. The enum value does match the encoding used
- /// to binary encode register operands in instructions.
+ /// An enum of XMM Registers. The enum value does match the encoding used to
+ /// binary encode register operands in instructions.
enum XmmRegister {
#define X(val, encode, name64, name, name16, name8, scratch, preserved, \
stackptr, frameptr, isInt, isFP) \
@@ -59,8 +59,8 @@
Encoded_Not_Xmm = -1
};
- /// An enum of Byte Registers. The enum value does match the encoding used
- /// to binary encode register operands in instructions.
+ /// An enum of Byte Registers. The enum value does match the encoding used to
+ /// binary encode register operands in instructions.
enum ByteRegister {
#define X(val, encode) Encoded_##val encode,
REGX8664_BYTEREG_TABLE
diff --git a/src/IceSwitchLowering.cpp b/src/IceSwitchLowering.cpp
index 6207495..047aa91 100644
--- a/src/IceSwitchLowering.cpp
+++ b/src/IceSwitchLowering.cpp
@@ -55,8 +55,8 @@
// Test for a single jump table. This can be done in constant time whereas
// finding the best set of jump table would be quadratic, too slow(?). If
- // jump tables were included in the search tree we'd first have to traverse to
- // them. Ideally we would have an unbalanced tree which is biased towards
+ // jump tables were included in the search tree we'd first have to traverse
+ // to them. Ideally we would have an unbalanced tree which is biased towards
// frequently executed code but we can't do this well without profiling data.
// So, this single jump table is a good starting point where you can get to
// the jump table quickly without figuring out how to unbalance the tree.
diff --git a/src/IceSwitchLowering.h b/src/IceSwitchLowering.h
index e1cdb8a..df3bef3 100644
--- a/src/IceSwitchLowering.h
+++ b/src/IceSwitchLowering.h
@@ -75,8 +75,8 @@
bool tryAppend(const CaseCluster &New);
};
-/// Store the jump table data so that it can be emitted later in the correct
-/// ELF section once the offsets from the start of the function are known.
+/// Store the jump table data so that it can be emitted later in the correct ELF
+/// section once the offsets from the start of the function are known.
class JumpTableData {
JumpTableData() = delete;
JumpTableData &operator=(const JumpTableData &) = delete;
diff --git a/src/IceTLS.h b/src/IceTLS.h
index 0e7731d..9a20e70 100644
--- a/src/IceTLS.h
+++ b/src/IceTLS.h
@@ -9,9 +9,8 @@
///
/// \file
/// This file defines macros for working around the lack of support for
-/// thread_local in MacOS 10.6. It assumes std::thread is written in
-/// terms of pthread. Define ICE_THREAD_LOCAL_HACK to enable the
-/// pthread workarounds.
+/// thread_local in MacOS 10.6. It assumes std::thread is written in terms of
+/// pthread. Define ICE_THREAD_LOCAL_HACK to enable the pthread workarounds.
///
//===----------------------------------------------------------------------===//
@@ -26,25 +25,25 @@
// Defines 5 macros for unifying thread_local and pthread:
//
-// ICE_TLS_DECLARE_FIELD(Type, FieldName): Declare a static
-// thread_local field inside the current class definition. "Type"
-// needs to be a pointer type, such as int* or class Foo*.
+// ICE_TLS_DECLARE_FIELD(Type, FieldName): Declare a static thread_local field
+// inside the current class definition. "Type" needs to be a pointer type, such
+// as int* or class Foo*.
//
// ICE_TLS_DEFINE_FIELD(Type, ClassName, FieldName): Define a static
-// thread_local field outside of its class definition. The field will
+// thread_local field outside of its class definition. The field will
// ultimately be initialized to nullptr.
//
-// ICE_TLS_INIT_FIELD(FieldName): Ensure the thread_local field is
-// properly initialized. This is intended to be called from within a
-// static method of the field's class after main() starts (to ensure
-// that the pthread library is fully initialized) but before any uses
-// of ICE_TLS_GET_FIELD or ICE_TLS_SET_FIELD.
+// ICE_TLS_INIT_FIELD(FieldName): Ensure the thread_local field is properly
+// initialized. This is intended to be called from within a static method of
+// the field's class after main() starts (to ensure that the pthread library is
+// fully initialized) but before any uses of ICE_TLS_GET_FIELD or
+// ICE_TLS_SET_FIELD.
//
// ICE_TLS_GET_FIELD(Type, FieldName): Read the value of the static
-// thread_local field. Must be done within the context of its class.
+// thread_local field. Must be done within the context of its class.
//
// ICE_TLS_SET_FIELD(FieldName, Value): Write a value into the static
-// thread_local field. Must be done within the context of its class.
+// thread_local field. Must be done within the context of its class.
// TODO(stichnot): Limit this define to only the platforms that
// absolutely require it. And ideally, eventually remove this hack
@@ -52,17 +51,16 @@
#define ICE_THREAD_LOCAL_HACK
#ifdef ICE_THREAD_LOCAL_HACK
-// For a static thread_local field F of a class C, instead of
-// declaring and defining C::F, we create two static fields:
+// For a static thread_local field F of a class C, instead of declaring and
+// defining C::F, we create two static fields:
// static pthread_key_t F__key;
// static int F__initStatus;
//
// The F__initStatus field is used to hold the result of the
-// pthread_key_create() call, where a zero value indicates success,
-// and a nonzero value indicates failure or that ICE_TLS_INIT_FIELD()
-// was never called.
-// The F__key field is used as the argument to
-// pthread_getspecific() and pthread_setspecific().
+// pthread_key_create() call, where a zero value indicates success, and a
+// nonzero value indicates failure or that ICE_TLS_INIT_FIELD() was never
+// called. The F__key field is used as the argument to pthread_getspecific()
+// and pthread_setspecific().
#include <pthread.h>
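For readers following the macro descriptions above, here is a minimal sketch of what the pthread-based fallbacks could look like. It only illustrates the F__key / F__initStatus scheme the comments describe; the exact macro bodies in IceTLS.h may differ, and the Type argument is assumed to be a pointer type as required.

  // Per-field pthread key plus an init-status flag, as described above.
  #define ICE_TLS_DECLARE_FIELD(Type, FieldName)                               \
    static pthread_key_t FieldName##__key;                                     \
    static int FieldName##__initStatus

  // A nonzero init status means pthread_key_create() failed or was never run;
  // ICE_TLS_INIT_FIELD() stores its return value (zero on success).
  #define ICE_TLS_DEFINE_FIELD(Type, ClassName, FieldName)                     \
    pthread_key_t ClassName::FieldName##__key;                                 \
    int ClassName::FieldName##__initStatus = 1
  #define ICE_TLS_INIT_FIELD(FieldName)                                        \
    (FieldName##__initStatus = pthread_key_create(&FieldName##__key, nullptr))

  // Reads and writes go through pthread_getspecific()/pthread_setspecific().
  #define ICE_TLS_GET_FIELD(Type, FieldName)                                   \
    (static_cast<Type>(pthread_getspecific(FieldName##__key)))
  #define ICE_TLS_SET_FIELD(FieldName, Value)                                  \
    (pthread_setspecific(FieldName##__key, (Value)))

A use site would then look like ICE_TLS_GET_FIELD(Foo *, MyField) or ICE_TLS_SET_FIELD(MyField, Ptr), with Foo, MyField, and Ptr standing in for a real type, field, and value.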
diff --git a/src/IceTargetLowering.cpp b/src/IceTargetLowering.cpp
index 5268aa7..6920788 100644
--- a/src/IceTargetLowering.cpp
+++ b/src/IceTargetLowering.cpp
@@ -8,11 +8,10 @@
//===----------------------------------------------------------------------===//
///
/// \file
-/// This file implements the skeleton of the TargetLowering class,
-/// specifically invoking the appropriate lowering method for a given
-/// instruction kind and driving global register allocation. It also
-/// implements the non-deleted instruction iteration in
-/// LoweringContext.
+/// This file implements the skeleton of the TargetLowering class, specifically
+/// invoking the appropriate lowering method for a given instruction kind and
+/// driving global register allocation. It also implements the non-deleted
+/// instruction iteration in LoweringContext.
///
//===----------------------------------------------------------------------===//
@@ -117,29 +116,27 @@
}
}
-// Lowers a single instruction according to the information in
-// Context, by checking the Context.Cur instruction kind and calling
-// the appropriate lowering method. The lowering method should insert
-// target instructions at the Cur.Next insertion point, and should not
-// delete the Context.Cur instruction or advance Context.Cur.
+// Lowers a single instruction according to the information in Context, by
+// checking the Context.Cur instruction kind and calling the appropriate
+// lowering method. The lowering method should insert target instructions at
+// the Cur.Next insertion point, and should not delete the Context.Cur
+// instruction or advance Context.Cur.
//
-// The lowering method may look ahead in the instruction stream as
-// desired, and lower additional instructions in conjunction with the
-// current one, for example fusing a compare and branch. If it does,
-// it should advance Context.Cur to point to the next non-deleted
-// instruction to process, and it should delete any additional
-// instructions it consumes.
+// The lowering method may look ahead in the instruction stream as desired, and
+// lower additional instructions in conjunction with the current one, for
+// example fusing a compare and branch. If it does, it should advance
+// Context.Cur to point to the next non-deleted instruction to process, and it
+// should delete any additional instructions it consumes.
void TargetLowering::lower() {
assert(!Context.atEnd());
Inst *Inst = Context.getCur();
Inst->deleteIfDead();
if (!Inst->isDeleted() && !llvm::isa<InstFakeDef>(Inst) &&
!llvm::isa<InstFakeUse>(Inst)) {
- // Mark the current instruction as deleted before lowering,
- // otherwise the Dest variable will likely get marked as non-SSA.
- // See Variable::setDefinition(). However, just pass-through
- // FakeDef and FakeUse instructions that might have been inserted
- // prior to lowering.
+ // Mark the current instruction as deleted before lowering, otherwise the
+ // Dest variable will likely get marked as non-SSA. See
+ // Variable::setDefinition(). However, just pass through FakeDef and
+ // FakeUse instructions that might have been inserted prior to lowering.
Inst->setDeleted();
switch (Inst->getKind()) {
case Inst::Alloca:
@@ -231,10 +228,10 @@
Func->setError("Can't lower unsupported instruction type");
}
-// Drives register allocation, allowing all physical registers (except
-// perhaps for the frame pointer) to be allocated. This set of
-// registers could potentially be parameterized if we want to restrict
-// registers e.g. for performance testing.
+// Drives register allocation, allowing all physical registers (except perhaps
+// for the frame pointer) to be allocated. This set of registers could
+// potentially be parameterized if we want to restrict registers e.g. for
+// performance testing.
void TargetLowering::regAlloc(RegAllocKind Kind) {
TimerMarker T(TimerStack::TT_regAlloc, Func);
LinearScan LinearScan(Func);
@@ -250,15 +247,14 @@
}
void TargetLowering::inferTwoAddress() {
- // Find two-address non-SSA instructions where Dest==Src0, and set
- // the DestNonKillable flag to keep liveness analysis consistent.
+ // Find two-address non-SSA instructions where Dest==Src0, and set the
+ // DestNonKillable flag to keep liveness analysis consistent.
for (auto Inst = Context.getCur(), E = Context.getNext(); Inst != E; ++Inst) {
if (Inst->isDeleted())
continue;
if (Variable *Dest = Inst->getDest()) {
- // TODO(stichnot): We may need to consider all source
- // operands, not just the first one, if using 3-address
- // instructions.
+ // TODO(stichnot): We may need to consider all source operands, not just
+ // the first one, if using 3-address instructions.
if (Inst->getSrcSize() > 0 && Inst->getSrc(0) == Dest)
Inst->setDestNonKillable();
}
@@ -268,8 +264,8 @@
void TargetLowering::sortVarsByAlignment(VarList &Dest,
const VarList &Source) const {
Dest = Source;
- // Instead of std::sort, we could do a bucket sort with log2(alignment)
- // as the buckets, if performance is an issue.
+ // Instead of std::sort, we could do a bucket sort with log2(alignment) as
+ // the buckets, if performance is an issue.
std::sort(Dest.begin(), Dest.end(),
[this](const Variable *V1, const Variable *V2) {
return typeWidthInBytesOnStack(V1->getType()) >
@@ -296,17 +292,17 @@
}
}
- // If SimpleCoalescing is false, each variable without a register
- // gets its own unique stack slot, which leads to large stack
- // frames. If SimpleCoalescing is true, then each "global" variable
- // without a register gets its own slot, but "local" variable slots
- // are reused across basic blocks. E.g., if A and B are local to
- // block 1 and C is local to block 2, then C may share a slot with A or B.
+ // If SimpleCoalescing is false, each variable without a register gets its
+ // own unique stack slot, which leads to large stack frames. If
+ // SimpleCoalescing is true, then each "global" variable without a register
+ // gets its own slot, but "local" variable slots are reused across basic
+ // blocks. E.g., if A and B are local to block 1 and C is local to block 2,
+ // then C may share a slot with A or B.
//
// We cannot coalesce stack slots if this function calls a "returns twice"
- // function. In that case, basic blocks may be revisited, and variables
- // local to those basic blocks are actually live until after the
- // called function returns a second time.
+ // function. In that case, basic blocks may be revisited, and variables local
+ // to those basic blocks are actually live until after the called function
+ // returns a second time.
const bool SimpleCoalescing = !callsReturnsTwice();
std::vector<size_t> LocalsSize(Func->getNumNodes());
@@ -317,15 +313,15 @@
RegsUsed[Var->getRegNum()] = true;
continue;
}
- // An argument either does not need a stack slot (if passed in a
- // register) or already has one (if passed on the stack).
+ // An argument either does not need a stack slot (if passed in a register)
+ // or already has one (if passed on the stack).
if (Var->getIsArg())
continue;
// An unreferenced variable doesn't need a stack slot.
if (!IsVarReferenced[Var->getIndex()])
continue;
- // Check a target-specific variable (it may end up sharing stack slots)
- // and not need accounting here.
+ // Check a target-specific variable (it may end up sharing stack slots) and
+ // not need accounting here.
if (TargetVarHook(Var))
continue;
SpilledVariables.push_back(Var);
@@ -336,8 +332,8 @@
for (Variable *Var : SortedSpilledVariables) {
size_t Increment = typeWidthInBytesOnStack(Var->getType());
- // We have sorted by alignment, so the first variable we encounter that
- // is located in each area determines the max alignment for the area.
+ // We have sorted by alignment, so the first variable we encounter that is
+ // located in each area determines the max alignment for the area.
if (!*SpillAreaAlignmentBytes)
*SpillAreaAlignmentBytes = Increment;
if (SimpleCoalescing && VMetadata->isTracked(Var)) {
@@ -373,8 +369,8 @@
*SpillAreaPaddingBytes = SpillAreaStart - PaddingStart;
}
- // If there are separate globals and locals areas, make sure the
- // locals area is aligned by padding the end of the globals area.
+ // If there are separate globals and locals areas, make sure the locals area
+ // is aligned by padding the end of the globals area.
if (LocalsSlotsAlignmentBytes) {
uint32_t GlobalsAndSubsequentPaddingSize = GlobalsSize;
GlobalsAndSubsequentPaddingSize =
@@ -391,11 +387,11 @@
const VariablesMetadata *VMetadata = Func->getVMetadata();
// For testing legalization of large stack offsets on targets with limited
// offset bits in instruction encodings, add some padding. This assumes that
- // SpillAreaSizeBytes has accounted for the extra test padding.
- // When UseFramePointer is true, the offset depends on the padding,
- // not just the SpillAreaSizeBytes. On the other hand, when UseFramePointer
- // is false, the offsets depend on the gap between SpillAreaSizeBytes
- // and SpillAreaPaddingBytes, so we don't increment that.
+ // SpillAreaSizeBytes has accounted for the extra test padding. When
+ // UseFramePointer is true, the offset depends on the padding, not just the
+ // SpillAreaSizeBytes. On the other hand, when UseFramePointer is false, the
+ // offsets depend on the gap between SpillAreaSizeBytes and
+ // SpillAreaPaddingBytes, so we don't increment that.
size_t TestPadding = Ctx->getFlags().getTestStackExtra();
if (UsesFramePointer)
SpillAreaPaddingBytes += TestPadding;
@@ -506,8 +502,8 @@
if (!BuildDefs::dump())
return;
- // If external and not initialized, this must be a cross test.
- // Don't generate a declaration for such cases.
+ // If external and not initialized, this must be a cross test. Don't generate
+ // a declaration for such cases.
const bool IsExternal =
Var.isExternal() || Ctx->getFlags().getDisableInternal();
if (IsExternal && !Var.hasInitializer())
@@ -577,10 +573,10 @@
}
}
} else {
- // NOTE: for non-constant zero initializers, this is BSS (no bits),
- // so an ELF writer would not write to the file, and only track
- // virtual offsets, but the .s writer still needs this .zero and
- // cannot simply use the .size to advance offsets.
+ // NOTE: for non-constant zero initializers, this is BSS (no bits), so an
+ // ELF writer would not write to the file, and only track virtual offsets,
+ // but the .s writer still needs this .zero and cannot simply use the .size
+ // to advance offsets.
Str << "\t.zero\t" << Size << "\n";
}
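The spill-area bookkeeping discussed in the comments above (aligning the spill area, and padding the end of the globals area so the locals area stays aligned) reduces to one align-up operation, performed by Utils::applyAlignment in the code. A small sketch of that arithmetic, assuming a nonzero power-of-two alignment; the helper name alignUp is illustrative, not the project's:

  #include <cstdint>

  // Round Value up to the next multiple of Align (Align is a power of two).
  static uint32_t alignUp(uint32_t Value, uint32_t Align) {
    return (Value + Align - 1) & ~(Align - 1);
  }

  // E.g. with GlobalsSize = 20 and a 16-byte locals alignment, the globals
  // area plus its trailing padding occupies alignUp(20, 16) == 32 bytes, so
  // 12 bytes of padding keep the locals area that follows it aligned.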
diff --git a/src/IceTargetLowering.h b/src/IceTargetLowering.h
index 7184ff0..71b6ca2 100644
--- a/src/IceTargetLowering.h
+++ b/src/IceTargetLowering.h
@@ -9,12 +9,11 @@
///
/// \file
/// This file declares the TargetLowering, LoweringContext, and
-/// TargetDataLowering classes. TargetLowering is an abstract class
-/// used to drive the translation/lowering process. LoweringContext
-/// maintains a context for lowering each instruction, offering
-/// conveniences such as iterating over non-deleted instructions.
-/// TargetDataLowering is an abstract class used to drive the
-/// lowering/emission of global initializers, external global
+/// TargetDataLowering classes. TargetLowering is an abstract class used to
+/// drive the translation/lowering process. LoweringContext maintains a context
+/// for lowering each instruction, offering conveniences such as iterating over
+/// non-deleted instructions. TargetDataLowering is an abstract class used to
+/// drive the lowering/emission of global initializers, external global
/// declarations, and internal constant pools.
///
//===----------------------------------------------------------------------===//
@@ -29,12 +28,11 @@
namespace Ice {
-/// LoweringContext makes it easy to iterate through non-deleted
-/// instructions in a node, and insert new (lowered) instructions at
-/// the current point. Along with the instruction list container and
-/// associated iterators, it holds the current node, which is needed
-/// when inserting new instructions in order to track whether variables
-/// are used as single-block or multi-block.
+/// LoweringContext makes it easy to iterate through non-deleted instructions in
+/// a node, and insert new (lowered) instructions at the current point. Along
+/// with the instruction list container and associated iterators, it holds the
+/// current node, which is needed when inserting new instructions in order to
+/// track whether variables are used as single-block or multi-block.
class LoweringContext {
LoweringContext(const LoweringContext &) = delete;
LoweringContext &operator=(const LoweringContext &) = delete;
@@ -72,17 +70,16 @@
/// Node is the argument to Inst::updateVars().
CfgNode *Node = nullptr;
Inst *LastInserted = nullptr;
- /// Cur points to the current instruction being considered. It is
- /// guaranteed to point to a non-deleted instruction, or to be End.
+ /// Cur points to the current instruction being considered. It is guaranteed
+ /// to point to a non-deleted instruction, or to be End.
InstList::iterator Cur;
- /// Next doubles as a pointer to the next valid instruction (if any),
- /// and the new-instruction insertion point. It is also updated for
- /// the caller in case the lowering consumes more than one high-level
- /// instruction. It is guaranteed to point to a non-deleted
- /// instruction after Cur, or to be End. TODO: Consider separating
- /// the notion of "next valid instruction" and "new instruction
- /// insertion point", to avoid confusion when previously-deleted
- /// instructions come between the two points.
+ /// Next doubles as a pointer to the next valid instruction (if any), and the
+ /// new-instruction insertion point. It is also updated for the caller in case
+ /// the lowering consumes more than one high-level instruction. It is
+ /// guaranteed to point to a non-deleted instruction after Cur, or to be End.
+ // TODO: Consider separating the notion of "next valid instruction" and "new
+ // instruction insertion point", to avoid confusion when previously-deleted
+ // instructions come between the two points.
InstList::iterator Next;
/// Begin is a copy of Insts.begin(), used if iterators are moved backward.
InstList::iterator Begin;
@@ -159,24 +156,22 @@
/// Inserts and lowers a single high-level instruction at a specific insertion
/// point.
void lowerInst(CfgNode *Node, InstList::iterator Next, InstHighLevel *Instr);
- /// Does preliminary lowering of the set of Phi instructions in the
- /// current node. The main intention is to do what's needed to keep
- /// the unlowered Phi instructions consistent with the lowered
- /// non-Phi instructions, e.g. to lower 64-bit operands on a 32-bit
- /// target.
+ /// Does preliminary lowering of the set of Phi instructions in the current
+ /// node. The main intention is to do what's needed to keep the unlowered Phi
+ /// instructions consistent with the lowered non-Phi instructions, e.g. to
+ /// lower 64-bit operands on a 32-bit target.
virtual void prelowerPhis() {}
- /// Tries to do branch optimization on a single instruction. Returns
- /// true if some optimization was done.
+ /// Tries to do branch optimization on a single instruction. Returns true if
+ /// some optimization was done.
virtual bool doBranchOpt(Inst * /*I*/, const CfgNode * /*NextNode*/) {
return false;
}
virtual SizeT getNumRegisters() const = 0;
- /// Returns a variable pre-colored to the specified physical
- /// register. This is generally used to get very direct access to
- /// the register such as in the prolog or epilog or for marking
- /// scratch registers as killed by a call. If a Type is not
- /// provided, a target-specific default type is used.
+ /// Returns a variable pre-colored to the specified physical register. This is
+ /// generally used to get very direct access to the register such as in the
+ /// prolog or epilog or for marking scratch registers as killed by a call. If
+ /// a Type is not provided, a target-specific default type is used.
virtual Variable *getPhysicalRegister(SizeT RegNum,
Type Ty = IceType_void) = 0;
/// Returns a printable name for the register.
@@ -187,8 +182,8 @@
virtual size_t typeWidthInBytesOnStack(Type Ty) const = 0;
bool hasComputedFrame() const { return HasComputedFrame; }
- /// Returns true if this function calls a function that has the
- /// "returns twice" attribute.
+ /// Returns true if this function calls a function that has the "returns
+ /// twice" attribute.
bool callsReturnsTwice() const { return CallsReturnsTwice; }
void setCallsReturnsTwice(bool RetTwice) { CallsReturnsTwice = RetTwice; }
int32_t getStackAdjustment() const { return StackAdjustment; }
@@ -220,10 +215,10 @@
const llvm::SmallBitVector &ExcludeRegisters,
uint64_t Salt) const = 0;
- /// Save/restore any mutable state for the situation where code
- /// emission needs multiple passes, such as sandboxing or relaxation.
- /// Subclasses may provide their own implementation, but should be
- /// sure to also call the parent class's methods.
+ /// Save/restore any mutable state for the situation where code emission needs
+ /// multiple passes, such as sandboxing or relaxation. Subclasses may provide
+ /// their own implementation, but should be sure to also call the parent
+ /// class's methods.
virtual void snapshotEmitState() {
SnapshotStackAdjustment = StackAdjustment;
}
@@ -285,30 +280,30 @@
virtual void doMockBoundsCheck(Operand *) {}
virtual void randomlyInsertNop(float Probability,
RandomNumberGenerator &RNG) = 0;
- /// This gives the target an opportunity to post-process the lowered
- /// expansion before returning.
+ /// This gives the target an opportunity to post-process the lowered expansion
+ /// before returning.
virtual void postLower() {}
- /// Find two-address non-SSA instructions and set the DestNonKillable flag
- /// to keep liveness analysis consistent.
+ /// Find two-address non-SSA instructions and set the DestNonKillable flag to
+ /// keep liveness analysis consistent.
void inferTwoAddress();
- /// Make a pass over the Cfg to determine which variables need stack slots
- /// and place them in a sorted list (SortedSpilledVariables). Among those,
- /// vars, classify the spill variables as local to the basic block vs
- /// global (multi-block) in order to compute the parameters GlobalsSize
- /// and SpillAreaSizeBytes (represents locals or general vars if the
- /// coalescing of locals is disallowed) along with alignments required
- /// for variables in each area. We rely on accurate VMetadata in order to
- /// classify a variable as global vs local (otherwise the variable is
- /// conservatively global). The in-args should be initialized to 0.
+ /// Make a pass over the Cfg to determine which variables need stack slots and
+ /// place them in a sorted list (SortedSpilledVariables). Among those vars,
+ /// classify the spill variables as local to the basic block vs global
+ /// (multi-block) in order to compute the parameters GlobalsSize and
+ /// SpillAreaSizeBytes (represents locals or general vars if the coalescing of
+ /// locals is disallowed) along with alignments required for variables in each
+ /// area. We rely on accurate VMetadata in order to classify a variable as
+ /// global vs local (otherwise the variable is conservatively global). The
+ /// in-args should be initialized to 0.
///
- /// This is only a pre-pass and the actual stack slot assignment is
- /// handled separately.
+ /// This is only a pre-pass and the actual stack slot assignment is handled
+ /// separately.
///
- /// There may be target-specific Variable types, which will be handled
- /// by TargetVarHook. If the TargetVarHook returns true, then the variable
- /// is skipped and not considered with the rest of the spilled variables.
+ /// There may be target-specific Variable types, which will be handled by
+ /// TargetVarHook. If the TargetVarHook returns true, then the variable is
+ /// skipped and not considered with the rest of the spilled variables.
void getVarStackSlotParams(VarList &SortedSpilledVariables,
llvm::SmallBitVector &RegsUsed,
size_t *GlobalsSize, size_t *SpillAreaSizeBytes,
@@ -316,9 +311,9 @@
uint32_t *LocalsSlotsAlignmentBytes,
std::function<bool(Variable *)> TargetVarHook);
- /// Calculate the amount of padding needed to align the local and global
- /// areas to the required alignment. This assumes the globals/locals layout
- /// used by getVarStackSlotParams and assignVarStackSlots.
+ /// Calculate the amount of padding needed to align the local and global areas
+ /// to the required alignment. This assumes the globals/locals layout used by
+ /// getVarStackSlotParams and assignVarStackSlots.
void alignStackSpillAreas(uint32_t SpillAreaStartOffset,
uint32_t SpillAreaAlignmentBytes,
size_t GlobalsSize,
@@ -326,21 +321,19 @@
uint32_t *SpillAreaPaddingBytes,
uint32_t *LocalsSlotsPaddingBytes);
- /// Make a pass through the SortedSpilledVariables and actually assign
- /// stack slots. SpillAreaPaddingBytes takes into account stack alignment
- /// padding. The SpillArea starts after that amount of padding.
- /// This matches the scheme in getVarStackSlotParams, where there may
- /// be a separate multi-block global var spill area and a local var
- /// spill area.
+ /// Make a pass through the SortedSpilledVariables and actually assign stack
+ /// slots. SpillAreaPaddingBytes takes into account stack alignment padding.
+ /// The SpillArea starts after that amount of padding. This matches the scheme
+ /// in getVarStackSlotParams, where there may be a separate multi-block global
+ /// var spill area and a local var spill area.
void assignVarStackSlots(VarList &SortedSpilledVariables,
size_t SpillAreaPaddingBytes,
size_t SpillAreaSizeBytes,
size_t GlobalsAndSubsequentPaddingSize,
bool UsesFramePointer);
- /// Sort the variables in Source based on required alignment.
- /// The variables with the largest alignment need are placed in the front
- /// of the Dest list.
+ /// Sort the variables in Source based on required alignment. The variables
+ /// with the largest alignment need are placed in the front of the Dest list.
void sortVarsByAlignment(VarList &Dest, const VarList &Source) const;
/// Make a call to an external helper function.
@@ -362,8 +355,8 @@
GlobalContext *Ctx;
bool HasComputedFrame = false;
bool CallsReturnsTwice = false;
- /// StackAdjustment keeps track of the current stack offset from its
- /// natural location, as arguments are pushed for a function call.
+ /// StackAdjustment keeps track of the current stack offset from its natural
+ /// location, as arguments are pushed for a function call.
int32_t StackAdjustment = 0;
SizeT NextLabelNumber = 0;
SizeT NextJumpTableNumber = 0;
@@ -411,9 +404,9 @@
int32_t SnapshotStackAdjustment = 0;
};
-/// TargetDataLowering is used for "lowering" data including initializers
-/// for global variables, and the internal constant pools. It is separated
-/// out from TargetLowering because it does not require a Cfg.
+/// TargetDataLowering is used for "lowering" data including initializers for
+/// global variables, and the internal constant pools. It is separated out from
+/// TargetLowering because it does not require a Cfg.
class TargetDataLowering {
TargetDataLowering() = delete;
TargetDataLowering(const TargetDataLowering &) = delete;
@@ -432,8 +425,8 @@
void emitGlobal(const VariableDeclaration &Var,
const IceString &SectionSuffix);
- /// For now, we assume .long is the right directive for emitting 4 byte
- /// emit global relocations. However, LLVM MIPS usually uses .4byte instead.
+ /// For now, we assume .long is the right directive for emitting 4 byte global
+ /// relocations. However, LLVM MIPS usually uses .4byte instead.
/// Perhaps there is some difference when the location is unaligned.
static const char *getEmit32Directive() { return ".long"; }
@@ -441,9 +434,9 @@
GlobalContext *Ctx;
};
-/// TargetHeaderLowering is used to "lower" the header of an output file.
-/// It writes out the target-specific header attributes. E.g., for ARM
-/// this writes out the build attributes (float ABI, etc.).
+/// TargetHeaderLowering is used to "lower" the header of an output file. It
+/// writes out the target-specific header attributes. E.g., for ARM this writes
+/// out the build attributes (float ABI, etc.).
class TargetHeaderLowering {
TargetHeaderLowering() = delete;
TargetHeaderLowering(const TargetHeaderLowering &) = delete;
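As a concrete picture of the non-deleted-instruction bookkeeping that LoweringContext and TargetLowering::lower() rely on, here is a hedged sketch of a helper that visits only live instructions; InstList, Inst::isDeleted(), and SizeT are names used elsewhere in this patch, while countLiveInstructions itself is hypothetical.

  // Sketch only: count the instructions a lowering pass would actually visit,
  // skipping anything already marked deleted (instructions are marked deleted
  // in place rather than being unlinked from the list immediately).
  SizeT countLiveInstructions(const InstList &Insts) {
    SizeT Count = 0;
    for (const Inst &Instr : Insts)
      if (!Instr.isDeleted())
        ++Count;
    return Count;
  }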
diff --git a/src/IceTargetLoweringARM32.cpp b/src/IceTargetLoweringARM32.cpp
index 0634e45..fef145f 100644
--- a/src/IceTargetLoweringARM32.cpp
+++ b/src/IceTargetLoweringARM32.cpp
@@ -47,7 +47,7 @@
} while (0)
// The following table summarizes the logic for lowering the icmp instruction
-// for i32 and narrower types. Each icmp condition has a clear mapping to an
+// for i32 and narrower types. Each icmp condition has a clear mapping to an
// ARM32 conditional move instruction.
const struct TableIcmp32_ {
@@ -62,8 +62,8 @@
// The following table summarizes the logic for lowering the icmp instruction
// for the i64 type. Two conditional moves are needed for setting to 1 or 0.
-// The operands may need to be swapped, and there is a slight difference
-// for signed vs unsigned (comparing hi vs lo first, and using cmp vs sbc).
+// The operands may need to be swapped, and there is a slight difference for
+// signed vs unsigned (comparing hi vs lo first, and using cmp vs sbc).
const struct TableIcmp64_ {
bool IsSigned;
bool Swapped;
@@ -82,18 +82,16 @@
return TableIcmp32[Index].Mapping;
}
-// In some cases, there are x-macros tables for both high-level and
-// low-level instructions/operands that use the same enum key value.
-// The tables are kept separate to maintain a proper separation
-// between abstraction layers. There is a risk that the tables could
-// get out of sync if enum values are reordered or if entries are
-// added or deleted. The following dummy namespaces use
+// In some cases, there are x-macros tables for both high-level and low-level
+// instructions/operands that use the same enum key value. The tables are kept
+// separate to maintain a proper separation between abstraction layers. There
+// is a risk that the tables could get out of sync if enum values are reordered
+// or if entries are added or deleted. The following dummy namespaces use
// static_asserts to ensure everything is kept in sync.
// Validate the enum values in ICMPARM32_TABLE.
namespace dummy1 {
-// Define a temporary set of enum values based on low-level table
-// entries.
+// Define a temporary set of enum values based on low-level table entries.
enum _tmp_enum {
#define X(val, signed, swapped64, C_32, C1_64, C2_64) _tmp_##val,
ICMPARM32_TABLE
@@ -104,8 +102,8 @@
#define X(tag, str) static const int _table1_##tag = InstIcmp::tag;
ICEINSTICMP_TABLE
#undef X
-// Define a set of constants based on low-level table entries, and
-// ensure the table entry keys are consistent.
+// Define a set of constants based on low-level table entries, and ensure the
+// table entry keys are consistent.
#define X(val, signed, swapped64, C_32, C1_64, C2_64) \
static const int _table2_##val = _tmp_##val; \
static_assert( \
@@ -113,8 +111,8 @@
"Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE");
ICMPARM32_TABLE
#undef X
-// Repeat the static asserts with respect to the high-level table
-// entries in case the high-level table has extra entries.
+// Repeat the static asserts with respect to the high-level table entries in
+// case the high-level table has extra entries.
#define X(tag, str) \
static_assert( \
_table1_##tag == _table2_##tag, \
@@ -126,17 +124,17 @@
// Stack alignment
const uint32_t ARM32_STACK_ALIGNMENT_BYTES = 16;
-// Value is in bytes. Return Value adjusted to the next highest multiple
-// of the stack alignment.
+// Value is in bytes. Return Value adjusted to the next highest multiple of the
+// stack alignment.
uint32_t applyStackAlignment(uint32_t Value) {
return Utils::applyAlignment(Value, ARM32_STACK_ALIGNMENT_BYTES);
}
-// Value is in bytes. Return Value adjusted to the next highest multiple
-// of the stack alignment required for the given type.
+// Value is in bytes. Return Value adjusted to the next highest multiple of the
+// stack alignment required for the given type.
uint32_t applyStackAlignmentTy(uint32_t Value, Type Ty) {
- // Use natural alignment, except that normally (non-NaCl) ARM only
- // aligns vectors to 8 bytes.
+ // Use natural alignment, except that normally (non-NaCl) ARM only aligns
+ // vectors to 8 bytes.
// TODO(jvoung): Check this ...
size_t typeAlignInBytes = typeWidthInBytes(Ty);
if (isVectorType(Ty))
@@ -172,9 +170,8 @@
TargetARM32::TargetARM32(Cfg *Func)
: TargetLowering(Func), CPUFeatures(Func->getContext()->getFlags()) {
- // TODO: Don't initialize IntegerRegisters and friends every time.
- // Instead, initialize in some sort of static initializer for the
- // class.
+ // TODO: Don't initialize IntegerRegisters and friends every time. Instead,
+ // initialize in some sort of static initializer for the class.
// Limit this size (or do all bitsets need to be the same width)???
llvm::SmallBitVector IntegerRegisters(RegARM32::Reg_NUM);
llvm::SmallBitVector Float32Registers(RegARM32::Reg_NUM);
@@ -243,19 +240,18 @@
// Argument lowering
Func->doArgLowering();
- // Target lowering. This requires liveness analysis for some parts
- // of the lowering decisions, such as compare/branch fusing. If
- // non-lightweight liveness analysis is used, the instructions need
- // to be renumbered first. TODO: This renumbering should only be
- // necessary if we're actually calculating live intervals, which we
- // only do for register allocation.
+ // Target lowering. This requires liveness analysis for some parts of the
+ // lowering decisions, such as compare/branch fusing. If non-lightweight
+ // liveness analysis is used, the instructions need to be renumbered first.
+ // TODO: This renumbering should only be necessary if we're actually
+ // calculating live intervals, which we only do for register allocation.
Func->renumberInstructions();
if (Func->hasError())
return;
- // TODO: It should be sufficient to use the fastest liveness
- // calculation, i.e. livenessLightweight(). However, for some
- // reason that slows down the rest of the translation. Investigate.
+ // TODO: It should be sufficient to use the fastest liveness calculation,
+ // i.e. livenessLightweight(). However, for some reason that slows down the
+ // rest of the translation. Investigate.
Func->liveness(Liveness_Basic);
if (Func->hasError())
return;
@@ -266,19 +262,19 @@
return;
Func->dump("After ARM32 codegen");
- // Register allocation. This requires instruction renumbering and
- // full liveness analysis.
+ // Register allocation. This requires instruction renumbering and full
+ // liveness analysis.
Func->renumberInstructions();
if (Func->hasError())
return;
Func->liveness(Liveness_Intervals);
if (Func->hasError())
return;
- // Validate the live range computations. The expensive validation
- // call is deliberately only made when assertions are enabled.
+ // Validate the live range computations. The expensive validation call is
+ // deliberately only made when assertions are enabled.
assert(Func->validateLiveness());
- // The post-codegen dump is done here, after liveness analysis and
- // associated cleanup, to make the dump cleaner and more useful.
+ // The post-codegen dump is done here, after liveness analysis and associated
+ // cleanup, to make the dump cleaner and more useful.
Func->dump("After initial ARM32 codegen");
Func->getVMetadata()->init(VMK_All);
regAlloc(RAK_Global);
@@ -305,11 +301,10 @@
Func->contractEmptyNodes();
Func->reorderNodes();
- // Branch optimization. This needs to be done just before code
- // emission. In particular, no transformations that insert or
- // reorder CfgNodes should be done after branch optimization. We go
- // ahead and do it before nop insertion to reduce the amount of work
- // needed for searching for opportunities.
+ // Branch optimization. This needs to be done just before code emission. In
+ // particular, no transformations that insert or reorder CfgNodes should be
+ // done after branch optimization. We go ahead and do it before nop insertion
+ // to reduce the amount of work needed for searching for opportunities.
Func->doBranchOpt();
Func->dump("After branch optimization");
@@ -395,8 +390,8 @@
Reg = Func->makeVariable(Ty);
Reg->setRegNum(RegNum);
PhysicalRegisters[Ty][RegNum] = Reg;
- // Specially mark SP and LR as an "argument" so that it is considered
- // live upon function entry.
+ // Specially mark SP and LR as "arguments" so that they are considered live
+ // upon function entry.
if (RegNum == RegARM32::Reg_sp || RegNum == RegARM32::Reg_lr) {
Func->addImplicitArg(Reg);
Reg->setIgnoreLiveness();
@@ -445,15 +440,15 @@
if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG)
return false;
int32_t RegLo, RegHi;
- // Always start i64 registers at an even register, so this may end
- // up padding away a register.
+ // Always start i64 registers at an even register, so this may end up padding
+ // away a register.
NumGPRRegsUsed = Utils::applyAlignment(NumGPRRegsUsed, 2);
RegLo = RegARM32::Reg_r0 + NumGPRRegsUsed;
++NumGPRRegsUsed;
RegHi = RegARM32::Reg_r0 + NumGPRRegsUsed;
++NumGPRRegsUsed;
- // If this bumps us past the boundary, don't allocate to a register
- // and leave any previously speculatively consumed registers as consumed.
+ // If this bumps us past the boundary, don't allocate to a register and leave
+ // any previously speculatively consumed registers as consumed.
if (NumGPRRegsUsed > ARM32_MAX_GPR_ARG)
return false;
Regs->first = RegLo;
@@ -474,15 +469,15 @@
return false;
if (isVectorType(Ty)) {
NumFPRegUnits = Utils::applyAlignment(NumFPRegUnits, 4);
- // Q registers are declared in reverse order, so
- // RegARM32::Reg_q0 > RegARM32::Reg_q1. Therefore, we need to subtract
- // NumFPRegUnits from Reg_q0. Same thing goes for D registers.
+ // Q registers are declared in reverse order, so RegARM32::Reg_q0 >
+ // RegARM32::Reg_q1. Therefore, we need to subtract NumFPRegUnits from
+ // Reg_q0. Same thing goes for D registers.
static_assert(RegARM32::Reg_q0 > RegARM32::Reg_q1,
"ARM32 Q registers are possibly declared incorrectly.");
*Reg = RegARM32::Reg_q0 - (NumFPRegUnits / 4);
NumFPRegUnits += 4;
- // If this bumps us past the boundary, don't allocate to a register
- // and leave any previously speculatively consumed registers as consumed.
+ // If this bumps us past the boundary, don't allocate to a register and
+ // leave any previously speculatively consumed registers as consumed.
if (NumFPRegUnits > ARM32_MAX_FP_REG_UNITS)
return false;
} else if (Ty == IceType_f64) {
@@ -491,8 +486,8 @@
NumFPRegUnits = Utils::applyAlignment(NumFPRegUnits, 2);
*Reg = RegARM32::Reg_d0 - (NumFPRegUnits / 2);
NumFPRegUnits += 2;
- // If this bumps us past the boundary, don't allocate to a register
- // and leave any previously speculatively consumed registers as consumed.
+ // If this bumps us past the boundary, don't allocate to a register and
+ // leave any previously speculatively consumed registers as consumed.
if (NumFPRegUnits > ARM32_MAX_FP_REG_UNITS)
return false;
} else {
@@ -509,9 +504,9 @@
VarList &Args = Func->getArgs();
TargetARM32::CallingConv CC;
- // For each register argument, replace Arg in the argument list with the
- // home register. Then generate an instruction in the prolog to copy the
- // home register to the assigned location of Arg.
+ // For each register argument, replace Arg in the argument list with the home
+ // register. Then generate an instruction in the prolog to copy the home
+ // register to the assigned location of Arg.
Context.init(Func->getEntryNode());
Context.setInsertPoint(Context.getCur());
@@ -568,13 +563,12 @@
// Helper function for addProlog().
//
-// This assumes Arg is an argument passed on the stack. This sets the
-// frame offset for Arg and updates InArgsSizeBytes according to Arg's
-// width. For an I64 arg that has been split into Lo and Hi components,
-// it calls itself recursively on the components, taking care to handle
-// Lo first because of the little-endian architecture. Lastly, this
-// function generates an instruction to copy Arg into its assigned
-// register if applicable.
+// This assumes Arg is an argument passed on the stack. This sets the frame
+// offset for Arg and updates InArgsSizeBytes according to Arg's width. For an
+// I64 arg that has been split into Lo and Hi components, it calls itself
+// recursively on the components, taking care to handle Lo first because of the
+// little-endian architecture. Lastly, this function generates an instruction
+// to copy Arg into its assigned register if applicable.
void TargetARM32::finishArgumentLowering(Variable *Arg, Variable *FramePtr,
size_t BasicFrameOffset,
size_t &InArgsSizeBytes) {
@@ -591,8 +585,8 @@
InArgsSizeBytes = applyStackAlignmentTy(InArgsSizeBytes, Ty);
Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
- // If the argument variable has been assigned a register, we need to load
- // the value from the stack slot.
+ // If the argument variable has been assigned a register, we need to load the
+ // value from the stack slot.
if (Arg->hasReg()) {
assert(Ty != IceType_i64);
OperandARM32Mem *Mem = OperandARM32Mem::create(
@@ -606,10 +600,9 @@
} else {
_ldr(Arg, Mem);
}
- // This argument-copying instruction uses an explicit
- // OperandARM32Mem operand instead of a Variable, so its
- // fill-from-stack operation has to be tracked separately for
- // statistics.
+ // This argument-copying instruction uses an explicit OperandARM32Mem
+ // operand instead of a Variable, so its fill-from-stack operation has to
+ // be tracked separately for statistics.
Ctx->statsUpdateFills();
}
}
@@ -642,16 +635,15 @@
// * GlobalsAndSubsequentPaddingSize: areas 3 - 4
// * LocalsSpillAreaSize: area 5
// * SpillAreaSizeBytes: areas 2 - 6
- // Determine stack frame offsets for each Variable without a
- // register assignment. This can be done as one variable per stack
- // slot. Or, do coalescing by running the register allocator again
- // with an infinite set of registers (as a side effect, this gives
- // variables a second chance at physical register assignment).
+ // Determine stack frame offsets for each Variable without a register
+ // assignment. This can be done as one variable per stack slot. Or, do
+ // coalescing by running the register allocator again with an infinite set of
+ // registers (as a side effect, this gives variables a second chance at
+ // physical register assignment).
//
- // A middle ground approach is to leverage sparsity and allocate one
- // block of space on the frame for globals (variables with
- // multi-block lifetime), and one block to share for locals
- // (single-block lifetime).
+ // A middle ground approach is to leverage sparsity and allocate one block of
+ // space on the frame for globals (variables with multi-block lifetime), and
+ // one block to share for locals (single-block lifetime).
Context.init(Node);
Context.setInsertPoint(Context.getCur());
@@ -661,14 +653,13 @@
RegsUsed = llvm::SmallBitVector(CalleeSaves.size());
VarList SortedSpilledVariables;
size_t GlobalsSize = 0;
- // If there is a separate locals area, this represents that area.
- // Otherwise it counts any variable not counted by GlobalsSize.
+ // If there is a separate locals area, this represents that area. Otherwise
+ // it counts any variable not counted by GlobalsSize.
SpillAreaSizeBytes = 0;
- // If there is a separate locals area, this specifies the alignment
- // for it.
+ // If there is a separate locals area, this specifies the alignment for it.
uint32_t LocalsSlotsAlignmentBytes = 0;
- // The entire spill locations area gets aligned to largest natural
- // alignment of the variables that have a spill slot.
+ // The entire spill locations area gets aligned to the largest natural
+ // alignment of the variables that have a spill slot.
uint32_t SpillAreaAlignmentBytes = 0;
// For now, we don't have target-specific variables that need special
// treatment (no stack-slot-linked SpillVariable type).
@@ -682,12 +673,11 @@
uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
SpillAreaSizeBytes += GlobalsSize;
- // Add push instructions for preserved registers.
- // On ARM, "push" can push a whole list of GPRs via a bitmask (0-15).
- // Unlike x86, ARM also has callee-saved float/vector registers.
- // The "vpush" instruction can handle a whole list of float/vector
- // registers, but it only handles contiguous sequences of registers
- // by specifying the start and the length.
+ // Add push instructions for preserved registers. On ARM, "push" can push a
+ // whole list of GPRs via a bitmask (0-15). Unlike x86, ARM also has
+ // callee-saved float/vector registers. The "vpush" instruction can handle a
+ // whole list of float/vector registers, but it only handles contiguous
+ // sequences of registers by specifying the start and the length.
VarList GPRsToPreserve;
GPRsToPreserve.reserve(CalleeSaves.size());
uint32_t NumCallee = 0;
@@ -704,8 +694,8 @@
}
for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
if (CalleeSaves[i] && RegsUsed[i]) {
- // TODO(jvoung): do separate vpush for each floating point
- // register segment and += 4, or 8 depending on type.
+ // TODO(jvoung): do separate vpush for each floating point register
+ // segment and += 4, or 8 depending on type.
++NumCallee;
PreservedRegsSizeBytes += 4;
GPRsToPreserve.push_back(getPhysicalRegister(i));
@@ -724,10 +714,10 @@
Context.insert(InstFakeUse::create(Func, FP));
}
- // Align the variables area. SpillAreaPaddingBytes is the size of
- // the region after the preserved registers and before the spill areas.
- // LocalsSlotsPaddingBytes is the amount of padding between the globals
- // and locals area if they are separate.
+ // Align the variables area. SpillAreaPaddingBytes is the size of the region
+ // after the preserved registers and before the spill areas.
+ // LocalsSlotsPaddingBytes is the amount of padding between the globals and
+ // locals area if they are separate.
assert(SpillAreaAlignmentBytes <= ARM32_STACK_ALIGNMENT_BYTES);
assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
uint32_t SpillAreaPaddingBytes = 0;
@@ -758,9 +748,9 @@
resetStackAdjustment();
- // Fill in stack offsets for stack args, and copy args into registers
- // for those that were register-allocated. Args are pushed right to
- // left, so Arg[0] is closest to the stack/frame pointer.
+ // Fill in stack offsets for stack args, and copy args into registers for
+ // those that were register-allocated. Args are pushed right to left, so
+ // Arg[0] is closest to the stack/frame pointer.
Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
size_t BasicFrameOffset = PreservedRegsSizeBytes;
if (!UsesFramePointer)
@@ -830,8 +820,8 @@
if (RI == E)
return;
- // Convert the reverse_iterator position into its corresponding
- // (forward) iterator position.
+ // Convert the reverse_iterator position into its corresponding (forward)
+ // iterator position.
InstList::iterator InsertPoint = RI.base();
--InsertPoint;
Context.init(Node);
@@ -840,9 +830,9 @@
Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
if (UsesFramePointer) {
Variable *FP = getPhysicalRegister(RegARM32::Reg_fp);
- // For late-stage liveness analysis (e.g. asm-verbose mode),
- // adding a fake use of SP before the assignment of SP=FP keeps
- // previous SP adjustments from being dead-code eliminated.
+ // For late-stage liveness analysis (e.g. asm-verbose mode), adding a fake
+ // use of SP before the assignment of SP=FP keeps previous SP adjustments
+ // from being dead-code eliminated.
Context.insert(InstFakeUse::create(Func, SP));
_mov(SP, FP);
} else {
@@ -868,8 +858,8 @@
if (!MaybeLeafFunc) {
CalleeSaves[RegARM32::Reg_lr] = true;
}
- // Pop registers in ascending order just like push
- // (instead of in reverse order).
+ // Pop registers in ascending order just like push (instead of in reverse
+ // order).
for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
if (CalleeSaves[i] && RegsUsed[i]) {
GPRsToRestore.push_back(getPhysicalRegister(i));
@@ -903,17 +893,16 @@
bool TargetARM32::isLegalVariableStackOffset(int32_t Offset) const {
constexpr bool SignExt = false;
- // TODO(jvoung): vldr of FP stack slots has a different limit from the
- // plain stackSlotType().
+ // TODO(jvoung): vldr of FP stack slots has a different limit from the plain
+ // stackSlotType().
return OperandARM32Mem::canHoldOffset(stackSlotType(), SignExt, Offset);
}
StackVariable *TargetARM32::legalizeVariableSlot(Variable *Var,
Variable *OrigBaseReg) {
int32_t Offset = Var->getStackOffset();
- // Legalize will likely need a movw/movt combination, but if the top
- // bits are all 0 from negating the offset and subtracting, we could
- // use that instead.
+ // Legalize will likely need a movw/movt combination, but if the top bits are
+ // all 0 from negating the offset and subtracting, we could use that instead.
bool ShouldSub = (-Offset & 0xFFFF0000) == 0;
if (ShouldSub)
Offset = -Offset;
@@ -949,15 +938,15 @@
return;
Variable *OrigBaseReg = getPhysicalRegister(getFrameOrStackReg());
int32_t StackAdjust = 0;
- // Do a fairly naive greedy clustering for now. Pick the first stack slot
+ // Do a fairly naive greedy clustering for now. Pick the first stack slot
// that's out of bounds and make a new base reg using the architecture's temp
- // register. If that works for the next slot, then great. Otherwise, create
- // a new base register, clobbering the previous base register. Never share a
- // base reg across different basic blocks. This isn't ideal if local and
+ // register. If that works for the next slot, then great. Otherwise, create a
+ // new base register, clobbering the previous base register. Never share a
+ // base reg across different basic blocks. This isn't ideal if local and
// multi-block variables are far apart and their references are interspersed.
- // It may help to be more coordinated about assign stack slot numbers
- // and may help to assign smaller offsets to higher-weight variables
- // so that they don't depend on this legalization.
+ // It may help to be more coordinated about assigning stack slot numbers and
+ // may help to assign smaller offsets to higher-weight variables so that they
+ // don't depend on this legalization.
for (CfgNode *Node : Func->getNodes()) {
Context.init(Node);
StackVariable *NewBaseReg = nullptr;
@@ -986,7 +975,7 @@
continue;
}
}
- // For now, only Mov instructions can have stack variables. We need to
+ // For now, only Mov instructions can have stack variables. We need to
// know the type of instruction because we currently create a fresh one
// to replace Dest/Source, rather than mutate in place.
auto *MovInst = llvm::dyn_cast<InstARM32Mov>(CurInstr);
@@ -1117,15 +1106,15 @@
static_cast<uint32_t>(Const->getValue() >> 32));
}
if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand)) {
- // Conservatively disallow memory operands with side-effects
- // in case of duplication.
+ // Conservatively disallow memory operands with side-effects in case of
+ // duplication.
assert(Mem->getAddrMode() == OperandARM32Mem::Offset ||
Mem->getAddrMode() == OperandARM32Mem::NegOffset);
const Type SplitType = IceType_i32;
if (Mem->isRegReg()) {
// We have to make a temp variable T, and add 4 to either Base or Index.
- // The Index may be shifted, so adding 4 can mean something else.
- // Thus, prefer T := Base + 4, and use T as the new Base.
+ // The Index may be shifted, so adding 4 can mean something else. Thus,
+ // prefer T := Base + 4, and use T as the new Base.
Variable *Base = Mem->getBase();
Constant *Four = Ctx->getConstantInt32(4);
Variable *NewBase = Func->makeVariable(Base->getType());
@@ -1144,8 +1133,8 @@
// We have to make a temp variable and add 4 to either Base or Offset.
// If we add 4 to Offset, this will convert a non-RegReg addressing
// mode into a RegReg addressing mode. Since NaCl sandboxing disallows
- // RegReg addressing modes, prefer adding to base and replacing instead.
- // Thus we leave the old offset alone.
+ // RegReg addressing modes, prefer adding to base and replacing
+ // instead. Thus we leave the old offset alone.
Constant *Four = Ctx->getConstantInt32(4);
Variable *NewBase = Func->makeVariable(Base->getType());
lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add,
@@ -1195,11 +1184,11 @@
void TargetARM32::lowerAlloca(const InstAlloca *Inst) {
UsesFramePointer = true;
- // Conservatively require the stack to be aligned. Some stack
- // adjustment operations implemented below assume that the stack is
- // aligned before the alloca. All the alloca code ensures that the
- // stack alignment is preserved after the alloca. The stack alignment
- // restriction can be relaxed in some cases.
+ // Conservatively require the stack to be aligned. Some stack adjustment
+ // operations implemented below assume that the stack is aligned before the
+ // alloca. All the alloca code ensures that the stack alignment is preserved
+ // after the alloca. The stack alignment restriction can be relaxed in some
+ // cases.
NeedsStackAlignment = true;
// TODO(stichnot): minimize the number of adjustments of SP, etc.
@@ -1226,8 +1215,8 @@
Operand *SubAmount = legalize(Ctx->getConstantInt32(Value));
_sub(SP, SP, SubAmount);
} else {
- // Non-constant sizes need to be adjusted to the next highest
- // multiple of the required alignment at runtime.
+ // Non-constant sizes need to be adjusted to the next highest multiple of
+ // the required alignment at runtime.
TotalSize = legalize(TotalSize, Legal_Reg | Legal_Flex);
Variable *T = makeReg(IceType_i32);
_mov(T, TotalSize);
@@ -1265,8 +1254,8 @@
case IceType_i64: {
Variable *ScratchReg = makeReg(IceType_i32);
_orrs(ScratchReg, SrcLoReg, SrcHi);
- // ScratchReg isn't going to be used, but we need the
- // side-effect of setting flags from this operation.
+ // ScratchReg isn't going to be used, but we need the side-effect of
+ // setting flags from this operation.
Context.insert(InstFakeUse::create(Func, ScratchReg));
}
}
@@ -1310,21 +1299,21 @@
void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
Variable *Dest = Inst->getDest();
- // TODO(jvoung): Should be able to flip Src0 and Src1 if it is easier
- // to legalize Src0 to flex or Src1 to flex and there is a reversible
- // instruction. E.g., reverse subtract with immediate, register vs
- // register, immediate.
- // Or it may be the case that the operands aren't swapped, but the
- // bits can be flipped and a different operation applied.
- // E.g., use BIC (bit clear) instead of AND for some masks.
+ // TODO(jvoung): Should be able to flip Src0 and Src1 if it is easier to
+ // legalize Src0 to flex or Src1 to flex and there is a reversible
+ // instruction. E.g., reverse subtract with immediate, register vs register,
+ // immediate.
+ // Or it may be the case that the operands aren't swapped, but the bits can
+ // be flipped and a different operation applied. E.g., use BIC (bit clear)
+ // instead of AND for some masks.
Operand *Src0 = legalizeUndef(Inst->getSrc(0));
Operand *Src1 = legalizeUndef(Inst->getSrc(1));
if (Dest->getType() == IceType_i64) {
- // These helper-call-involved instructions are lowered in this
- // separate switch. This is because we would otherwise assume that
- // we need to legalize Src0 to Src0RLo and Src0Hi. However, those go unused
- // with helper calls, and such unused/redundant instructions will fail
- // liveness analysis under -Om1 setting.
+ // These helper-call-involved instructions are lowered in this separate
+ // switch. This is because we would otherwise assume that we need to
+ // legalize Src0 to Src0RLo and Src0Hi. However, those go unused with
+ // helper calls, and such unused/redundant instructions will fail liveness
+ // analysis under -Om1 setting.
switch (Inst->getOp()) {
default:
break;
@@ -1332,11 +1321,10 @@
case InstArithmetic::Sdiv:
case InstArithmetic::Urem:
case InstArithmetic::Srem: {
- // Check for divide by 0 (ARM normally doesn't trap, but we want it
- // to trap for NaCl). Src1Lo and Src1Hi may have already been legalized
- // to a register, which will hide a constant source operand.
- // Instead, check the not-yet-legalized Src1 to optimize-out a divide
- // by 0 check.
+ // Check for divide by 0 (ARM normally doesn't trap, but we want it to
+ // trap for NaCl). Src1Lo and Src1Hi may have already been legalized to a
+ // register, which will hide a constant source operand. Instead, check
+ // the not-yet-legalized Src1 to optimize out a divide by 0 check.
if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Src1)) {
if (C64->getValue() == 0) {
_trap();
@@ -1348,8 +1336,8 @@
div0Check(IceType_i64, Src1Lo, Src1Hi);
}
// Technically, ARM has their own aeabi routines, but we can use the
- // non-aeabi routine as well. LLVM uses __aeabi_ldivmod for div,
- // but uses the more standard __moddi3 for rem.
+ // non-aeabi routine as well. LLVM uses __aeabi_ldivmod for div, but uses
+ // the more standard __moddi3 for rem.
const char *HelperName = "";
switch (Inst->getOp()) {
default:
@@ -1472,12 +1460,11 @@
// lsl t_lo, b.lo, c.lo
// a.lo = t_lo
// a.hi = t_hi
- // Can be strength-reduced for constant-shifts, but we don't do
- // that for now.
- // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative.
- // On ARM, shifts only take the lower 8 bits of the shift register,
- // and saturate to the range 0-32, so the negative value will
- // saturate to 32.
+ // Can be strength-reduced for constant shifts, but we don't do that for
+ // now.
+ // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative. On
+ // ARM, shifts only take the lower 8 bits of the shift register, and
+ // saturate to the range 0-32, so the negative value will saturate to 32.
Variable *T_Hi = makeReg(IceType_i32);
Variable *Src1RLo = legalizeToReg(Src1Lo);
Constant *ThirtyTwo = Ctx->getConstantInt32(32);
@@ -1493,8 +1480,8 @@
_mov(DestHi, T_Hi);
Variable *T_Lo = makeReg(IceType_i32);
// _mov seems to sometimes have better register preferencing than lsl.
- // Otherwise mov w/ lsl shifted register is a pseudo-instruction
- // that maps to lsl.
+ // Otherwise mov w/ lsl shifted register is a pseudo-instruction that
+ // maps to lsl.
_mov(T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo,
OperandARM32::LSL, Src1RLo));
_mov(DestLo, T_Lo);
@@ -1513,9 +1500,9 @@
// a.hi = t_hi
case InstArithmetic::Ashr: {
// a=b>>c (signed) ==> ...
- // Ashr is similar, but the sub t_c2, c.lo, #32 should set flags,
- // and the next orr should be conditioned on PLUS. The last two
- // right shifts should also be arithmetic.
+ // Ashr is similar, but the sub t_c2, c.lo, #32 should set flags, and the
+ // next orr should be conditioned on PLUS. The last two right shifts
+ // should also be arithmetic.
bool IsAshr = Inst->getOp() == InstArithmetic::Ashr;
Variable *T_Lo = makeReg(IceType_i32);
Variable *Src1RLo = legalizeToReg(Src1Lo);
@@ -1723,13 +1710,13 @@
Operand *NewSrc;
if (Dest->hasReg()) {
// If Dest already has a physical register, then legalize the Src operand
- // into a Variable with the same register assignment. This especially
+ // into a Variable with the same register assignment. This especially
// helps allow the use of Flex operands.
NewSrc = legalize(Src0, Legal_Reg | Legal_Flex, Dest->getRegNum());
} else {
- // Dest could be a stack operand. Since we could potentially need
- // to do a Store (and store can only have Register operands),
- // legalize this to a register.
+ // Dest could be a stack operand. Since we could potentially need to do a
+ // Store (and store can only have Register operands), legalize this to a
+ // register.
NewSrc = legalize(Src0, Legal_Reg);
}
if (isVectorType(Dest->getType())) {
@@ -1810,25 +1797,24 @@
}
}
- // Adjust the parameter area so that the stack is aligned. It is
- // assumed that the stack is already aligned at the start of the
- // calling sequence.
+ // Adjust the parameter area so that the stack is aligned. It is assumed that
+ // the stack is already aligned at the start of the calling sequence.
ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);
- // Subtract the appropriate amount for the argument area. This also
- // takes care of setting the stack adjustment during emission.
+ // Subtract the appropriate amount for the argument area. This also takes
+ // care of setting the stack adjustment during emission.
//
- // TODO: If for some reason the call instruction gets dead-code
- // eliminated after lowering, we would need to ensure that the
- // pre-call and the post-call esp adjustment get eliminated as well.
+ // TODO: If for some reason the call instruction gets dead-code eliminated
+ // after lowering, we would need to ensure that the pre-call and the
+ // post-call sp adjustments get eliminated as well.
if (ParameterAreaSizeBytes) {
Operand *SubAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes),
Legal_Reg | Legal_Flex);
_adjust_stack(ParameterAreaSizeBytes, SubAmount);
}
- // Copy arguments that are passed on the stack to the appropriate
- // stack locations.
+ // Copy arguments that are passed on the stack to the appropriate stack
+ // locations.
Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
for (auto &StackArg : StackArgs) {
ConstantInteger32 *Loc =
@@ -1850,9 +1836,9 @@
// Copy arguments to be passed in registers to the appropriate registers.
for (auto &GPRArg : GPRArgs) {
Variable *Reg = legalizeToReg(GPRArg.first, GPRArg.second);
- // Generate a FakeUse of register arguments so that they do not get
- // dead code eliminated as a result of the FakeKill of scratch
- // registers after the call.
+ // Generate a FakeUse of register arguments so that they do not get dead
+ // code eliminated as a result of the FakeKill of scratch registers after
+ // the call.
Context.insert(InstFakeUse::create(Func, Reg));
}
for (auto &FPArg : FPArgs) {
@@ -1860,8 +1846,8 @@
Context.insert(InstFakeUse::create(Func, Reg));
}
- // Generate the call instruction. Assign its result to a temporary
- // with high register allocation weight.
+ // Generate the call instruction. Assign its result to a temporary with high
+ // register allocation weight.
Variable *Dest = Instr->getDest();
// ReturnReg doubles as ReturnRegLo as necessary.
Variable *ReturnReg = nullptr;
@@ -1901,12 +1887,12 @@
}
}
Operand *CallTarget = Instr->getCallTarget();
- // TODO(jvoung): Handle sandboxing.
- // const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing();
+ // TODO(jvoung): Handle sandboxing.
+ // const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing();
- // Allow ConstantRelocatable to be left alone as a direct call,
- // but force other constants like ConstantInteger32 to be in
- // a register and make it an indirect call.
+ // Allow ConstantRelocatable to be left alone as a direct call, but force
+ // other constants like ConstantInteger32 to be in a register and make it an
+ // indirect call.
if (!llvm::isa<ConstantRelocatable>(CallTarget)) {
CallTarget = legalize(CallTarget, Legal_Reg);
}
@@ -1915,8 +1901,8 @@
if (ReturnRegHi)
Context.insert(InstFakeDef::create(Func, ReturnRegHi));
- // Add the appropriate offset to SP. The call instruction takes care
- // of resetting the stack offset during emission.
+ // Add the appropriate offset to SP. The call instruction takes care of
+ // resetting the stack offset during emission.
if (ParameterAreaSizeBytes) {
Operand *AddAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes),
Legal_Reg | Legal_Flex);
@@ -2024,8 +2010,8 @@
Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
Variable *T_Lo = makeReg(DestLo->getType());
- // i32 and i1 can just take up the whole register.
- // i32 doesn't need uxt, while i1 will have an and mask later anyway.
+ // i32 and i1 can just take up the whole register. i32 doesn't need uxt,
+ // while i1 will have an and mask later anyway.
if (Src0->getType() == IceType_i32 || Src0->getType() == IceType_i1) {
Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
_mov(T_Lo, Src0RF);
@@ -2046,9 +2032,9 @@
Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
Constant *One = Ctx->getConstantInt32(1);
Variable *T = makeReg(Dest->getType());
- // Just use _mov instead of _uxt since all registers are 32-bit.
- // _uxt requires the source to be a register so could have required
- // a _mov from legalize anyway.
+ // Just use _mov instead of _uxt since all registers are 32-bit. _uxt
+ // requires the source to be a register so could have required a _mov
+ // from legalize anyway.
_mov(T, Src0RF);
_and(T, T, One);
_mov(Dest, T);
@@ -2288,8 +2274,8 @@
// mov.<C2> t, #0 mov.<C2> t, #0
// mov a, t mov a, t
// where the "cmp.eq b.lo, c.lo" is used for unsigned and "sbcs t1, hi, hi"
- // is used for signed compares. In some cases, b and c need to be swapped
- // as well.
+ // is used for signed compares. In some cases, b and c need to be swapped as
+ // well.
//
// LLVM does:
// for EQ and NE:
@@ -2299,13 +2285,13 @@
// mov.<C> t, #1
// mov a, t
//
- // that's nice in that it's just as short but has fewer dependencies
- // for better ILP at the cost of more registers.
+ // that's nice in that it's just as short but has fewer dependencies for
+ // better ILP at the cost of more registers.
//
- // Otherwise for signed/unsigned <, <=, etc. LLVM uses a sequence with
- // two unconditional mov #0, two cmps, two conditional mov #1,
- // and one conditonal reg mov. That has few dependencies for good ILP,
- // but is a longer sequence.
+ // Otherwise for signed/unsigned <, <=, etc. LLVM uses a sequence with two
+ // unconditional mov #0, two cmps, two conditional mov #1, and one
+ // conditional reg mov. That has few dependencies for good ILP, but is a
+ // longer sequence.
//
// So, we are going with the GCC version since it's usually better (except
// perhaps for eq/ne). We could revisit special-casing eq/ne later.
@@ -2333,8 +2319,8 @@
Variable *ScratchReg = makeReg(IceType_i32);
_cmp(Src0Lo, Src1LoRF);
_sbcs(ScratchReg, Src0Hi, Src1HiRF);
- // ScratchReg isn't going to be used, but we need the
- // side-effect of setting flags from this operation.
+ // ScratchReg isn't going to be used, but we need the side-effect of
+ // setting flags from this operation.
Context.insert(InstFakeUse::create(Func, ScratchReg));
} else {
_cmp(Src0Hi, Src1HiRF);
@@ -2354,8 +2340,8 @@
// mov.C1 t, #0
// mov.C2 t, #1
// mov a, t
- // where the unsigned/sign extension is not needed for 32-bit.
- // They also have special cases for EQ and NE. E.g., for NE:
+ // where the unsigned/sign extension is not needed for 32-bit. They also have
+ // special cases for EQ and NE. E.g., for NE:
// <extend to tb, tc>
// subs t, tb, tc
// movne t, #1
@@ -2368,13 +2354,13 @@
// mov.<C> t, #1
// mov a, t
//
- // the left shift is by 0, 16, or 24, which allows the comparison to focus
- // on the digits that actually matter (for 16-bit or 8-bit signed/unsigned).
- // For the unsigned case, for some reason it does similar to GCC and does
- // a uxtb first. It's not clear to me why that special-casing is needed.
+ // the left shift is by 0, 16, or 24, which allows the comparison to focus on
+ // the digits that actually matter (for 16-bit or 8-bit signed/unsigned). For
+ // the unsigned case, for some reason it is similar to GCC and does a uxtb
+ // first. It's not clear to me why that special-casing is needed.
//
- // We'll go with the LLVM way for now, since it's shorter and has just as
- // few dependencies.
+ // We'll go with the LLVM way for now, since it's shorter and has just as few
+ // dependencies.
int32_t ShiftAmt = 32 - getScalarIntBitWidth(Src0->getType());
assert(ShiftAmt >= 0);
Constant *ShiftConst = nullptr;
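
A small standalone sketch of why the shift trick works, assuming the usual two's-complement narrowing (sltViaShift is a hypothetical name, not Subzero code): shifting both operands left by 32 - width puts the significant bits at the top of the word, so a full 32-bit signed compare agrees with the narrow signed compare.

  #include <cassert>
  #include <cstdint>

  static bool sltViaShift(int8_t A, int8_t B) {
    const int ShiftAmt = 32 - 8; // 24 for i8, 16 for i16, 0 for i32
    int32_t ShiftedA = static_cast<int32_t>(static_cast<uint32_t>(A) << ShiftAmt);
    int32_t ShiftedB = static_cast<int32_t>(static_cast<uint32_t>(B) << ShiftAmt);
    return ShiftedA < ShiftedB; // same result as (A < B) on the i8 type
  }

  int main() {
    assert(sltViaShift(-1, 1) == (-1 < 1));
    assert(sltViaShift(127, -128) == (127 < -128));
    return 0;
  }
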
@@ -2417,9 +2403,9 @@
UnimplementedError(Func->getContext()->getFlags());
return;
case Intrinsics::AtomicFenceAll:
- // NOTE: FenceAll should prevent and load/store from being moved
- // across the fence (both atomic and non-atomic). The InstARM32Mfence
- // instruction is currently marked coarsely as "HasSideEffects".
+ // NOTE: FenceAll should prevent any load/store from being moved across the
+ // fence (both atomic and non-atomic). The InstARM32Mfence instruction is
+ // currently marked coarsely as "HasSideEffects".
UnimplementedError(Func->getContext()->getFlags());
return;
case Intrinsics::AtomicIsLockFree: {
@@ -2477,10 +2463,10 @@
Call->addArg(Val);
lowerCall(Call);
// The popcount helpers always return 32-bit values, while the intrinsic's
- // signature matches some 64-bit platform's native instructions and
- // expect to fill a 64-bit reg. Thus, clear the upper bits of the dest
- // just in case the user doesn't do that in the IR or doesn't toss the bits
- // via truncate.
+ // signature matches some 64-bit platform's native instructions and expects
+ // to fill a 64-bit reg. Thus, clear the upper bits of the dest just in
+ // case the user doesn't do that in the IR or doesn't toss the bits via
+ // truncate.
if (Val->getType() == IceType_i64) {
Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
Constant *Zero = Ctx->getConstantZero(IceType_i32);
@@ -2491,8 +2477,8 @@
return;
}
case Intrinsics::Ctlz: {
- // The "is zero undef" parameter is ignored and we always return
- // a well-defined value.
+ // The "is zero undef" parameter is ignored and we always return a
+ // well-defined value.
Operand *Val = Instr->getArg(0);
Variable *ValLoR;
Variable *ValHiR = nullptr;
@@ -2639,9 +2625,9 @@
Variable *T2 = makeReg(IceType_i32);
_add(T2, T, ThirtyTwo);
_clz(T2, ValHiR, CondARM32::NE);
- // T2 is actually a source as well when the predicate is not AL
- // (since it may leave T2 alone). We use set_dest_nonkillable to
- // prolong the liveness of T2 as if it was used as a source.
+ // T2 is actually a source as well when the predicate is not AL (since it
+ // may leave T2 alone). We use set_dest_nonkillable to prolong the liveness
+ // of T2 as if it was used as a source.
_set_dest_nonkillable();
_mov(DestLo, T2);
Variable *T3 = nullptr;
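
The value the sequence above computes, written out as a plain function (a sketch; Clz32 and Clz64 are hypothetical names, and it assumes clz of 0 is 32, which matches ARM's clz):

  #include <cstdint>

  static uint32_t Clz32(uint32_t X) {
    uint32_t N = 0;
    for (uint32_t Mask = 0x80000000u; Mask != 0 && (X & Mask) == 0; Mask >>= 1)
      ++N;
    return N; // 32 when X == 0
  }

  // T2 ends up holding exactly this: clz of the high word if it is nonzero,
  // otherwise 32 plus clz of the low word.
  static uint32_t Clz64(uint32_t Lo, uint32_t Hi) {
    return Hi != 0 ? Clz32(Hi) : 32 + Clz32(Lo);
  }
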
@@ -2654,15 +2640,14 @@
}
void TargetARM32::lowerLoad(const InstLoad *Load) {
- // A Load instruction can be treated the same as an Assign
- // instruction, after the source operand is transformed into an
- // OperandARM32Mem operand.
+ // A Load instruction can be treated the same as an Assign instruction, after
+ // the source operand is transformed into an OperandARM32Mem operand.
Type Ty = Load->getDest()->getType();
Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty);
Variable *DestLoad = Load->getDest();
- // TODO(jvoung): handled folding opportunities. Sign and zero extension
- // can be folded into a load.
+ // TODO(jvoung): handle folding opportunities. Sign and zero extension can
+ // be folded into a load.
InstAssign *Assign = InstAssign::create(Func, DestLoad, Src0);
lowerAssign(Assign);
}
@@ -2708,17 +2693,15 @@
_mov(Reg, Src0F, CondARM32::AL, RegARM32::Reg_r0);
}
}
- // Add a ret instruction even if sandboxing is enabled, because
- // addEpilog explicitly looks for a ret instruction as a marker for
- // where to insert the frame removal instructions.
- // addEpilog is responsible for restoring the "lr" register as needed
- // prior to this ret instruction.
+ // Add a ret instruction even if sandboxing is enabled, because addEpilog
+ // explicitly looks for a ret instruction as a marker for where to insert the
+ // frame removal instructions. addEpilog is responsible for restoring the
+ // "lr" register as needed prior to this ret instruction.
_ret(getPhysicalRegister(RegARM32::Reg_lr), Reg);
- // Add a fake use of sp to make sure sp stays alive for the entire
- // function. Otherwise post-call sp adjustments get dead-code
- // eliminated. TODO: Are there more places where the fake use
- // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not
- // have a ret instruction.
+ // Add a fake use of sp to make sure sp stays alive for the entire function.
+ // Otherwise post-call sp adjustments get dead-code eliminated.
+ // TODO: Are there more places where the fake use should be inserted? E.g.
+ // "void f(int n){while(1) g(n);}" may not have a ret instruction.
Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
Context.insert(InstFakeUse::create(Func, SP));
}
@@ -2852,8 +2835,8 @@
if (isVectorType(Ty) || isFloatingType(Ty)) {
_vmov(Reg, Src);
} else {
- // Mov's Src operand can really only be the flexible second operand type
- // or a register. Users should guarantee that.
+ // Mov's Src operand can really only be the flexible second operand type or
+ // a register. Users should guarantee that.
_mov(Reg, Src);
}
return Reg;
@@ -2862,18 +2845,17 @@
Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed,
int32_t RegNum) {
Type Ty = From->getType();
- // Assert that a physical register is allowed. To date, all calls
- // to legalize() allow a physical register. Legal_Flex converts
- // registers to the right type OperandARM32FlexReg as needed.
+ // Assert that a physical register is allowed. To date, all calls to
+ // legalize() allow a physical register. Legal_Flex converts registers to the
+ // right type OperandARM32FlexReg as needed.
assert(Allowed & Legal_Reg);
- // Go through the various types of operands:
- // OperandARM32Mem, OperandARM32Flex, Constant, and Variable.
- // Given the above assertion, if type of operand is not legal
- // (e.g., OperandARM32Mem and !Legal_Mem), we can always copy
- // to a register.
+ // Go through the various types of operands: OperandARM32Mem,
+ // OperandARM32Flex, Constant, and Variable. Given the above assertion, if
+ // the operand type is not legal (e.g., OperandARM32Mem and !Legal_Mem), we
+ // can always copy to a register.
if (auto Mem = llvm::dyn_cast<OperandARM32Mem>(From)) {
- // Before doing anything with a Mem operand, we need to ensure
- // that the Base and Index components are in physical registers.
+ // Before doing anything with a Mem operand, we need to ensure that the
+ // Base and Index components are in physical registers.
Variable *Base = Mem->getBase();
Variable *Index = Mem->getIndex();
Variable *RegBase = nullptr;
@@ -2918,8 +2900,8 @@
if (auto FlexReg = llvm::dyn_cast<OperandARM32FlexReg>(Flex)) {
if (FlexReg->getShiftOp() == OperandARM32::kNoShift) {
From = FlexReg->getReg();
- // Fall through and let From be checked as a Variable below,
- // where it may or may not need a register.
+ // Fall through and let From be checked as a Variable below, where it
+ // may or may not need a register.
} else {
return copyToReg(Flex, RegNum);
}
@@ -2944,10 +2926,10 @@
uint32_t RotateAmt;
uint32_t Immed_8;
uint32_t Value = static_cast<uint32_t>(C32->getValue());
- // Check if the immediate will fit in a Flexible second operand,
- // if a Flexible second operand is allowed. We need to know the exact
- // value, so that rules out relocatable constants.
- // Also try the inverse and use MVN if possible.
+ // Check if the immediate will fit in a Flexible second operand, if a
+ // Flexible second operand is allowed. We need to know the exact value,
+ // so that rules out relocatable constants. Also try the inverse and use
+ // MVN if possible.
if (CanBeFlex &&
OperandARM32FlexImm::canHoldImm(Value, &RotateAmt, &Immed_8)) {
return OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt);
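
For reference, a sketch of the encodability test used here, assuming the standard ARM rule that a data-processing immediate is an 8-bit value rotated right by an even amount (CanEncodeAsFlexImm and RotL are hypothetical names; the exact meaning of the RotateAmt returned by canHoldImm may differ):

  #include <cstdint>

  static uint32_t RotL(uint32_t V, uint32_t N) {
    return N == 0 ? V : (V << N) | (V >> (32 - N));
  }

  static bool CanEncodeAsFlexImm(uint32_t Value, uint32_t *RotateAmt,
                                 uint32_t *Imm8) {
    // Value is encodable iff some even rotate-left brings it into 8 bits.
    for (uint32_t Rot = 0; Rot < 32; Rot += 2) {
      uint32_t Candidate = RotL(Value, Rot);
      if (Candidate <= 0xFF) {
        *Imm8 = Candidate;
        *RotateAmt = Rot; // Value == ror(Imm8, Rot)
        return true;
      }
    }
    return false;
  }
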
@@ -2977,12 +2959,12 @@
} else {
assert(isScalarFloatingType(Ty));
// Load floats/doubles from literal pool.
- // TODO(jvoung): Allow certain immediates to be encoded directly in
- // an operand. See Table A7-18 of the ARM manual:
- // "Floating-point modified immediate constants".
- // Or, for 32-bit floating point numbers, just encode the raw bits
- // into a movw/movt pair to GPR, and vmov to an SREG, instead of using
- // a movw/movt pair to get the const-pool address then loading to SREG.
+ // TODO(jvoung): Allow certain immediates to be encoded directly in an
+ // operand. See Table A7-18 of the ARM manual: "Floating-point modified
+ // immediate constants". Or, for 32-bit floating point numbers, just
+ // encode the raw bits into a movw/movt pair to GPR, and vmov to an SREG,
+ // instead of using a movw/movt pair to get the const-pool address then
+ // loading to SREG.
std::string Buffer;
llvm::raw_string_ostream StrBuf(Buffer);
llvm::cast<Constant>(From)->emitPoolLabel(StrBuf);
@@ -2997,9 +2979,9 @@
}
if (auto Var = llvm::dyn_cast<Variable>(From)) {
- // Check if the variable is guaranteed a physical register. This
- // can happen either when the variable is pre-colored or when it is
- // assigned infinite weight.
+ // Check if the variable is guaranteed a physical register. This can happen
+ // either when the variable is pre-colored or when it is assigned infinite
+ // weight.
bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg());
// We need a new physical register for the operand if:
// Mem is not allowed and Var isn't guaranteed a physical
@@ -3025,17 +3007,16 @@
Operand *TargetARM32::legalizeUndef(Operand *From, int32_t RegNum) {
Type Ty = From->getType();
if (llvm::isa<ConstantUndef>(From)) {
- // Lower undefs to zero. Another option is to lower undefs to an
- // uninitialized register; however, using an uninitialized register
- // results in less predictable code.
+ // Lower undefs to zero. Another option is to lower undefs to an
+ // uninitialized register; however, using an uninitialized register results
+ // in less predictable code.
//
- // If in the future the implementation is changed to lower undef
- // values to uninitialized registers, a FakeDef will be needed:
- // Context.insert(InstFakeDef::create(Func, Reg));
- // This is in order to ensure that the live range of Reg is not
- // overestimated. If the constant being lowered is a 64 bit value,
- // then the result should be split and the lo and hi components will
- // need to go in uninitialized registers.
+ // If in the future the implementation is changed to lower undef values to
+ // uninitialized registers, a FakeDef will be needed:
+ //     Context.insert(InstFakeDef::create(Func, Reg));
+ // This ensures that the live range of Reg is not overestimated. If the
+ // constant being lowered is a 64-bit value, then the result should be
+ // split and the lo and hi components will need uninitialized registers.
if (isVectorType(Ty))
return makeVectorOfZeros(Ty, RegNum);
return Ctx->getConstantZero(Ty);
@@ -3045,15 +3026,15 @@
OperandARM32Mem *TargetARM32::formMemoryOperand(Operand *Operand, Type Ty) {
OperandARM32Mem *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand);
- // It may be the case that address mode optimization already creates
- // an OperandARM32Mem, so in that case it wouldn't need another level
- // of transformation.
+ // It may be the case that address mode optimization already creates an
+ // OperandARM32Mem, so in that case it wouldn't need another level of
+ // transformation.
if (Mem) {
return llvm::cast<OperandARM32Mem>(legalize(Mem));
}
- // If we didn't do address mode optimization, then we only
- // have a base/offset to work with. ARM always requires a base
- // register, so just use that to hold the operand.
+ // If we didn't do address mode optimization, then we only have a base/offset
+ // to work with. ARM always requires a base register, so just use that to
+ // hold the operand.
Variable *Base = legalizeToReg(Operand);
return OperandARM32Mem::create(
Func, Ty, Base,
@@ -3076,9 +3057,9 @@
uint32_t RotateAmt;
uint32_t Immed_8;
Operand *Mask;
- // Use AND or BIC to mask off the bits, depending on which immediate fits
- // (if it fits at all). Assume Align is usually small, in which case BIC
- // works better. Thus, this rounds down to the alignment.
+ // Use AND or BIC to mask off the bits, depending on which immediate fits (if
+ // it fits at all). Assume Align is usually small, in which case BIC works
+ // better. Thus, this rounds down to the alignment.
if (OperandARM32FlexImm::canHoldImm(Align - 1, &RotateAmt, &Immed_8)) {
Mask = legalize(Ctx->getConstantInt32(Align - 1), Legal_Reg | Legal_Flex);
_bic(Reg, Reg, Mask);
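
Both instruction choices compute the same round-down; a sketch of the value produced, assuming Align is a power of two (RoundDownToAlign is a hypothetical name):

  #include <cstdint>

  static uint32_t RoundDownToAlign(uint32_t Value, uint32_t Align) {
    // BIC with (Align - 1) and AND with ~(Align - 1) both clear the low bits;
    // for small Align, the (Align - 1) mask is the one more likely to fit the
    // flexible-immediate encoding.
    return Value & ~(Align - 1);
  }
  // e.g. RoundDownToAlign(23, 16) == 16 and RoundDownToAlign(32, 16) == 32.
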
@@ -3170,17 +3151,18 @@
OstreamLocker L(Ctx);
Ostream &Str = Ctx->getStrEmit();
Str << ".syntax unified\n";
- // Emit build attributes in format: .eabi_attribute TAG, VALUE.
- // See Sec. 2 of "Addenda to, and Errata in the ABI for the ARM architecture"
- // http://infocenter.arm.com/help/topic/com.arm.doc.ihi0045d/IHI0045D_ABI_addenda.pdf
+ // Emit build attributes in format: .eabi_attribute TAG, VALUE. See Sec. 2 of
+ // "Addenda to, and Errata in the ABI for the ARM architecture"
+ // http://infocenter.arm.com/help/topic/com.arm.doc.ihi0045d/IHI0045D_ABI_addenda.pdf
//
- // Tag_conformance should be be emitted first in a file-scope
- // sub-subsection of the first public subsection of the attributes.
+ // Tag_conformance should be emitted first in a file-scope sub-subsection
+ // of the first public subsection of the attributes.
Str << ".eabi_attribute 67, \"2.09\" @ Tag_conformance\n";
- // Chromebooks are at least A15, but do A9 for higher compat.
- // For some reason, the LLVM ARM asm parser has the .cpu directive override
- // the mattr specified on the commandline. So to test hwdiv, we need to set
- // the .cpu directive higher (can't just rely on --mattr=...).
+ // Chromebooks are at least A15, but do A9 for higher compat. For some
+ // reason, the LLVM ARM asm parser has the .cpu directive override the mattr
+ // specified on the commandline. So to test hwdiv, we need to set the .cpu
+ // directive higher (can't just rely on --mattr=...).
if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) {
Str << ".cpu cortex-a15\n";
} else {
diff --git a/src/IceTargetLoweringARM32.h b/src/IceTargetLoweringARM32.h
index 57e0b5a..5578289 100644
--- a/src/IceTargetLoweringARM32.h
+++ b/src/IceTargetLoweringARM32.h
@@ -78,8 +78,8 @@
SizeT getReservedTmpReg() const { return RegARM32::Reg_ip; }
size_t typeWidthInBytesOnStack(Type Ty) const override {
- // Round up to the next multiple of 4 bytes. In particular, i1,
- // i8, and i16 are rounded up to 4 bytes.
+ // Round up to the next multiple of 4 bytes. In particular, i1, i8, and i16
+ // are rounded up to 4 bytes.
return (typeWidthInBytes(Ty) + 3) & ~3;
}
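
A quick sanity check of the rounding expression, written as plain static_asserts (illustrative only, not part of the patch):

  static_assert(((1 + 3) & ~3) == 4, "i1/i8 round up to 4 bytes");
  static_assert(((2 + 3) & ~3) == 4, "i16 rounds up to 4 bytes");
  static_assert(((4 + 3) & ~3) == 4, "i32 stays at 4 bytes");
  static_assert(((8 + 3) & ~3) == 8, "i64 stays at 8 bytes");
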
@@ -101,9 +101,8 @@
void addProlog(CfgNode *Node) override;
void addEpilog(CfgNode *Node) override;
- /// Ensure that a 64-bit Variable has been split into 2 32-bit
- /// Variables, creating them if necessary. This is needed for all
- /// I64 operations.
+ /// Ensure that a 64-bit Variable has been split into 2 32-bit Variables,
+ /// creating them if necessary. This is needed for all I64 operations.
void split64(Variable *Var);
Operand *loOperand(Operand *Operand);
Operand *hiOperand(Operand *Operand);
@@ -147,8 +146,8 @@
enum OperandLegalization {
Legal_None = 0,
Legal_Reg = 1 << 0, /// physical register, not stack location
- Legal_Flex = 1 << 1, /// A flexible operand2, which can hold rotated
- /// small immediates, or shifted registers.
+ Legal_Flex = 1 << 1, /// A flexible operand2, which can hold rotated small
+ /// immediates, or shifted registers.
Legal_Mem = 1 << 2, /// includes [r0, r1 lsl #2] as well as [sp, #12]
Legal_All = ~Legal_None
};
@@ -171,9 +170,8 @@
const llvm::SmallBitVector &ExcludeRegisters,
uint64_t Salt) const override;
- // If a divide-by-zero check is needed, inserts a:
- // test; branch .LSKIP; trap; .LSKIP: <continuation>.
- // If no check is needed nothing is inserted.
+ // If a divide-by-zero check is needed, inserts a: test; branch .LSKIP; trap;
+ // .LSKIP: <continuation>. If no check is needed, nothing is inserted.
void div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi);
using ExtInstr = void (TargetARM32::*)(Variable *, Variable *,
CondARM32::Cond);
@@ -185,9 +183,9 @@
void lowerCLZ(Variable *Dest, Variable *ValLo, Variable *ValHi);
- // The following are helpers that insert lowered ARM32 instructions
- // with minimal syntactic overhead, so that the lowering code can
- // look as close to assembly as practical.
+ // The following are helpers that insert lowered ARM32 instructions with
+ // minimal syntactic overhead, so that the lowering code can look as close to
+ // assembly as practical.
void _add(Variable *Dest, Variable *Src0, Operand *Src1,
CondARM32::Cond Pred = CondARM32::AL) {
@@ -265,9 +263,9 @@
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Mls::create(Func, Dest, Src0, Src1, Acc, Pred));
}
- /// If Dest=nullptr is passed in, then a new variable is created,
- /// marked as infinite register allocation weight, and returned
- /// through the in/out Dest argument.
+ /// If Dest=nullptr is passed in, then a new variable is created, marked as
+ /// infinite register allocation weight, and returned through the in/out Dest
+ /// argument.
void _mov(Variable *&Dest, Operand *Src0,
CondARM32::Cond Pred = CondARM32::AL,
int32_t RegNum = Variable::NoRegister) {
@@ -281,8 +279,8 @@
NewInst->setDestNonKillable();
Context.insert(NewInst);
}
- /// The Operand can only be a 16-bit immediate or a ConstantRelocatable
- /// (with an upper16 relocation).
+ /// The Operand can only be a 16-bit immediate or a ConstantRelocatable (with
+ /// an upper16 relocation).
void _movt(Variable *Dest, Operand *Src0,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Movt::create(Func, Dest, Src0, Pred));
@@ -378,8 +376,8 @@
Variable *Src1, CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(
InstARM32Umull::create(Func, DestLo, DestHi, Src0, Src1, Pred));
- // Model the modification to the second dest as a fake def.
- // Note that the def is not predicated.
+ // Model the modification to the second dest as a fake def. Note that the
+ // def is not predicated.
Context.insert(InstFakeDef::create(Func, DestHi, DestLo));
}
void _uxt(Variable *Dest, Variable *Src0,
@@ -400,11 +398,10 @@
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Vldr::create(Func, Dest, Src, Pred));
}
- // There are a whole bunch of vmov variants, to transfer within
- // S/D/Q registers, between core integer registers and S/D,
- // and from small immediates into S/D.
- // For integer -> S/D/Q there is a variant which takes two integer
- // register to fill a D, or to fill two consecutive S registers.
+ // There are a whole bunch of vmov variants, to transfer within S/D/Q
+ // registers, between core integer registers and S/D, and from small
+ // immediates into S/D. For integer -> S/D/Q there is a variant which takes
+ // two integer registers to fill a D, or to fill two consecutive S registers.
// Vmov can also be used to insert-element. E.g.,
// "vmov.8 d0[1], r0"
// but insert-element is a "two-address" operation where only part of the
@@ -440,8 +437,8 @@
}
/// Run a pass through stack variables and ensure that the offsets are legal.
- /// If the offset is not legal, use a new base register that accounts for
- /// the offset, such that the addressing mode offset bits are now legal.
+ /// If the offset is not legal, use a new base register that accounts for the
+ /// offset, such that the addressing mode offset bits are now legal.
void legalizeStackSlots();
/// Returns true if the given Offset can be represented in a stack ldr/str.
bool isLegalVariableStackOffset(int32_t Offset) const;
@@ -464,11 +461,11 @@
/// Helper class that understands the Calling Convention and register
/// assignments. The first few integer type parameters can use r0-r3,
/// regardless of their position relative to the floating-point/vector
- /// arguments in the argument list. Floating-point and vector arguments
- /// can use q0-q3 (aka d0-d7, s0-s15). Technically, arguments that can
- /// start with registers but extend beyond the available registers can be
- /// split between the registers and the stack. However, this is typically
- /// for passing GPR structs by value, and PNaCl transforms expand this out.
+ /// arguments in the argument list. Floating-point and vector arguments can
+ /// use q0-q3 (aka d0-d7, s0-s15). Technically, arguments that can start with
+ /// registers but extend beyond the available registers can be split between
+ /// the registers and the stack. However, this is typically for passing GPR
+ /// structs by value, and PNaCl transforms expand this out.
///
/// Also, at the point before the call, the stack must be aligned.
class CallingConv {
diff --git a/src/IceTargetLoweringMIPS32.cpp b/src/IceTargetLoweringMIPS32.cpp
index 080e56b..b634306 100644
--- a/src/IceTargetLoweringMIPS32.cpp
+++ b/src/IceTargetLoweringMIPS32.cpp
@@ -43,9 +43,8 @@
} // end of anonymous namespace
TargetMIPS32::TargetMIPS32(Cfg *Func) : TargetLowering(Func) {
- // TODO: Don't initialize IntegerRegisters and friends every time.
- // Instead, initialize in some sort of static initializer for the
- // class.
+ // TODO: Don't initialize IntegerRegisters and friends every time. Instead,
+ // initialize in some sort of static initializer for the class.
llvm::SmallBitVector IntegerRegisters(RegMIPS32::Reg_NUM);
llvm::SmallBitVector FloatRegisters(RegMIPS32::Reg_NUM);
llvm::SmallBitVector VectorRegisters(RegMIPS32::Reg_NUM);
@@ -105,19 +104,18 @@
// Argument lowering
Func->doArgLowering();
- // Target lowering. This requires liveness analysis for some parts
- // of the lowering decisions, such as compare/branch fusing. If
- // non-lightweight liveness analysis is used, the instructions need
- // to be renumbered first. TODO: This renumbering should only be
- // necessary if we're actually calculating live intervals, which we
- // only do for register allocation.
+ // Target lowering. This requires liveness analysis for some parts of the
+ // lowering decisions, such as compare/branch fusing. If non-lightweight
+ // liveness analysis is used, the instructions need to be renumbered first.
+ // TODO: This renumbering should only be necessary if we're actually
+ // calculating live intervals, which we only do for register allocation.
Func->renumberInstructions();
if (Func->hasError())
return;
- // TODO: It should be sufficient to use the fastest liveness
- // calculation, i.e. livenessLightweight(). However, for some
- // reason that slows down the rest of the translation. Investigate.
+ // TODO: It should be sufficient to use the fastest liveness calculation,
+ // i.e. livenessLightweight(). However, for some reason that slows down the
+ // rest of the translation. Investigate.
Func->liveness(Liveness_Basic);
if (Func->hasError())
return;
@@ -128,19 +126,19 @@
return;
Func->dump("After MIPS32 codegen");
- // Register allocation. This requires instruction renumbering and
- // full liveness analysis.
+ // Register allocation. This requires instruction renumbering and full
+ // liveness analysis.
Func->renumberInstructions();
if (Func->hasError())
return;
Func->liveness(Liveness_Intervals);
if (Func->hasError())
return;
- // Validate the live range computations. The expensive validation
- // call is deliberately only made when assertions are enabled.
+ // Validate the live range computations. The expensive validation call is
+ // deliberately only made when assertions are enabled.
assert(Func->validateLiveness());
- // The post-codegen dump is done here, after liveness analysis and
- // associated cleanup, to make the dump cleaner and more useful.
+ // The post-codegen dump is done here, after liveness analysis and associated
+ // cleanup, to make the dump cleaner and more useful.
Func->dump("After initial MIPS32 codegen");
Func->getVMetadata()->init(VMK_All);
regAlloc(RAK_Global);
@@ -162,11 +160,10 @@
Func->contractEmptyNodes();
Func->reorderNodes();
- // Branch optimization. This needs to be done just before code
- // emission. In particular, no transformations that insert or
- // reorder CfgNodes should be done after branch optimization. We go
- // ahead and do it before nop insertion to reduce the amount of work
- // needed for searching for opportunities.
+ // Branch optimization. This needs to be done just before code emission. In
+ // particular, no transformations that insert or reorder CfgNodes should be
+ // done after branch optimization. We go ahead and do it before nop insertion
+ // to reduce the amount of work needed for searching for opportunities.
Func->doBranchOpt();
Func->dump("After branch optimization");
@@ -246,8 +243,8 @@
Reg = Func->makeVariable(Ty);
Reg->setRegNum(RegNum);
PhysicalRegisters[Ty][RegNum] = Reg;
- // Specially mark SP as an "argument" so that it is considered
- // live upon function entry.
+ // Specially mark SP as an "argument" so that it is considered live upon
+ // function entry.
if (RegNum == RegMIPS32::Reg_SP || RegNum == RegMIPS32::Reg_RA) {
Func->addImplicitArg(Reg);
Reg->setIgnoreLiveness();
@@ -321,11 +318,11 @@
void TargetMIPS32::lowerAlloca(const InstAlloca *Inst) {
UsesFramePointer = true;
- // Conservatively require the stack to be aligned. Some stack
- // adjustment operations implemented below assume that the stack is
- // aligned before the alloca. All the alloca code ensures that the
- // stack alignment is preserved after the alloca. The stack alignment
- // restriction can be relaxed in some cases.
+ // Conservatively require the stack to be aligned. Some stack adjustment
+ // operations implemented below assume that the stack is aligned before the
+ // alloca. All the alloca code ensures that the stack alignment is preserved
+ // after the alloca. The stack alignment restriction can be relaxed in some
+ // cases.
NeedsStackAlignment = true;
(void)Inst;
UnimplementedError(Func->getContext()->getFlags());
@@ -483,9 +480,9 @@
UnimplementedError(Func->getContext()->getFlags());
return;
case Intrinsics::AtomicFenceAll:
- // NOTE: FenceAll should prevent and load/store from being moved
- // across the fence (both atomic and non-atomic). The InstMIPS32Mfence
- // instruction is currently marked coarsely as "HasSideEffects".
+ // NOTE: FenceAll should prevent any load/store from being moved across the
+ // fence (both atomic and non-atomic). The InstMIPS32Mfence instruction is
+ // currently marked coarsely as "HasSideEffects".
UnimplementedError(Func->getContext()->getFlags());
return;
case Intrinsics::AtomicIsLockFree: {
@@ -549,9 +546,8 @@
return;
}
case Intrinsics::Memset: {
- // The value operand needs to be extended to a stack slot size
- // because the PNaCl ABI requires arguments to be at least 32 bits
- // wide.
+ // The value operand needs to be extended to a stack slot size because the
+ // PNaCl ABI requires arguments to be at least 32 bits wide.
Operand *ValOp = Instr->getArg(1);
assert(ValOp->getType() == IceType_i8);
Variable *ValExt = Func->makeVariable(stackSlotType());
@@ -651,10 +647,9 @@
UnimplementedError(Func->getContext()->getFlags());
}
-// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to
-// preserve integrity of liveness analysis. Undef values are also
-// turned into zeroes, since loOperand() and hiOperand() don't expect
-// Undef input.
+// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to preserve
+// integrity of liveness analysis. Undef values are also turned into zeroes,
+// since loOperand() and hiOperand() don't expect Undef input.
void TargetMIPS32::prelowerPhis() {
UnimplementedError(Func->getContext()->getFlags());
}
@@ -662,8 +657,8 @@
void TargetMIPS32::postLower() {
if (Ctx->getFlags().getOptLevel() == Opt_m1)
return;
- // Find two-address non-SSA instructions where Dest==Src0, and set
- // the DestNonKillable flag to keep liveness analysis consistent.
+ // Find two-address non-SSA instructions where Dest==Src0, and set the
+ // DestNonKillable flag to keep liveness analysis consistent.
UnimplementedError(Func->getContext()->getFlags());
}
diff --git a/src/IceTargetLoweringMIPS32.h b/src/IceTargetLoweringMIPS32.h
index 52c09cf..1ae0b28 100644
--- a/src/IceTargetLoweringMIPS32.h
+++ b/src/IceTargetLoweringMIPS32.h
@@ -52,8 +52,8 @@
return UsesFramePointer ? RegMIPS32::Reg_FP : RegMIPS32::Reg_SP;
}
size_t typeWidthInBytesOnStack(Type Ty) const override {
- // Round up to the next multiple of 4 bytes. In particular, i1,
- // i8, and i16 are rounded up to 4 bytes.
+ // Round up to the next multiple of 4 bytes. In particular, i1, i8, and i16
+ // are rounded up to 4 bytes.
return (typeWidthInBytes(Ty) + 3) & ~3;
}
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index f06150f..47f6ae1 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -8,9 +8,8 @@
//===----------------------------------------------------------------------===//
///
/// \file
-/// This file implements the TargetLoweringX8632 class, which
-/// consists almost entirely of the lowering sequence for each
-/// high-level instruction.
+/// This file implements the TargetLoweringX8632 class, which consists almost
+/// entirely of the lowering sequence for each high-level instruction.
///
//===----------------------------------------------------------------------===//
@@ -100,24 +99,21 @@
void TargetX8632::lowerCall(const InstCall *Instr) {
// x86-32 calling convention:
//
- // * At the point before the call, the stack must be aligned to 16
- // bytes.
+ // * At the point before the call, the stack must be aligned to 16 bytes.
//
- // * The first four arguments of vector type, regardless of their
- // position relative to the other arguments in the argument list, are
- // placed in registers xmm0 - xmm3.
+ // * The first four arguments of vector type, regardless of their position
+ // relative to the other arguments in the argument list, are placed in
+ // registers xmm0 - xmm3.
//
- // * Other arguments are pushed onto the stack in right-to-left order,
- // such that the left-most argument ends up on the top of the stack at
- // the lowest memory address.
+ // * Other arguments are pushed onto the stack in right-to-left order, such
+ // that the left-most argument ends up on the top of the stack at the lowest
+ // memory address.
//
- // * Stack arguments of vector type are aligned to start at the next
- // highest multiple of 16 bytes. Other stack arguments are aligned to
- // 4 bytes.
+ // * Stack arguments of vector type are aligned to start at the next highest
+ // multiple of 16 bytes. Other stack arguments are aligned to 4 bytes.
//
- // This intends to match the section "IA-32 Function Calling
- // Convention" of the document "OS X ABI Function Call Guide" by
- // Apple.
+ // This intends to match the section "IA-32 Function Calling Convention" of
+ // the document "OS X ABI Function Call Guide" by Apple.
NeedsStackAlignment = true;
using OperandList = std::vector<Operand *>;
@@ -149,46 +145,44 @@
}
}
- // Adjust the parameter area so that the stack is aligned. It is
- // assumed that the stack is already aligned at the start of the
- // calling sequence.
+ // Adjust the parameter area so that the stack is aligned. It is assumed that
+ // the stack is already aligned at the start of the calling sequence.
ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes);
- // Subtract the appropriate amount for the argument area. This also
- // takes care of setting the stack adjustment during emission.
+ // Subtract the appropriate amount for the argument area. This also takes
+ // care of setting the stack adjustment during emission.
//
- // TODO: If for some reason the call instruction gets dead-code
- // eliminated after lowering, we would need to ensure that the
- // pre-call and the post-call esp adjustment get eliminated as well.
+ // TODO: If for some reason the call instruction gets dead-code eliminated
+ // after lowering, we would need to ensure that the pre-call and the
+ // post-call esp adjustment get eliminated as well.
if (ParameterAreaSizeBytes) {
_adjust_stack(ParameterAreaSizeBytes);
}
- // Copy arguments that are passed on the stack to the appropriate
- // stack locations.
+ // Copy arguments that are passed on the stack to the appropriate stack
+ // locations.
for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) {
lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i]));
}
- // Copy arguments to be passed in registers to the appropriate
- // registers.
- // TODO: Investigate the impact of lowering arguments passed in
- // registers after lowering stack arguments as opposed to the other
- // way around. Lowering register arguments after stack arguments may
- // reduce register pressure. On the other hand, lowering register
- // arguments first (before stack arguments) may result in more compact
- // code, as the memory operand displacements may end up being smaller
- // before any stack adjustment is done.
+ // Copy arguments to be passed in registers to the appropriate registers.
+ // TODO: Investigate the impact of lowering arguments passed in registers
+ // after lowering stack arguments as opposed to the other way around.
+ // Lowering register arguments after stack arguments may reduce register
+ // pressure. On the other hand, lowering register arguments first (before
+ // stack arguments) may result in more compact code, as the memory operand
+ // displacements may end up being smaller before any stack adjustment is
+ // done.
for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) {
Variable *Reg =
legalizeToReg(XmmArgs[i], Traits::RegisterSet::Reg_xmm0 + i);
- // Generate a FakeUse of register arguments so that they do not get
- // dead code eliminated as a result of the FakeKill of scratch
- // registers after the call.
+ // Generate a FakeUse of register arguments so that they do not get dead
+ // code eliminated as a result of the FakeKill of scratch registers after
+ // the call.
Context.insert(InstFakeUse::create(Func, Reg));
}
- // Generate the call instruction. Assign its result to a temporary
- // with high register allocation weight.
+ // Generate the call instruction. Assign its result to a temporary with high
+ // register allocation weight.
Variable *Dest = Instr->getDest();
// ReturnReg doubles as ReturnRegLo as necessary.
Variable *ReturnReg = nullptr;
@@ -211,8 +205,8 @@
break;
case IceType_f32:
case IceType_f64:
- // Leave ReturnReg==ReturnRegHi==nullptr, and capture the result with
- // the fstp instruction.
+ // Leave ReturnReg==ReturnRegHi==nullptr, and capture the result with the
+ // fstp instruction.
break;
case IceType_v4i1:
case IceType_v8i1:
@@ -247,8 +241,8 @@
if (ReturnRegHi)
Context.insert(InstFakeDef::create(Func, ReturnRegHi));
- // Add the appropriate offset to esp. The call instruction takes care
- // of resetting the stack offset during emission.
+ // Add the appropriate offset to esp. The call instruction takes care of
+ // resetting the stack offset during emission.
if (ParameterAreaSizeBytes) {
Variable *esp =
Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
@@ -287,22 +281,21 @@
}
}
} else if (isScalarFloatingType(Dest->getType())) {
- // Special treatment for an FP function which returns its result in
- // st(0).
- // If Dest ends up being a physical xmm register, the fstp emit code
- // will route st(0) through a temporary stack slot.
+ // Special treatment for an FP function which returns its result in st(0).
+ // If Dest ends up being a physical xmm register, the fstp emit code will
+ // route st(0) through a temporary stack slot.
_fstp(Dest);
- // Create a fake use of Dest in case it actually isn't used,
- // because st(0) still needs to be popped.
+ // Create a fake use of Dest in case it actually isn't used, because st(0)
+ // still needs to be popped.
Context.insert(InstFakeUse::create(Func, Dest));
}
}
void TargetX8632::lowerArguments() {
VarList &Args = Func->getArgs();
- // The first four arguments of vector type, regardless of their
- // position relative to the other arguments in the argument list, are
- // passed in registers xmm0 - xmm3.
+ // The first four arguments of vector type, regardless of their position
+ // relative to the other arguments in the argument list, are passed in
+ // registers xmm0 - xmm3.
unsigned NumXmmArgs = 0;
Context.init(Func->getEntryNode());
@@ -314,9 +307,9 @@
Type Ty = Arg->getType();
if (!isVectorType(Ty))
continue;
- // Replace Arg in the argument list with the home register. Then
- // generate an instruction in the prolog to copy the home register
- // to the assigned location of Arg.
+ // Replace Arg in the argument list with the home register. Then generate
+ // an instruction in the prolog to copy the home register to the assigned
+ // location of Arg.
int32_t RegNum = Traits::RegisterSet::Reg_xmm0 + NumXmmArgs;
++NumXmmArgs;
Variable *RegisterArg = Func->makeVariable(Ty);
@@ -351,15 +344,14 @@
_mov(Reg, Src0, Traits::RegisterSet::Reg_eax);
}
}
- // Add a ret instruction even if sandboxing is enabled, because
- // addEpilog explicitly looks for a ret instruction as a marker for
- // where to insert the frame removal instructions.
+ // Add a ret instruction even if sandboxing is enabled, because addEpilog
+ // explicitly looks for a ret instruction as a marker for where to insert the
+ // frame removal instructions.
_ret(Reg);
// Add a fake use of esp to make sure esp stays alive for the entire
- // function. Otherwise post-call esp adjustments get dead-code
- // eliminated. TODO: Are there more places where the fake use
- // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not
- // have a ret instruction.
+ // function. Otherwise post-call esp adjustments get dead-code eliminated.
+ // TODO: Are there more places where the fake use should be inserted? E.g.
+ // "void f(int n){while(1) g(n);}" may not have a ret instruction.
Variable *esp =
Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
Context.insert(InstFakeUse::create(Func, esp));
@@ -395,16 +387,15 @@
// * LocalsSpillAreaSize: area 6
// * SpillAreaSizeBytes: areas 3 - 7
- // Determine stack frame offsets for each Variable without a
- // register assignment. This can be done as one variable per stack
- // slot. Or, do coalescing by running the register allocator again
- // with an infinite set of registers (as a side effect, this gives
- // variables a second chance at physical register assignment).
+ // Determine stack frame offsets for each Variable without a register
+ // assignment. This can be done as one variable per stack slot. Or, do
+ // coalescing by running the register allocator again with an infinite set of
+ // registers (as a side effect, this gives variables a second chance at
+ // physical register assignment).
//
- // A middle ground approach is to leverage sparsity and allocate one
- // block of space on the frame for globals (variables with
- // multi-block lifetime), and one block to share for locals
- // (single-block lifetime).
+ // A middle ground approach is to leverage sparsity and allocate one block of
+ // space on the frame for globals (variables with multi-block lifetime), and
+ // one block to share for locals (single-block lifetime).
Context.init(Node);
Context.setInsertPoint(Context.getCur());
@@ -414,17 +405,16 @@
RegsUsed = llvm::SmallBitVector(CalleeSaves.size());
VarList SortedSpilledVariables, VariablesLinkedToSpillSlots;
size_t GlobalsSize = 0;
- // If there is a separate locals area, this represents that area.
- // Otherwise it counts any variable not counted by GlobalsSize.
+ // If there is a separate locals area, this represents that area. Otherwise
+ // it counts any variable not counted by GlobalsSize.
SpillAreaSizeBytes = 0;
- // If there is a separate locals area, this specifies the alignment
- // for it.
+ // If there is a separate locals area, this specifies the alignment for it.
uint32_t LocalsSlotsAlignmentBytes = 0;
- // The entire spill locations area gets aligned to largest natural
- // alignment of the variables that have a spill slot.
+ // The entire spill locations area gets aligned to the largest natural
+ // alignment of the variables that have a spill slot.
uint32_t SpillAreaAlignmentBytes = 0;
- // A spill slot linked to a variable with a stack slot should reuse
- // that stack slot.
+ // A spill slot linked to a variable with a stack slot should reuse that
+ // stack slot.
std::function<bool(Variable *)> TargetVarHook =
[&VariablesLinkedToSpillSlots](Variable *Var) {
if (auto *SpillVar =
@@ -466,15 +456,14 @@
Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
_push(ebp);
_mov(ebp, esp);
- // Keep ebp live for late-stage liveness analysis
- // (e.g. asm-verbose mode).
+ // Keep ebp live for late-stage liveness analysis (e.g. asm-verbose mode).
Context.insert(InstFakeUse::create(Func, ebp));
}
- // Align the variables area. SpillAreaPaddingBytes is the size of
- // the region after the preserved registers and before the spill areas.
- // LocalsSlotsPaddingBytes is the amount of padding between the globals
- // and locals area if they are separate.
+ // Align the variables area. SpillAreaPaddingBytes is the size of the region
+ // after the preserved registers and before the spill areas.
+ // LocalsSlotsPaddingBytes is the amount of padding between the globals and
+ // locals area if they are separate.
assert(SpillAreaAlignmentBytes <= Traits::X86_STACK_ALIGNMENT_BYTES);
assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
uint32_t SpillAreaPaddingBytes = 0;
@@ -504,9 +493,9 @@
resetStackAdjustment();
- // Fill in stack offsets for stack args, and copy args into registers
- // for those that were register-allocated. Args are pushed right to
- // left, so Arg[0] is closest to the stack/frame pointer.
+ // Fill in stack offsets for stack args, and copy args into registers for
+ // those that were register-allocated. Args are pushed right to left, so
+ // Arg[0] is closest to the stack/frame pointer.
Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
size_t BasicFrameOffset =
PreservedRegsSizeBytes + Traits::X86_RET_IP_SIZE_BYTES;
@@ -576,8 +565,8 @@
if (RI == E)
return;
- // Convert the reverse_iterator position into its corresponding
- // (forward) iterator position.
+ // Convert the reverse_iterator position into its corresponding (forward)
+ // iterator position.
InstList::iterator InsertPoint = RI.base();
--InsertPoint;
Context.init(Node);
@@ -586,9 +575,9 @@
Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
if (IsEbpBasedFrame) {
Variable *ebp = getPhysicalRegister(Traits::RegisterSet::Reg_ebp);
- // For late-stage liveness analysis (e.g. asm-verbose mode),
- // adding a fake use of esp before the assignment of esp=ebp keeps
- // previous esp adjustments from being dead-code eliminated.
+ // For late-stage liveness analysis (e.g. asm-verbose mode), adding a fake
+ // use of esp before the assignment of esp=ebp keeps previous esp
+ // adjustments from being dead-code eliminated.
Context.insert(InstFakeUse::create(Func, esp));
_mov(esp, ebp);
_pop(ebp);
@@ -747,8 +736,8 @@
continue;
typename T::IceType *Const = llvm::cast<typename T::IceType>(C);
typename T::IceType::PrimType Value = Const->getValue();
- // Use memcpy() to copy bits from Value into RawValue in a way
- // that avoids breaking strict-aliasing rules.
+ // Use memcpy() to copy bits from Value into RawValue in a way that avoids
+ // breaking strict-aliasing rules.
typename T::PrimitiveIntType RawValue;
memcpy(&RawValue, &Value, sizeof(Value));
char buf[30];
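
The same trick in isolation (a sketch; bitsOfFloat is a hypothetical name): copying the object representation with memcpy is the portable way to reinterpret a float's bits as an integer without violating strict aliasing, unlike a pointer cast.

  #include <cstdint>
  #include <cstring>

  static uint32_t bitsOfFloat(float F) {
    uint32_t Raw;
    static_assert(sizeof(Raw) == sizeof(F), "size mismatch");
    std::memcpy(&Raw, &F, sizeof(F)); // well-defined, unlike *(uint32_t *)&F
    return Raw;
  }
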
@@ -766,8 +755,8 @@
void TargetDataX8632::lowerConstants() {
if (Ctx->getFlags().getDisableTranslation())
return;
- // No need to emit constants from the int pool since (for x86) they
- // are embedded as immediates in the instructions, just emit float/double.
+ // No need to emit constants from the int pool since (for x86) they are
+ // embedded as immediates in the instructions, just emit float/double.
switch (Ctx->getFlags().getOutFileType()) {
case FT_Elf: {
ELFObjectWriter *Writer = Ctx->getObjectWriter();
@@ -846,19 +835,17 @@
TargetHeaderX8632::TargetHeaderX8632(GlobalContext *Ctx)
: TargetHeaderLowering(Ctx) {}
-// In some cases, there are x-macros tables for both high-level and
-// low-level instructions/operands that use the same enum key value.
-// The tables are kept separate to maintain a proper separation
-// between abstraction layers. There is a risk that the tables could
-// get out of sync if enum values are reordered or if entries are
-// added or deleted. The following dummy namespaces use
+// In some cases, there are x-macros tables for both high-level and low-level
+// instructions/operands that use the same enum key value. The tables are kept
+// separate to maintain a proper separation between abstraction layers. There
+// is a risk that the tables could get out of sync if enum values are reordered
+// or if entries are added or deleted. The following dummy namespaces use
// static_asserts to ensure everything is kept in sync.
namespace {
// Validate the enum values in FCMPX8632_TABLE.
namespace dummy1 {
-// Define a temporary set of enum values based on low-level table
-// entries.
+// Define a temporary set of enum values based on low-level table entries.
enum _tmp_enum {
#define X(val, dflt, swapS, C1, C2, swapV, pred) _tmp_##val,
FCMPX8632_TABLE
@@ -869,8 +856,8 @@
#define X(tag, str) static const int _table1_##tag = InstFcmp::tag;
ICEINSTFCMP_TABLE
#undef X
-// Define a set of constants based on low-level table entries, and
-// ensure the table entry keys are consistent.
+// Define a set of constants based on low-level table entries, and ensure the
+// table entry keys are consistent.
#define X(val, dflt, swapS, C1, C2, swapV, pred) \
static const int _table2_##val = _tmp_##val; \
static_assert( \
@@ -878,8 +865,8 @@
"Inconsistency between FCMPX8632_TABLE and ICEINSTFCMP_TABLE");
FCMPX8632_TABLE
#undef X
-// Repeat the static asserts with respect to the high-level table
-// entries in case the high-level table has extra entries.
+// Repeat the static asserts with respect to the high-level table entries in
+// case the high-level table has extra entries.
#define X(tag, str) \
static_assert( \
_table1_##tag == _table2_##tag, \
@@ -890,8 +877,7 @@
// Validate the enum values in ICMPX8632_TABLE.
namespace dummy2 {
-// Define a temporary set of enum values based on low-level table
-// entries.
+// Define a temporary set of enum values based on low-level table entries.
enum _tmp_enum {
#define X(val, C_32, C1_64, C2_64, C3_64) _tmp_##val,
ICMPX8632_TABLE
@@ -902,8 +888,8 @@
#define X(tag, str) static const int _table1_##tag = InstIcmp::tag;
ICEINSTICMP_TABLE
#undef X
-// Define a set of constants based on low-level table entries, and
-// ensure the table entry keys are consistent.
+// Define a set of constants based on low-level table entries, and ensure the
+// table entry keys are consistent.
#define X(val, C_32, C1_64, C2_64, C3_64) \
static const int _table2_##val = _tmp_##val; \
static_assert( \
@@ -911,8 +897,8 @@
"Inconsistency between ICMPX8632_TABLE and ICEINSTICMP_TABLE");
ICMPX8632_TABLE
#undef X
-// Repeat the static asserts with respect to the high-level table
-// entries in case the high-level table has extra entries.
+// Repeat the static asserts with respect to the high-level table entries in
+// case the high-level table has extra entries.
#define X(tag, str) \
static_assert( \
_table1_##tag == _table2_##tag, \
@@ -923,8 +909,7 @@
// Validate the enum values in ICETYPEX8632_TABLE.
namespace dummy3 {
-// Define a temporary set of enum values based on low-level table
-// entries.
+// Define a temporary set of enum values based on low-level table entries.
enum _tmp_enum {
#define X(tag, elementty, cvt, sdss, pack, width, fld) _tmp_##tag,
ICETYPEX8632_TABLE
@@ -936,16 +921,16 @@
static const int _table1_##tag = tag;
ICETYPE_TABLE
#undef X
-// Define a set of constants based on low-level table entries, and
-// ensure the table entry keys are consistent.
+// Define a set of constants based on low-level table entries, and ensure the
+// table entry keys are consistent.
#define X(tag, elementty, cvt, sdss, pack, width, fld) \
static const int _table2_##tag = _tmp_##tag; \
static_assert(_table1_##tag == _table2_##tag, \
"Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE");
ICETYPEX8632_TABLE
#undef X
-// Repeat the static asserts with respect to the high-level table
-// entries in case the high-level table has extra entries.
+// Repeat the static asserts with respect to the high-level table entries in
+// case the high-level table has extra entries.
#define X(tag, sizeLog2, align, elts, elty, str) \
static_assert(_table1_##tag == _table2_##tag, \
"Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE");
diff --git a/src/IceTargetLoweringX8632.h b/src/IceTargetLoweringX8632.h
index 6187809..b1d74f5 100644
--- a/src/IceTargetLoweringX8632.h
+++ b/src/IceTargetLoweringX8632.h
@@ -8,9 +8,8 @@
//===----------------------------------------------------------------------===//
///
/// \file
-/// This file declares the TargetLoweringX8632 class, which
-/// implements the TargetLowering interface for the x86-32
-/// architecture.
+/// This file declares the TargetLoweringX8632 class, which implements the
+/// TargetLowering interface for the x86-32 architecture.
///
//===----------------------------------------------------------------------===//
diff --git a/src/IceTargetLoweringX8632Traits.h b/src/IceTargetLoweringX8632Traits.h
index 3bee361..918a585 100644
--- a/src/IceTargetLoweringX8632Traits.h
+++ b/src/IceTargetLoweringX8632Traits.h
@@ -390,10 +390,10 @@
const llvm::SmallBitVector &ExcludeRegisters,
uint64_t Salt) {
// TODO(stichnot): Declaring Permutation this way loses type/size
- // information. Fix this in conjunction with the caller-side TODO.
+ // information. Fix this in conjunction with the caller-side TODO.
assert(Permutation.size() >= RegisterSet::Reg_NUM);
// Expected upper bound on the number of registers in a single equivalence
- // class. For x86-32, this would comprise the 8 XMM registers. This is for
+ // class. For x86-32, this would comprise the 8 XMM registers. This is for
// performance, not correctness.
static const unsigned MaxEquivalenceClassSize = 8;
using RegisterList = llvm::SmallVector<int32_t, MaxEquivalenceClassSize>;
@@ -477,8 +477,8 @@
static constexpr uint32_t MEMSET_UNROLL_LIMIT = 16;
/// @}
- /// Value is in bytes. Return Value adjusted to the next highest multiple
- /// of the stack alignment.
+ /// Value is in bytes. Return Value adjusted to the next highest multiple of
+ /// the stack alignment.
static uint32_t applyStackAlignment(uint32_t Value) {
return Utils::applyAlignment(Value, X86_STACK_ALIGNMENT_BYTES);
}
@@ -500,17 +500,17 @@
/// instruction. There is one table entry for each of the 16 conditions.
///
/// The first four columns describe the case when the operands are floating
- /// point scalar values. A comment in lowerFcmp() describes the lowering
- /// template. In the most general case, there is a compare followed by two
+ /// point scalar values. A comment in lowerFcmp() describes the lowering
+ /// template. In the most general case, there is a compare followed by two
/// conditional branches, because some fcmp conditions don't map to a single
- /// x86 conditional branch. However, in many cases it is possible to swap the
- /// operands in the comparison and have a single conditional branch. Since
+ /// x86 conditional branch. However, in many cases it is possible to swap the
+ /// operands in the comparison and have a single conditional branch. Since
/// it's quite tedious to validate the table by hand, good execution tests are
/// helpful.
///
/// The last two columns describe the case when the operands are vectors of
- /// floating point values. For most fcmp conditions, there is a clear mapping
- /// to a single x86 cmpps instruction variant. Some fcmp conditions require
+ /// floating point values. For most fcmp conditions, there is a clear mapping
+ /// to a single x86 cmpps instruction variant. Some fcmp conditions require
/// special code to handle and these are marked in the table with a
/// Cmpps_Invalid predicate.
/// {@
@@ -525,7 +525,7 @@
/// @}
/// The following table summarizes the logic for lowering the icmp instruction
- /// for i32 and narrower types. Each icmp condition has a clear mapping to an
+ /// for i32 and narrower types. Each icmp condition has a clear mapping to an
/// x86 conditional branch instruction.
/// {@
static const struct TableIcmp32Type { Cond::BrCond Mapping; } TableIcmp32[];
@@ -533,8 +533,8 @@
/// @}
/// The following table summarizes the logic for lowering the icmp instruction
- /// for the i64 type. For Eq and Ne, two separate 32-bit comparisons and
- /// conditional branches are needed. For the other conditions, three separate
+ /// for the i64 type. For Eq and Ne, two separate 32-bit comparisons and
+ /// conditional branches are needed. For the other conditions, three separate
/// conditional branches are needed.
/// {@
static const struct TableIcmp64Type {
@@ -567,8 +567,8 @@
using TargetLowering = ::Ice::X86Internal::TargetX86Base<TargetX8632>;
using Assembler = X8632::AssemblerX8632;
- /// X86Operand extends the Operand hierarchy. Its subclasses are
- /// X86OperandMem and VariableSplit.
+ /// X86Operand extends the Operand hierarchy. Its subclasses are X86OperandMem
+ /// and VariableSplit.
class X86Operand : public ::Ice::Operand {
X86Operand() = delete;
X86Operand(const X86Operand &) = delete;
@@ -644,8 +644,8 @@
};
/// VariableSplit is a way to treat an f64 memory location as a pair of i32
- /// locations (Low and High). This is needed for some cases of the Bitcast
- /// instruction. Since it's not possible for integer registers to access the
+ /// locations (Low and High). This is needed for some cases of the Bitcast
+ /// instruction. Since it's not possible for integer registers to access the
/// XMM registers and vice versa, the lowering forces the f64 to be spilled to
/// the stack and then accesses through the VariableSplit.
// TODO(jpp): remove references to VariableSplit from IceInstX86Base as 64bit
@@ -685,11 +685,11 @@
Portion Part;
};
- /// SpillVariable decorates a Variable by linking it to another Variable.
- /// When stack frame offsets are computed, the SpillVariable is given a
- /// distinct stack slot only if its linked Variable has a register. If the
- /// linked Variable has a stack slot, then the Variable and SpillVariable
- /// share that slot.
+ /// SpillVariable decorates a Variable by linking it to another Variable. When
+ /// stack frame offsets are computed, the SpillVariable is given a distinct
+ /// stack slot only if its linked Variable has a register. If the linked
+ /// Variable has a stack slot, then the Variable and SpillVariable share that
+ /// slot.
class SpillVariable : public Variable {
SpillVariable() = delete;
SpillVariable(const SpillVariable &) = delete;
diff --git a/src/IceTargetLoweringX8664.cpp b/src/IceTargetLoweringX8664.cpp
index 83a5fa5..8c77baa 100644
--- a/src/IceTargetLoweringX8664.cpp
+++ b/src/IceTargetLoweringX8664.cpp
@@ -8,9 +8,8 @@
//===----------------------------------------------------------------------===//
///
/// \file
-/// This file implements the TargetLoweringX8664 class, which
-/// consists almost entirely of the lowering sequence for each
-/// high-level instruction.
+/// This file implements the TargetLoweringX8664 class, which consists almost
+/// entirely of the lowering sequence for each high-level instruction.
///
//===----------------------------------------------------------------------===//
@@ -131,24 +130,22 @@
void TargetX8664::lowerCall(const InstCall *Instr) {
// x86-64 calling convention:
//
- // * At the point before the call, the stack must be aligned to 16
- // bytes.
+ // * At the point before the call, the stack must be aligned to 16 bytes.
//
// * The first eight arguments of vector/fp type, regardless of their
- // position relative to the other arguments in the argument list, are
- // placed in registers %xmm0 - %xmm7.
+ // position relative to the other arguments in the argument list, are placed
+ // in registers %xmm0 - %xmm7.
//
- // * The first six arguments of integer types, regardless of their
- // position relative to the other arguments in the argument list, are
- // placed in registers %rdi, %rsi, %rdx, %rcx, %r8, and %r9.
+ // * The first six arguments of integer types, regardless of their position
+ // relative to the other arguments in the argument list, are placed in
+ // registers %rdi, %rsi, %rdx, %rcx, %r8, and %r9.
//
- // * Other arguments are pushed onto the stack in right-to-left order,
- // such that the left-most argument ends up on the top of the stack at
- // the lowest memory address.
+ // * Other arguments are pushed onto the stack in right-to-left order, such
+ // that the left-most argument ends up on the top of the stack at the lowest
+ // memory address.
//
- // * Stack arguments of vector type are aligned to start at the next
- // highest multiple of 16 bytes. Other stack arguments are aligned to
- // 8 bytes.
+ // * Stack arguments of vector type are aligned to start at the next highest
+ // multiple of 16 bytes. Other stack arguments are aligned to 8 bytes.
//
// This intends to match the section "Function Calling Sequence" of the
// document "System V Application Binary Interface."
@@ -191,41 +188,39 @@
}
}
- // Adjust the parameter area so that the stack is aligned. It is
- // assumed that the stack is already aligned at the start of the
- // calling sequence.
+ // Adjust the parameter area so that the stack is aligned. It is assumed that
+ // the stack is already aligned at the start of the calling sequence.
ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes);
- // Subtract the appropriate amount for the argument area. This also
- // takes care of setting the stack adjustment during emission.
+ // Subtract the appropriate amount for the argument area. This also takes
+ // care of setting the stack adjustment during emission.
//
- // TODO: If for some reason the call instruction gets dead-code
- // eliminated after lowering, we would need to ensure that the
- // pre-call and the post-call esp adjustment get eliminated as well.
+ // TODO: If for some reason the call instruction gets dead-code eliminated
+ // after lowering, we would need to ensure that the pre-call and the
+ // post-call esp adjustment get eliminated as well.
if (ParameterAreaSizeBytes) {
_adjust_stack(ParameterAreaSizeBytes);
}
- // Copy arguments that are passed on the stack to the appropriate
- // stack locations.
+ // Copy arguments that are passed on the stack to the appropriate stack
+ // locations.
for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) {
lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i]));
}
- // Copy arguments to be passed in registers to the appropriate
- // registers.
- // TODO: Investigate the impact of lowering arguments passed in
- // registers after lowering stack arguments as opposed to the other
- // way around. Lowering register arguments after stack arguments may
- // reduce register pressure. On the other hand, lowering register
- // arguments first (before stack arguments) may result in more compact
- // code, as the memory operand displacements may end up being smaller
- // before any stack adjustment is done.
+ // Copy arguments to be passed in registers to the appropriate registers.
+ // TODO: Investigate the impact of lowering arguments passed in registers
+ // after lowering stack arguments as opposed to the other way around.
+ // Lowering register arguments after stack arguments may reduce register
+ // pressure. On the other hand, lowering register arguments first (before
+ // stack arguments) may result in more compact code, as the memory operand
+ // displacements may end up being smaller before any stack adjustment is
+ // done.
for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) {
Variable *Reg = legalizeToReg(XmmArgs[i], getRegisterForXmmArgNum(i));
- // Generate a FakeUse of register arguments so that they do not get
- // dead code eliminated as a result of the FakeKill of scratch
- // registers after the call.
+ // Generate a FakeUse of register arguments so that they do not get dead
+ // code eliminated as a result of the FakeKill of scratch registers after
+ // the call.
Context.insert(InstFakeUse::create(Func, Reg));
}
@@ -234,8 +229,8 @@
Context.insert(InstFakeUse::create(Func, Reg));
}
- // Generate the call instruction. Assign its result to a temporary
- // with high register allocation weight.
+ // Generate the call instruction. Assign its result to a temporary with high
+ // register allocation weight.
Variable *Dest = Instr->getDest();
// ReturnReg doubles as ReturnRegLo as necessary.
Variable *ReturnReg = nullptr;
@@ -277,8 +272,8 @@
llvm_unreachable("X86-64 Sandboxing codegen not implemented.");
}
- // Add the appropriate offset to esp. The call instruction takes care
- // of resetting the stack offset during emission.
+ // Add the appropriate offset to esp. The call instruction takes care of
+ // resetting the stack offset during emission.
if (ParameterAreaSizeBytes) {
Variable *Esp =
Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
@@ -310,11 +305,12 @@
void TargetX8664::lowerArguments() {
VarList &Args = Func->getArgs();
-  // The first eight vetcor typed arguments (as well as fp arguments) are passed
-  // in %xmm0 through %xmm7 regardless of their position in the argument list.
+  // The first eight vector typed arguments (as well as fp arguments) are
+  // passed in %xmm0 through %xmm7 regardless of their position in the argument
+  // list.
unsigned NumXmmArgs = 0;
- // The first six integer typed arguments are passed in %rdi, %rsi, %rdx, %rcx,
- // %r8, and %r9 regardless of their position in the argument list.
+ // The first six integer typed arguments are passed in %rdi, %rsi, %rdx,
+ // %rcx, %r8, and %r9 regardless of their position in the argument list.
unsigned NumGprArgs = 0;
Context.init(Func->getEntryNode());
@@ -345,9 +341,9 @@
}
assert(RegNum != Variable::NoRegister);
assert(RegisterArg != nullptr);
- // Replace Arg in the argument list with the home register. Then
- // generate an instruction in the prolog to copy the home register
- // to the assigned location of Arg.
+ // Replace Arg in the argument list with the home register. Then generate
+ // an instruction in the prolog to copy the home register to the assigned
+ // location of Arg.
if (BuildDefs::dump())
RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
RegisterArg->setRegNum(RegNum);
@@ -371,15 +367,14 @@
_mov(Reg, Src0, Traits::RegisterSet::Reg_eax);
}
}
- // Add a ret instruction even if sandboxing is enabled, because
- // addEpilog explicitly looks for a ret instruction as a marker for
- // where to insert the frame removal instructions.
+ // Add a ret instruction even if sandboxing is enabled, because addEpilog
+ // explicitly looks for a ret instruction as a marker for where to insert the
+ // frame removal instructions.
_ret(Reg);
// Add a fake use of esp to make sure esp stays alive for the entire
- // function. Otherwise post-call esp adjustments get dead-code
- // eliminated. TODO: Are there more places where the fake use
- // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not
- // have a ret instruction.
+ // function. Otherwise post-call esp adjustments get dead-code eliminated.
+ // TODO: Are there more places where the fake use should be inserted? E.g.
+ // "void f(int n){while(1) g(n);}" may not have a ret instruction.
Variable *esp =
Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
Context.insert(InstFakeUse::create(Func, esp));
@@ -415,16 +410,15 @@
// * LocalsSpillAreaSize: area 6
// * SpillAreaSizeBytes: areas 3 - 7
- // Determine stack frame offsets for each Variable without a
- // register assignment. This can be done as one variable per stack
- // slot. Or, do coalescing by running the register allocator again
- // with an infinite set of registers (as a side effect, this gives
- // variables a second chance at physical register assignment).
+ // Determine stack frame offsets for each Variable without a register
+ // assignment. This can be done as one variable per stack slot. Or, do
+ // coalescing by running the register allocator again with an infinite set of
+ // registers (as a side effect, this gives variables a second chance at
+ // physical register assignment).
//
- // A middle ground approach is to leverage sparsity and allocate one
- // block of space on the frame for globals (variables with
- // multi-block lifetime), and one block to share for locals
- // (single-block lifetime).
+ // A middle ground approach is to leverage sparsity and allocate one block of
+ // space on the frame for globals (variables with multi-block lifetime), and
+ // one block to share for locals (single-block lifetime).
Context.init(Node);
Context.setInsertPoint(Context.getCur());
@@ -434,17 +428,16 @@
RegsUsed = llvm::SmallBitVector(CalleeSaves.size());
VarList SortedSpilledVariables, VariablesLinkedToSpillSlots;
size_t GlobalsSize = 0;
- // If there is a separate locals area, this represents that area.
- // Otherwise it counts any variable not counted by GlobalsSize.
+ // If there is a separate locals area, this represents that area. Otherwise
+ // it counts any variable not counted by GlobalsSize.
SpillAreaSizeBytes = 0;
- // If there is a separate locals area, this specifies the alignment
- // for it.
+ // If there is a separate locals area, this specifies the alignment for it.
uint32_t LocalsSlotsAlignmentBytes = 0;
- // The entire spill locations area gets aligned to largest natural
- // alignment of the variables that have a spill slot.
+ // The entire spill locations area gets aligned to largest natural alignment
+ // of the variables that have a spill slot.
uint32_t SpillAreaAlignmentBytes = 0;
- // A spill slot linked to a variable with a stack slot should reuse
- // that stack slot.
+ // A spill slot linked to a variable with a stack slot should reuse that
+ // stack slot.
std::function<bool(Variable *)> TargetVarHook =
[&VariablesLinkedToSpillSlots](Variable *Var) {
if (auto *SpillVar =
@@ -486,15 +479,14 @@
Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
_push(ebp);
_mov(ebp, esp);
- // Keep ebp live for late-stage liveness analysis
- // (e.g. asm-verbose mode).
+ // Keep ebp live for late-stage liveness analysis (e.g. asm-verbose mode).
Context.insert(InstFakeUse::create(Func, ebp));
}
- // Align the variables area. SpillAreaPaddingBytes is the size of
- // the region after the preserved registers and before the spill areas.
- // LocalsSlotsPaddingBytes is the amount of padding between the globals
- // and locals area if they are separate.
+ // Align the variables area. SpillAreaPaddingBytes is the size of the region
+ // after the preserved registers and before the spill areas.
+ // LocalsSlotsPaddingBytes is the amount of padding between the globals and
+ // locals area if they are separate.
assert(SpillAreaAlignmentBytes <= Traits::X86_STACK_ALIGNMENT_BYTES);
assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
uint32_t SpillAreaPaddingBytes = 0;
@@ -524,9 +516,9 @@
resetStackAdjustment();
- // Fill in stack offsets for stack args, and copy args into registers
- // for those that were register-allocated. Args are pushed right to
- // left, so Arg[0] is closest to the stack/frame pointer.
+ // Fill in stack offsets for stack args, and copy args into registers for
+ // those that were register-allocated. Args are pushed right to left, so
+ // Arg[0] is closest to the stack/frame pointer.
Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
size_t BasicFrameOffset =
PreservedRegsSizeBytes + Traits::X86_RET_IP_SIZE_BYTES;
@@ -605,8 +597,8 @@
if (RI == E)
return;
- // Convert the reverse_iterator position into its corresponding
- // (forward) iterator position.
+ // Convert the reverse_iterator position into its corresponding (forward)
+ // iterator position.
InstList::iterator InsertPoint = RI.base();
--InsertPoint;
Context.init(Node);
@@ -615,9 +607,9 @@
Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
if (IsEbpBasedFrame) {
Variable *ebp = getPhysicalRegister(Traits::RegisterSet::Reg_ebp);
- // For late-stage liveness analysis (e.g. asm-verbose mode),
- // adding a fake use of esp before the assignment of esp=ebp keeps
- // previous esp adjustments from being dead-code eliminated.
+ // For late-stage liveness analysis (e.g. asm-verbose mode), adding a fake
+ // use of esp before the assignment of esp=ebp keeps previous esp
+ // adjustments from being dead-code eliminated.
Context.insert(InstFakeUse::create(Func, esp));
_mov(esp, ebp);
_pop(ebp);
@@ -758,8 +750,8 @@
continue;
typename T::IceType *Const = llvm::cast<typename T::IceType>(C);
typename T::IceType::PrimType Value = Const->getValue();
- // Use memcpy() to copy bits from Value into RawValue in a way
- // that avoids breaking strict-aliasing rules.
+ // Use memcpy() to copy bits from Value into RawValue in a way that avoids
+ // breaking strict-aliasing rules.
typename T::PrimitiveIntType RawValue;
memcpy(&RawValue, &Value, sizeof(Value));
char buf[30];
@@ -777,8 +769,8 @@
void TargetDataX8664::lowerConstants() {
if (Ctx->getFlags().getDisableTranslation())
return;
- // No need to emit constants from the int pool since (for x86) they
- // are embedded as immediates in the instructions, just emit float/double.
+ // No need to emit constants from the int pool since (for x86) they are
+ // embedded as immediates in the instructions, just emit float/double.
switch (Ctx->getFlags().getOutFileType()) {
case FT_Elf: {
ELFObjectWriter *Writer = Ctx->getObjectWriter();
@@ -854,19 +846,17 @@
}
}
-// In some cases, there are x-macros tables for both high-level and
-// low-level instructions/operands that use the same enum key value.
-// The tables are kept separate to maintain a proper separation
-// between abstraction layers. There is a risk that the tables could
-// get out of sync if enum values are reordered or if entries are
-// added or deleted. The following dummy namespaces use
+// In some cases, there are x-macros tables for both high-level and low-level
+// instructions/operands that use the same enum key value. The tables are kept
+// separate to maintain a proper separation between abstraction layers. There
+// is a risk that the tables could get out of sync if enum values are reordered
+// or if entries are added or deleted. The following dummy namespaces use
// static_asserts to ensure everything is kept in sync.
namespace {
// Validate the enum values in FCMPX8664_TABLE.
namespace dummy1 {
-// Define a temporary set of enum values based on low-level table
-// entries.
+// Define a temporary set of enum values based on low-level table entries.
enum _tmp_enum {
#define X(val, dflt, swapS, C1, C2, swapV, pred) _tmp_##val,
FCMPX8664_TABLE
@@ -877,8 +867,8 @@
#define X(tag, str) static const int _table1_##tag = InstFcmp::tag;
ICEINSTFCMP_TABLE
#undef X
-// Define a set of constants based on low-level table entries, and
-// ensure the table entry keys are consistent.
+// Define a set of constants based on low-level table entries, and ensure the
+// table entry keys are consistent.
#define X(val, dflt, swapS, C1, C2, swapV, pred) \
static const int _table2_##val = _tmp_##val; \
static_assert( \
@@ -886,8 +876,8 @@
"Inconsistency between FCMPX8664_TABLE and ICEINSTFCMP_TABLE");
FCMPX8664_TABLE
#undef X
-// Repeat the static asserts with respect to the high-level table
-// entries in case the high-level table has extra entries.
+// Repeat the static asserts with respect to the high-level table entries in
+// case the high-level table has extra entries.
#define X(tag, str) \
static_assert( \
_table1_##tag == _table2_##tag, \
@@ -898,8 +888,7 @@
// Validate the enum values in ICMPX8664_TABLE.
namespace dummy2 {
-// Define a temporary set of enum values based on low-level table
-// entries.
+// Define a temporary set of enum values based on low-level table entries.
enum _tmp_enum {
#define X(val, C_32, C1_64, C2_64, C3_64) _tmp_##val,
ICMPX8664_TABLE
@@ -910,8 +899,8 @@
#define X(tag, str) static const int _table1_##tag = InstIcmp::tag;
ICEINSTICMP_TABLE
#undef X
-// Define a set of constants based on low-level table entries, and
-// ensure the table entry keys are consistent.
+// Define a set of constants based on low-level table entries, and ensure the
+// table entry keys are consistent.
#define X(val, C_32, C1_64, C2_64, C3_64) \
static const int _table2_##val = _tmp_##val; \
static_assert( \
@@ -919,8 +908,8 @@
"Inconsistency between ICMPX8664_TABLE and ICEINSTICMP_TABLE");
ICMPX8664_TABLE
#undef X
-// Repeat the static asserts with respect to the high-level table
-// entries in case the high-level table has extra entries.
+// Repeat the static asserts with respect to the high-level table entries in
+// case the high-level table has extra entries.
#define X(tag, str) \
static_assert( \
_table1_##tag == _table2_##tag, \
@@ -931,8 +920,7 @@
// Validate the enum values in ICETYPEX8664_TABLE.
namespace dummy3 {
-// Define a temporary set of enum values based on low-level table
-// entries.
+// Define a temporary set of enum values based on low-level table entries.
enum _tmp_enum {
#define X(tag, elementty, cvt, sdss, pack, width, fld) _tmp_##tag,
ICETYPEX8664_TABLE
@@ -944,16 +932,16 @@
static const int _table1_##tag = tag;
ICETYPE_TABLE
#undef X
-// Define a set of constants based on low-level table entries, and
-// ensure the table entry keys are consistent.
+// Define a set of constants based on low-level table entries, and ensure the
+// table entry keys are consistent.
#define X(tag, elementty, cvt, sdss, pack, width, fld) \
static const int _table2_##tag = _tmp_##tag; \
static_assert(_table1_##tag == _table2_##tag, \
"Inconsistency between ICETYPEX8664_TABLE and ICETYPE_TABLE");
ICETYPEX8664_TABLE
#undef X
-// Repeat the static asserts with respect to the high-level table
-// entries in case the high-level table has extra entries.
+// Repeat the static asserts with respect to the high-level table entries in
+// case the high-level table has extra entries.
#define X(tag, sizeLog2, align, elts, elty, str) \
static_assert(_table1_##tag == _table2_##tag, \
"Inconsistency between ICETYPEX8664_TABLE and ICETYPE_TABLE");
diff --git a/src/IceTargetLoweringX8664Traits.h b/src/IceTargetLoweringX8664Traits.h
index 454b6cb..0ed40a8 100644
--- a/src/IceTargetLoweringX8664Traits.h
+++ b/src/IceTargetLoweringX8664Traits.h
@@ -404,10 +404,10 @@
const llvm::SmallBitVector &ExcludeRegisters,
uint64_t Salt) {
// TODO(stichnot): Declaring Permutation this way loses type/size
- // information. Fix this in conjunction with the caller-side TODO.
+ // information. Fix this in conjunction with the caller-side TODO.
assert(Permutation.size() >= RegisterSet::Reg_NUM);
// Expected upper bound on the number of registers in a single equivalence
- // class. For x86-64, this would comprise the 16 XMM registers. This is
+ // class. For x86-64, this would comprise the 16 XMM registers. This is
// for performance, not correctness.
static const unsigned MaxEquivalenceClassSize = 8;
using RegisterList = llvm::SmallVector<int32_t, MaxEquivalenceClassSize>;
@@ -493,8 +493,8 @@
static constexpr uint32_t MEMSET_UNROLL_LIMIT = 16;
/// @}
- /// Value is in bytes. Return Value adjusted to the next highest multiple
- /// of the stack alignment.
+ /// Value is in bytes. Return Value adjusted to the next highest multiple of
+ /// the stack alignment.
static uint32_t applyStackAlignment(uint32_t Value) {
return Utils::applyAlignment(Value, X86_STACK_ALIGNMENT_BYTES);
}
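
For reference, the rounding performed here is the usual round-up-to-a-multiple computation. The sketch below is a standalone illustration (the Utils::applyAlignment call above is assumed to be equivalent), shown with the 16-byte stack alignment used in this code.

#include <cassert>
#include <cstdint>

// Round Value up to the next multiple of Alignment (a power of two).
uint32_t roundUpToAlignment(uint32_t Value, uint32_t Alignment) {
  return (Value + Alignment - 1) & ~(Alignment - 1);
}

int main() {
  assert(roundUpToAlignment(0, 16) == 0);
  assert(roundUpToAlignment(1, 16) == 16);
  assert(roundUpToAlignment(16, 16) == 16);
  assert(roundUpToAlignment(20, 16) == 32);
  return 0;
}
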
@@ -516,17 +516,17 @@
/// instruction. There is one table entry for each of the 16 conditions.
///
/// The first four columns describe the case when the operands are floating
- /// point scalar values. A comment in lowerFcmp() describes the lowering
- /// template. In the most general case, there is a compare followed by two
+ /// point scalar values. A comment in lowerFcmp() describes the lowering
+ /// template. In the most general case, there is a compare followed by two
/// conditional branches, because some fcmp conditions don't map to a single
- /// x86 conditional branch. However, in many cases it is possible to swap the
- /// operands in the comparison and have a single conditional branch. Since
+ /// x86 conditional branch. However, in many cases it is possible to swap the
+ /// operands in the comparison and have a single conditional branch. Since
/// it's quite tedious to validate the table by hand, good execution tests are
/// helpful.
///
/// The last two columns describe the case when the operands are vectors of
- /// floating point values. For most fcmp conditions, there is a clear mapping
- /// to a single x86 cmpps instruction variant. Some fcmp conditions require
+ /// floating point values. For most fcmp conditions, there is a clear mapping
+ /// to a single x86 cmpps instruction variant. Some fcmp conditions require
/// special code to handle and these are marked in the table with a
/// Cmpps_Invalid predicate.
/// {@
@@ -541,7 +541,7 @@
/// @}
/// The following table summarizes the logic for lowering the icmp instruction
- /// for i32 and narrower types. Each icmp condition has a clear mapping to an
+ /// for i32 and narrower types. Each icmp condition has a clear mapping to an
/// x86 conditional branch instruction.
/// {@
static const struct TableIcmp32Type { Cond::BrCond Mapping; } TableIcmp32[];
@@ -549,8 +549,8 @@
/// @}
/// The following table summarizes the logic for lowering the icmp instruction
- /// for the i64 type. For Eq and Ne, two separate 32-bit comparisons and
- /// conditional branches are needed. For the other conditions, three separate
+ /// for the i64 type. For Eq and Ne, two separate 32-bit comparisons and
+ /// conditional branches are needed. For the other conditions, three separate
/// conditional branches are needed.
/// {@
static const struct TableIcmp64Type {
@@ -583,8 +583,8 @@
using TargetLowering = ::Ice::X86Internal::TargetX86Base<TargetX8664>;
using Assembler = X8664::AssemblerX8664;
- /// X86Operand extends the Operand hierarchy. Its subclasses are
- /// X86OperandMem and VariableSplit.
+ /// X86Operand extends the Operand hierarchy. Its subclasses are X86OperandMem
+ /// and VariableSplit.
class X86Operand : public ::Ice::Operand {
X86Operand() = delete;
X86Operand(const X86Operand &) = delete;
@@ -655,8 +655,8 @@
};
/// VariableSplit is a way to treat an f64 memory location as a pair of i32
- /// locations (Low and High). This is needed for some cases of the Bitcast
- /// instruction. Since it's not possible for integer registers to access the
+ /// locations (Low and High). This is needed for some cases of the Bitcast
+ /// instruction. Since it's not possible for integer registers to access the
/// XMM registers and vice versa, the lowering forces the f64 to be spilled to
/// the stack and then accesses through the VariableSplit.
// TODO(jpp): remove references to VariableSplit from IceInstX86Base as 64bit
@@ -696,11 +696,11 @@
Portion Part;
};
- /// SpillVariable decorates a Variable by linking it to another Variable.
- /// When stack frame offsets are computed, the SpillVariable is given a
- /// distinct stack slot only if its linked Variable has a register. If the
- /// linked Variable has a stack slot, then the Variable and SpillVariable
- /// share that slot.
+ /// SpillVariable decorates a Variable by linking it to another Variable. When
+ /// stack frame offsets are computed, the SpillVariable is given a distinct
+ /// stack slot only if its linked Variable has a register. If the linked
+ /// Variable has a stack slot, then the Variable and SpillVariable share that
+ /// slot.
class SpillVariable : public Variable {
SpillVariable() = delete;
SpillVariable(const SpillVariable &) = delete;
diff --git a/src/IceTargetLoweringX86Base.h b/src/IceTargetLoweringX86Base.h
index e032ce9..32c3e3b 100644
--- a/src/IceTargetLoweringX86Base.h
+++ b/src/IceTargetLoweringX86Base.h
@@ -8,9 +8,8 @@
//===----------------------------------------------------------------------===//
///
/// \file
-/// This file declares the TargetLoweringX86 template class, which
-/// implements the TargetLowering base interface for the x86
-/// architecture.
+/// This file declares the TargetLoweringX86 template class, which implements
+/// the TargetLowering base interface for the x86 architecture.
///
//===----------------------------------------------------------------------===//
@@ -44,7 +43,7 @@
///
/// Note: Ideally, we should be able to
///
-/// static_assert(std::is_base_of<TargetX86Base<Machine>, Machine>::value);
+/// static_assert(std::is_base_of<TargetX86Base<Machine>, Machine>::value);
///
/// but that does not work: the compiler does not know that Machine inherits
/// from TargetX86Base at this point in translation.
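
The reason the static_assert quoted above cannot be written is that Machine is still an incomplete type while TargetX86Base<Machine> is being instantiated as its base class, and std::is_base_of requires a complete derived type. A minimal standalone sketch of the situation, with hypothetical names:

#include <type_traits>

template <typename Machine> struct TargetBaseSketch {
  // static_assert(std::is_base_of<TargetBaseSketch<Machine>, Machine>::value,
  //               "Machine must derive from TargetBaseSketch");
  // ^ Cannot be written here: Machine is incomplete when this class is
  //   instantiated as Machine's base, so the trait cannot be evaluated yet.
  int lower() { return static_cast<Machine *>(this)->lowerImpl(); }
};

struct MachineSketch : TargetBaseSketch<MachineSketch> {
  int lowerImpl() { return 42; }
};

static_assert(
    std::is_base_of<TargetBaseSketch<MachineSketch>, MachineSketch>::value,
    "fine here, after MachineSketch is complete");
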
@@ -106,13 +105,13 @@
void initNodeForLowering(CfgNode *Node) override;
/// x86-32: Ensure that a 64-bit Variable has been split into 2 32-bit
- /// Variables, creating them if necessary. This is needed for all
- /// I64 operations, and it is needed for pushing F64 arguments for
- /// function calls using the 32-bit push instruction (though the
- /// latter could be done by directly writing to the stack).
+ /// Variables, creating them if necessary. This is needed for all I64
+ /// operations, and it is needed for pushing F64 arguments for function calls
+ /// using the 32-bit push instruction (though the latter could be done by
+ /// directly writing to the stack).
///
- /// x86-64: Complains loudly if invoked because the cpu can handle
- /// 64-bit types natively.
+ /// x86-64: Complains loudly if invoked because the cpu can handle 64-bit
+ /// types natively.
template <typename T = Traits>
typename std::enable_if<!T::Is64Bit, void>::type split64(Variable *Var);
template <typename T = Traits>
@@ -239,13 +238,12 @@
void scalarizeArithmetic(InstArithmetic::OpKind K, Variable *Dest,
Operand *Src0, Operand *Src1);
- /// Operand legalization helpers. To deal with address mode
- /// constraints, the helpers will create a new Operand and emit
- /// instructions that guarantee that the Operand kind is one of those
- /// indicated by the LegalMask (a bitmask of allowed kinds). If the
- /// input Operand is known to already meet the constraints, it may be
- /// simply returned as the result, without creating any new
- /// instructions or operands.
+ /// Operand legalization helpers. To deal with address mode constraints, the
+ /// helpers will create a new Operand and emit instructions that guarantee
+ /// that the Operand kind is one of those indicated by the LegalMask (a
+ /// bitmask of allowed kinds). If the input Operand is known to already meet
+ /// the constraints, it may be simply returned as the result, without creating
+ /// any new instructions or operands.
enum OperandLegalization {
Legal_None = 0,
Legal_Reg = 1 << 0, // physical register, not stack location
@@ -259,9 +257,9 @@
Variable *legalizeToReg(Operand *From, int32_t RegNum = Variable::NoRegister);
/// Legalize the first source operand for use in the cmp instruction.
Operand *legalizeSrc0ForCmp(Operand *Src0, Operand *Src1);
- /// Turn a pointer operand into a memory operand that can be
- /// used by a real load/store operation. Legalizes the operand as well.
- /// This is a nop if the operand is already a legal memory operand.
+ /// Turn a pointer operand into a memory operand that can be used by a real
+ /// load/store operation. Legalizes the operand as well. This is a nop if the
+ /// operand is already a legal memory operand.
typename Traits::X86OperandMem *formMemoryOperand(Operand *Ptr, Type Ty,
bool DoLegalize = true);
@@ -271,8 +269,8 @@
static constexpr uint32_t NoSizeLimit = 0;
static const Type TypeForSize[];
/// Returns the largest type which is equal to or larger than Size bytes. The
- /// type is suitable for copying memory i.e. a load and store will be a
- /// single instruction (for example x86 will get f64 not i64).
+ /// type is suitable for copying memory i.e. a load and store will be a single
+ /// instruction (for example x86 will get f64 not i64).
static Type largestTypeInSize(uint32_t Size, uint32_t MaxSize = NoSizeLimit);
/// Returns the smallest type which is equal to or larger than Size bytes. If
/// one doesn't exist then the largest type smaller than Size bytes is
@@ -304,9 +302,9 @@
const llvm::SmallBitVector &ExcludeRegisters,
uint64_t Salt) const override;
- /// The following are helpers that insert lowered x86 instructions
- /// with minimal syntactic overhead, so that the lowering code can
- /// look as close to assembly as practical.
+ /// The following are helpers that insert lowered x86 instructions with
+ /// minimal syntactic overhead, so that the lowering code can look as close to
+ /// assembly as practical.
void _adc(Variable *Dest, Operand *Src0) {
Context.insert(Traits::Insts::Adc::create(Func, Dest, Src0));
}
@@ -450,9 +448,9 @@
Context.insert(Traits::Insts::Lea::create(Func, Dest, Src0));
}
void _mfence() { Context.insert(Traits::Insts::Mfence::create(Func)); }
- /// If Dest=nullptr is passed in, then a new variable is created,
- /// marked as infinite register allocation weight, and returned
- /// through the in/out Dest argument.
+ /// If Dest=nullptr is passed in, then a new variable is created, marked as
+ /// infinite register allocation weight, and returned through the in/out Dest
+ /// argument.
void _mov(Variable *&Dest, Operand *Src0,
int32_t RegNum = Variable::NoRegister) {
if (Dest == nullptr)
@@ -626,8 +624,8 @@
void _ud2() { Context.insert(Traits::Insts::UD2::create(Func)); }
void _xadd(Operand *Dest, Variable *Src, bool Locked) {
Context.insert(Traits::Insts::Xadd::create(Func, Dest, Src, Locked));
- // The xadd exchanges Dest and Src (modifying Src).
- // Model that update with a FakeDef followed by a FakeUse.
+ // The xadd exchanges Dest and Src (modifying Src). Model that update with
+ // a FakeDef followed by a FakeUse.
Context.insert(
InstFakeDef::create(Func, Src, llvm::dyn_cast<Variable>(Dest)));
_set_dest_nonkillable();
diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h
index c8bf29f..a63f470 100644
--- a/src/IceTargetLoweringX86BaseImpl.h
+++ b/src/IceTargetLoweringX86BaseImpl.h
@@ -8,9 +8,8 @@
//===----------------------------------------------------------------------===//
///
/// \file
-/// This file implements the TargetLoweringX86Base class, which
-/// consists almost entirely of the lowering sequence for each
-/// high-level instruction.
+/// This file implements the TargetLoweringX86Base class, which consists almost
+/// entirely of the lowering sequence for each high-level instruction.
///
//===----------------------------------------------------------------------===//
@@ -63,13 +62,13 @@
/// IsComplex is the cached result of BoolFolding::hasComplexLowering(Instr).
bool IsComplex = false;
/// IsLiveOut is initialized conservatively to true, and is set to false when
- /// we encounter an instruction that ends Var's live range. We disable the
- /// folding optimization when Var is live beyond this basic block. Note that
+ /// we encounter an instruction that ends Var's live range. We disable the
+ /// folding optimization when Var is live beyond this basic block. Note that
/// if liveness analysis is not performed (e.g. in Om1 mode), IsLiveOut will
/// always be true and the folding optimization will never be performed.
bool IsLiveOut = true;
// NumUses counts the number of times Var is used as a source operand in the
- // basic block. If IsComplex is true and there is more than one use of Var,
+ // basic block. If IsComplex is true and there is more than one use of Var,
// then the folding optimization is disabled for Var.
uint32_t NumUses = 0;
};
@@ -166,7 +165,7 @@
/// Returns true if the producing instruction has a "complex" lowering sequence.
/// This generally means that its lowering sequence requires more than one
/// conditional branch, namely 64-bit integer compares and some floating-point
-/// compares. When this is true, and there is more than one consumer, we prefer
+/// compares. When this is true, and there is more than one consumer, we prefer
/// to disable the folding optimization because it minimizes branches.
template <class MachineTraits>
bool BoolFolding<MachineTraits>::hasComplexLowering(const Inst *Instr) {
@@ -222,9 +221,9 @@
setInvalid(I.first);
continue;
}
- // Mark as "dead" rather than outright deleting. This is so that other
+ // Mark as "dead" rather than outright deleting. This is so that other
// peephole style optimizations during or before lowering have access to
- // this instruction in undeleted form. See for example
+ // this instruction in undeleted form. See for example
// tryOptimizedCmpxchgCmpBr().
I.second.Instr->setDead();
}
@@ -303,8 +302,9 @@
// Run this early so it can be used to focus optimizations on potentially hot
// code.
- // TODO(stichnot,ascull): currently only used for regalloc not expensive high
- // level optimizations which could be focused on potentially hot code.
+  // TODO(stichnot,ascull): currently only used for regalloc, not for the
+  // expensive high-level optimizations which could be focused on potentially
+  // hot code.
Func->computeLoopNestDepth();
Func->dump("After loop nest depth analysis");
@@ -312,7 +312,7 @@
Func->getVMetadata()->init(VMK_SingleDefs);
Func->doAddressOpt();
- // Find read-modify-write opportunities. Do this after address mode
+ // Find read-modify-write opportunities. Do this after address mode
// optimization so that doAddressOpt() doesn't need to be applied to RMW
// instructions as well.
findRMW();
@@ -321,8 +321,8 @@
// Argument lowering
Func->doArgLowering();
- // Target lowering. This requires liveness analysis for some parts of the
- // lowering decisions, such as compare/branch fusing. If non-lightweight
+ // Target lowering. This requires liveness analysis for some parts of the
+ // lowering decisions, such as compare/branch fusing. If non-lightweight
// liveness analysis is used, the instructions need to be renumbered first
// TODO: This renumbering should only be necessary if we're actually
// calculating live intervals, which we only do for register allocation.
@@ -330,9 +330,9 @@
if (Func->hasError())
return;
- // TODO: It should be sufficient to use the fastest liveness calculation, i.e.
- // livenessLightweight(). However, for some reason that slows down the rest
- // of the translation. Investigate.
+ // TODO: It should be sufficient to use the fastest liveness calculation,
+ // i.e. livenessLightweight(). However, for some reason that slows down the
+ // rest of the translation. Investigate.
Func->liveness(Liveness_Basic);
if (Func->hasError())
return;
@@ -357,7 +357,7 @@
Func->liveness(Liveness_Intervals);
if (Func->hasError())
return;
- // Validate the live range computations. The expensive validation call is
+ // Validate the live range computations. The expensive validation call is
// deliberately only made when assertions are enabled.
assert(Func->validateLiveness());
// The post-codegen dump is done here, after liveness analysis and associated
@@ -386,9 +386,9 @@
// Shuffle basic block order if -reorder-basic-blocks is enabled.
Func->shuffleNodes();
- // Branch optimization. This needs to be done just before code emission. In
+ // Branch optimization. This needs to be done just before code emission. In
// particular, no transformations that insert or reorder CfgNodes should be
- // done after branch optimization. We go ahead and do it before nop insertion
+ // done after branch optimization. We go ahead and do it before nop insertion
// to reduce the amount of work needed for searching for opportunities.
Func->doBranchOpt();
Func->dump("After branch optimization");
@@ -495,10 +495,10 @@
Ostream &Str = Func->getContext()->getStrDump();
for (CfgNode *Node : Func->getNodes()) {
// Walk through the instructions, considering each sequence of 3
- // instructions, and look for the particular RMW pattern. Note that this
- // search can be "broken" (false negatives) if there are intervening deleted
- // instructions, or intervening instructions that could be safely moved out
- // of the way to reveal an RMW pattern.
+ // instructions, and look for the particular RMW pattern. Note that this
+ // search can be "broken" (false negatives) if there are intervening
+ // deleted instructions, or intervening instructions that could be safely
+ // moved out of the way to reveal an RMW pattern.
auto E = Node->getInsts().end();
auto I1 = E, I2 = E, I3 = Node->getInsts().begin();
for (; I3 != E; I1 = I2, I2 = I3, ++I3) {
@@ -528,21 +528,21 @@
// problems later.
//
// With this transformation, the Store instruction acquires a Dest
- // variable and is now subject to dead code elimination if there are
- // no more uses of "b". Variable "x" is a beacon for determining
- // whether the Store instruction gets dead-code eliminated. If the
- // Store instruction is eliminated, then it must be the case that
- // the RMW instruction ends x's live range, and therefore the RMW
- // instruction will be retained and later lowered. On the other
- // hand, if the RMW instruction does not end x's live range, then
- // the Store instruction must still be present, and therefore the
- // RMW instruction is ignored during lowering because it is
- // redundant with the Store instruction.
+ // variable and is now subject to dead code elimination if there
+ // are no more uses of "b". Variable "x" is a beacon for
+ // determining whether the Store instruction gets dead-code
+ // eliminated. If the Store instruction is eliminated, then it
+ // must be the case that the RMW instruction ends x's live range,
+ // and therefore the RMW instruction will be retained and later
+ // lowered. On the other hand, if the RMW instruction does not end
+ // x's live range, then the Store instruction must still be
+ // present, and therefore the RMW instruction is ignored during
+ // lowering because it is redundant with the Store instruction.
//
// Note that if "a" has further uses, the RMW transformation may
// still trigger, resulting in two loads and one store, which is
- // worse than the original one load and one store. However, this is
- // probably rare, and caching probably keeps it just as fast.
+ // worse than the original one load and one store. However, this
+ // is probably rare, and caching probably keeps it just as fast.
if (!isSameMemAddressOperand<Machine>(Load->getSourceAddress(),
Store->getAddr()))
continue;
@@ -589,11 +589,10 @@
return Intrinsics::MemoryOrderInvalid;
}
-/// Determines whether the dest of a Load instruction can be folded
-/// into one of the src operands of a 2-operand instruction. This is
-/// true as long as the load dest matches exactly one of the binary
-/// instruction's src operands. Replaces Src0 or Src1 with LoadSrc if
-/// the answer is true.
+/// Determines whether the dest of a Load instruction can be folded into one of
+/// the src operands of a 2-operand instruction. This is true as long as the
+/// load dest matches exactly one of the binary instruction's src operands.
+/// Replaces Src0 or Src1 with LoadSrc if the answer is true.
inline bool canFoldLoadIntoBinaryInst(Operand *LoadSrc, Variable *LoadDest,
Operand *&Src0, Operand *&Src1) {
if (Src0 == LoadDest && Src1 != LoadDest) {
@@ -615,8 +614,8 @@
Operand *LoadSrc = nullptr;
Inst *CurInst = Context.getCur();
Inst *Next = Context.getNextInst();
- // Determine whether the current instruction is a Load
- // instruction or equivalent.
+ // Determine whether the current instruction is a Load instruction or
+ // equivalent.
if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) {
// An InstLoad always qualifies.
LoadDest = Load->getDest();
@@ -624,9 +623,9 @@
LoadSrc = formMemoryOperand(Load->getSourceAddress(),
LoadDest->getType(), DoLegalize);
} else if (auto *Intrin = llvm::dyn_cast<InstIntrinsicCall>(CurInst)) {
- // An AtomicLoad intrinsic qualifies as long as it has a valid
- // memory ordering, and can be implemented in a single
- // instruction (i.e., not i64 on x86-32).
+ // An AtomicLoad intrinsic qualifies as long as it has a valid memory
+ // ordering, and can be implemented in a single instruction (i.e., not
+ // i64 on x86-32).
Intrinsics::IntrinsicID ID = Intrin->getIntrinsicInfo().ID;
if (ID == Intrinsics::AtomicLoad &&
(Traits::Is64Bit || Intrin->getDest()->getType() != IceType_i64) &&
@@ -638,9 +637,9 @@
DoLegalize);
}
}
- // A Load instruction can be folded into the following
- // instruction only if the following instruction ends the Load's
- // Dest variable's live range.
+ // A Load instruction can be folded into the following instruction only
+ // if the following instruction ends the Load's Dest variable's live
+ // range.
if (LoadDest && Next && Next->isLastUse(LoadDest)) {
assert(LoadSrc);
Inst *NewInst = nullptr;
@@ -673,8 +672,7 @@
Select->getCondition(), Src0, Src1);
}
} else if (auto *Cast = llvm::dyn_cast<InstCast>(Next)) {
- // The load dest can always be folded into a Cast
- // instruction.
+ // The load dest can always be folded into a Cast instruction.
Variable *Src0 = llvm::dyn_cast<Variable>(Cast->getSrc(0));
if (Src0 == LoadDest) {
NewInst = InstCast::create(Func, Cast->getCastKind(),
@@ -685,8 +683,8 @@
CurInst->setDeleted();
Next->setDeleted();
Context.insert(NewInst);
- // Update NewInst->LiveRangesEnded so that target lowering
- // may benefit. Also update NewInst->HasSideEffects.
+ // Update NewInst->LiveRangesEnded so that target lowering may
+ // benefit. Also update NewInst->HasSideEffects.
NewInst->spliceLivenessInfo(Next, CurInst);
}
}
@@ -721,8 +719,8 @@
Reg = Func->makeVariable(Ty);
Reg->setRegNum(RegNum);
PhysicalRegisters[Ty][RegNum] = Reg;
- // Specially mark esp as an "argument" so that it is considered
- // live upon function entry.
+ // Specially mark esp as an "argument" so that it is considered live upon
+ // function entry.
if (RegNum == Traits::RegisterSet::Reg_esp) {
Func->addImplicitArg(Reg);
Reg->setIgnoreLiveness();
@@ -782,13 +780,12 @@
/// Helper function for addProlog().
///
-/// This assumes Arg is an argument passed on the stack. This sets the
-/// frame offset for Arg and updates InArgsSizeBytes according to Arg's
-/// width. For an I64 arg that has been split into Lo and Hi components,
-/// it calls itself recursively on the components, taking care to handle
-/// Lo first because of the little-endian architecture. Lastly, this
-/// function generates an instruction to copy Arg into its assigned
-/// register if applicable.
+/// This assumes Arg is an argument passed on the stack. This sets the frame
+/// offset for Arg and updates InArgsSizeBytes according to Arg's width. For an
+/// I64 arg that has been split into Lo and Hi components, it calls itself
+/// recursively on the components, taking care to handle Lo first because of the
+/// little-endian architecture. Lastly, this function generates an instruction
+/// to copy Arg into its assigned register if applicable.
template <class Machine>
void TargetX86Base<Machine>::finishArgumentLowering(Variable *Arg,
Variable *FramePtr,
@@ -819,8 +816,8 @@
_mov(Arg, Mem);
}
// This argument-copying instruction uses an explicit Traits::X86OperandMem
- // operand instead of a Variable, so its fill-from-stack operation has to be
- // tracked separately for statistics.
+ // operand instead of a Variable, so its fill-from-stack operation has to
+ // be tracked separately for statistics.
Ctx->statsUpdateFills();
}
}
@@ -837,9 +834,8 @@
default:
return;
case IceType_i64:
- // TODO: Only consider F64 if we need to push each half when
- // passing as an argument to a function call. Note that each half
- // is still typed as I32.
+ // TODO: Only consider F64 if we need to push each half when passing as an
+ // argument to a function call. Note that each half is still typed as I32.
case IceType_f64:
break;
}
@@ -946,11 +942,11 @@
template <class Machine>
void TargetX86Base<Machine>::lowerAlloca(const InstAlloca *Inst) {
IsEbpBasedFrame = true;
- // Conservatively require the stack to be aligned. Some stack
- // adjustment operations implemented below assume that the stack is
- // aligned before the alloca. All the alloca code ensures that the
- // stack alignment is preserved after the alloca. The stack alignment
- // restriction can be relaxed in some cases.
+ // Conservatively require the stack to be aligned. Some stack adjustment
+ // operations implemented below assume that the stack is aligned before the
+ // alloca. All the alloca code ensures that the stack alignment is preserved
+ // after the alloca. The stack alignment restriction can be relaxed in some
+ // cases.
NeedsStackAlignment = true;
// TODO(stichnot): minimize the number of adjustments of esp, etc.
@@ -977,8 +973,8 @@
Value = Utils::applyAlignment(Value, Alignment);
_sub(esp, Ctx->getConstantInt32(Value));
} else {
- // Non-constant sizes need to be adjusted to the next highest
- // multiple of the required alignment at runtime.
+ // Non-constant sizes need to be adjusted to the next highest multiple of
+ // the required alignment at runtime.
Variable *T = makeReg(IceType_i32);
_mov(T, TotalSize);
_add(T, Ctx->getConstantInt32(Alignment - 1));
@@ -988,17 +984,16 @@
_mov(Dest, esp);
}
-/// Strength-reduce scalar integer multiplication by a constant (for
-/// i32 or narrower) for certain constants. The lea instruction can be
-/// used to multiply by 3, 5, or 9, and the lsh instruction can be used
-/// to multiply by powers of 2. These can be combined such that
-/// e.g. multiplying by 100 can be done as 2 lea-based multiplies by 5,
-/// combined with left-shifting by 2.
+/// Strength-reduce scalar integer multiplication by a constant (for i32 or
+/// narrower) for certain constants. The lea instruction can be used to multiply
+/// by 3, 5, or 9, and the lsh instruction can be used to multiply by powers of
+/// 2. These can be combined such that e.g. multiplying by 100 can be done as 2
+/// lea-based multiplies by 5, combined with left-shifting by 2.
template <class Machine>
bool TargetX86Base<Machine>::optimizeScalarMul(Variable *Dest, Operand *Src0,
int32_t Src1) {
- // Disable this optimization for Om1 and O0, just to keep things
- // simple there.
+ // Disable this optimization for Om1 and O0, just to keep things simple
+ // there.
if (Ctx->getFlags().getOptLevel() < Opt_1)
return false;
Type Ty = Dest->getType();
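
To make the decomposition concrete, here is a small standalone check (plain C++, not the lowering itself) of the example the comment gives: multiplying by 100 as two multiplies by 5, each expressible with a single lea, followed by a left shift by 2, since 100 == 5 * 5 * 4.

#include <cassert>
#include <cstdint>

int32_t mulBy100(int32_t X) {
  int32_t T = X;
  T = T + T * 4; // lea t, [t + t*4]  ; t = x * 5
  T = T + T * 4; // lea t, [t + t*4]  ; t = x * 25
  T <<= 2;       // shl t, 2          ; t = x * 100
  return T;
}

int main() {
  // Non-negative test values keep the shift well-defined in portable C++.
  for (int32_t X : {0, 1, 7, 12345})
    assert(mulBy100(X) == X * 100);
  return 0;
}
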
@@ -1054,8 +1049,8 @@
// Lea optimization only works for i16 and i32 types, not i8.
if (Ty != IceType_i16 && Ty != IceType_i32 && (Count3 || Count5 || Count9))
return false;
- // Limit the number of lea/shl operations for a single multiply, to
- // a somewhat arbitrary choice of 3.
+ // Limit the number of lea/shl operations for a single multiply, to a
+ // somewhat arbitrary choice of 3.
const uint32_t MaxOpsForOptimizedMul = 3;
if (CountOps > MaxOpsForOptimizedMul)
return false;
@@ -1101,11 +1096,11 @@
}
if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
// These x86-32 helper-call-involved instructions are lowered in this
- // separate switch. This is because loOperand() and hiOperand()
- // may insert redundant instructions for constant blinding and
- // pooling. Such redundant instructions will fail liveness analysis
- // under -Om1 setting. And, actually these arguments do not need
- // to be processed with loOperand() and hiOperand() to be used.
+ // separate switch. This is because loOperand() and hiOperand() may insert
+ // redundant instructions for constant blinding and pooling. Such redundant
+  // instructions will fail liveness analysis under the -Om1 setting. Also,
+  // these arguments do not actually need to be processed with loOperand()
+  // and hiOperand() to be used.
switch (Inst->getOp()) {
case InstArithmetic::Udiv: {
const SizeT MaxSrcs = 2;
@@ -1216,8 +1211,8 @@
_imul(T_2, Src0Lo);
_mov(T_3, Src0Lo, Traits::RegisterSet::Reg_eax);
_mul(T_4Lo, T_3, Src1Lo);
- // The mul instruction produces two dest variables, edx:eax. We
- // create a fake definition of edx to account for this.
+ // The mul instruction produces two dest variables, edx:eax. We create a
+ // fake definition of edx to account for this.
Context.insert(InstFakeDef::create(Func, T_4Hi, T_4Lo));
_mov(DestLo, T_4Lo);
_add(T_4Hi, T_1);
@@ -1253,9 +1248,9 @@
_shl(T_2, T_1);
_test(T_1, BitTest);
_br(Traits::Cond::Br_e, Label);
- // T_2 and T_3 are being assigned again because of the
- // intra-block control flow, so we need the _mov_nonkillable
- // variant to avoid liveness problems.
+ // T_2 and T_3 are being assigned again because of the intra-block
+ // control flow, so we need the _mov_nonkillable variant to avoid
+ // liveness problems.
_mov_nonkillable(T_3, T_2);
_mov_nonkillable(T_2, Zero);
Context.insert(Label);
@@ -1289,9 +1284,9 @@
_shr(T_3, T_1);
_test(T_1, BitTest);
_br(Traits::Cond::Br_e, Label);
- // T_2 and T_3 are being assigned again because of the
- // intra-block control flow, so we need the _mov_nonkillable
- // variant to avoid liveness problems.
+ // T_2 and T_3 are being assigned again because of the intra-block
+ // control flow, so we need the _mov_nonkillable variant to avoid
+ // liveness problems.
_mov_nonkillable(T_2, T_3);
_mov_nonkillable(T_3, Zero);
Context.insert(Label);
@@ -1325,10 +1320,10 @@
_sar(T_3, T_1);
_test(T_1, BitTest);
_br(Traits::Cond::Br_e, Label);
- // T_2 and T_3 are being assigned again because of the
- // intra-block control flow, so T_2 needs the _mov_nonkillable
- // variant to avoid liveness problems. T_3 doesn't need special
- // treatment because it is reassigned via _sar instead of _mov.
+ // T_2 and T_3 are being assigned again because of the intra-block
+ // control flow, so T_2 needs the _mov_nonkillable variant to avoid
+ // liveness problems. T_3 doesn't need special treatment because it is
+ // reassigned via _sar instead of _mov.
_mov_nonkillable(T_2, T_3);
_sar(T_3, SignExtend);
Context.insert(Label);
@@ -1353,8 +1348,8 @@
return;
}
if (isVectorType(Dest->getType())) {
- // TODO: Trap on integer divide and integer modulo by zero.
- // See: https://code.google.com/p/nativeclient/issues/detail?id=3899
+ // TODO: Trap on integer divide and integer modulo by zero. See:
+ // https://code.google.com/p/nativeclient/issues/detail?id=3899
if (llvm::isa<typename Traits::X86OperandMem>(Src1))
Src1 = legalizeToReg(Src1);
switch (Inst->getOp()) {
@@ -1519,8 +1514,8 @@
if (optimizeScalarMul(Dest, Src0, C->getValue()))
return;
}
- // The 8-bit version of imul only allows the form "imul r/m8"
- // where T must be in eax.
+ // The 8-bit version of imul only allows the form "imul r/m8" where T must
+ // be in eax.
if (isByteSizedArithType(Dest->getType())) {
_mov(T, Src0, Traits::RegisterSet::Reg_eax);
Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
@@ -1580,11 +1575,11 @@
}
break;
case InstArithmetic::Sdiv:
- // TODO(stichnot): Enable this after doing better performance
- // and cross testing.
+ // TODO(stichnot): Enable this after doing better performance and cross
+ // testing.
if (false && Ctx->getFlags().getOptLevel() >= Opt_1) {
- // Optimize division by constant power of 2, but not for Om1
- // or O0, just to keep things simple there.
+ // Optimize division by constant power of 2, but not for Om1 or O0, just
+ // to keep things simple there.
if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
int32_t Divisor = C->getValue();
uint32_t UDivisor = static_cast<uint32_t>(Divisor);
@@ -1600,8 +1595,8 @@
// dest=t
uint32_t TypeWidth = Traits::X86_CHAR_BIT * typeWidthInBytes(Ty);
_mov(T, Src0);
- // If for some reason we are dividing by 1, just treat it
- // like an assignment.
+ // If for some reason we are dividing by 1, just treat it like an
+ // assignment.
if (LogDiv > 0) {
// The initial sar is unnecessary when dividing by 2.
if (LogDiv > 1)
@@ -1656,11 +1651,11 @@
}
break;
case InstArithmetic::Srem:
- // TODO(stichnot): Enable this after doing better performance
- // and cross testing.
+ // TODO(stichnot): Enable this after doing better performance and cross
+ // testing.
if (false && Ctx->getFlags().getOptLevel() >= Opt_1) {
- // Optimize mod by constant power of 2, but not for Om1 or O0,
- // just to keep things simple there.
+ // Optimize mod by constant power of 2, but not for Om1 or O0, just to
+ // keep things simple there.
if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
int32_t Divisor = C->getValue();
uint32_t UDivisor = static_cast<uint32_t>(Divisor);
@@ -1777,8 +1772,8 @@
// memory.
Src0Legal = legalize(Src0);
} else {
- // If Dest could be a stack operand, then RI must be a physical
- // register or a scalar integer immediate.
+ // If Dest could be a stack operand, then RI must be a physical register
+ // or a scalar integer immediate.
Src0Legal = legalize(Src0, Legal_Reg | Legal_Imm);
}
if (isVectorType(Dest->getType()))
@@ -1803,8 +1798,8 @@
default:
break;
case BoolFolding::PK_Icmp32: {
- // TODO(stichnot): Refactor similarities between this block and
- // the corresponding code in lowerIcmp().
+ // TODO(stichnot): Refactor similarities between this block and the
+ // corresponding code in lowerIcmp().
auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer);
Operand *Src0 = Producer->getSrc(0);
Operand *Src1 = legalize(Producer->getSrc(1));
@@ -1835,10 +1830,10 @@
case InstCast::Sext: {
// Src0RM is the source operand legalized to physical register or memory,
// but not immediate, since the relevant x86 native instructions don't
- // allow an immediate operand. If the operand is an immediate, we could
- // consider computing the strength-reduced result at translation time,
- // but we're unlikely to see something like that in the bitcode that
- // the optimizer wouldn't have already taken care of.
+ // allow an immediate operand. If the operand is an immediate, we could
+ // consider computing the strength-reduced result at translation time, but
+ // we're unlikely to see something like that in the bitcode that the
+ // optimizer wouldn't have already taken care of.
Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
if (isVectorType(Dest->getType())) {
Type DestTy = Dest->getType();
@@ -1898,8 +1893,8 @@
typeWidthInBytes(Src0RM->getType())) {
_mov(T, Src0RM);
} else {
- // Widen the source using movsx or movzx. (It doesn't matter
- // which one, since the following shl/sar overwrite the bits.)
+ // Widen the source using movsx or movzx. (It doesn't matter which one,
+ // since the following shl/sar overwrite the bits.)
_movzx(T, Src0RM);
}
_shl(T, ShiftAmount);
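A small illustration (plain C++, not the CL's code) of the shl/sar widening trick used here for sign extension:

#include <cstdint>

// Illustrative only: sign-extend the low 8 bits of a 32-bit value by shifting
// them to the top of the register and arithmetic-shifting back down. The
// initial widening move can be movsx or movzx, as the comment above notes.
static int32_t signExtend8To32(uint32_t Reg) {
  const int ShiftAmount = 32 - 8;
  return static_cast<int32_t>(Reg << ShiftAmount) >> ShiftAmount;
}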
@@ -2010,12 +2005,11 @@
_cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq);
_movp(Dest, T);
} else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
- // Use a helper for converting floating-point values to 64-bit
- // integers. SSE2 appears to have no way to convert from xmm
- // registers to something like the edx:eax register pair, and
- // gcc and clang both want to use x87 instructions complete with
- // temporary manipulation of the status word. This helper is
- // not needed for x86-64.
+ // Use a helper for converting floating-point values to 64-bit integers.
+ // SSE2 appears to have no way to convert from xmm registers to something
+ // like the edx:eax register pair, and gcc and clang both want to use x87
+ // instructions complete with temporary manipulation of the status word.
+ // This helper is not needed for x86-64.
split64(Dest);
const SizeT MaxSrcs = 1;
Type SrcType = Inst->getSrc(0)->getType();
@@ -2150,8 +2144,8 @@
lowerCall(Call);
} else if (Src0->getType() == IceType_i64 ||
(!Traits::Is64Bit && Src0->getType() == IceType_i32)) {
- // Use a helper for x86-32 and x86-64. Also use a helper for
- // i32 on x86-32.
+ // Use a helper for x86-32 and x86-64. Also use a helper for i32 on
+ // x86-32.
const SizeT MaxSrcs = 1;
Type DestType = Dest->getType();
IceString TargetString;
@@ -2285,8 +2279,8 @@
if (Traits::Is64Bit) {
Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
Variable *T = makeReg(IceType_f64);
- // Movd requires its fp argument (in this case, the bitcast destination)
- // to be an xmm register.
+ // Movd requires its fp argument (in this case, the bitcast
+ // destination) to be an xmm register.
T->setMustHaveReg();
_movd(T, Src0RM);
_mov(Dest, T);
@@ -2318,8 +2312,8 @@
Func, Spill, Traits::VariableSplit::High);
_mov(T_Lo, loOperand(Src0));
// Technically, the Spill is defined after the _store happens, but
- // SpillLo is considered a "use" of Spill so define Spill before it
- // is used.
+ // SpillLo is considered a "use" of Spill so define Spill before it is
+ // used.
Context.insert(InstFakeDef::create(Func, Spill));
_store(T_Lo, SpillLo);
_mov(T_Hi, hiOperand(Src0));
@@ -2384,8 +2378,8 @@
// Use pshufd and movd/movss.
Variable *T = nullptr;
if (Index) {
- // The shuffle only needs to occur if the element to be extracted
- // is not at the lowest index.
+ // The shuffle only needs to occur if the element to be extracted is not
+ // at the lowest index.
Constant *Mask = Ctx->getConstantInt32(Index);
T = makeReg(Ty);
_pshufd(T, legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem), Mask);
@@ -2396,11 +2390,11 @@
if (InVectorElementTy == IceType_i32) {
_movd(ExtractedElementR, T);
} else { // Ty == IceType_f32
- // TODO(wala): _movss is only used here because _mov does not
- // allow a vector source and a scalar destination. _mov should be
- // able to be used here.
- // _movss is a binary instruction, so the FakeDef is needed to
- // keep the live range analysis consistent.
+      // TODO(wala): _movss is only used here because _mov does not allow a
+      // vector source and a scalar destination. It should be possible to use
+      // _mov here.
+ // _movss is a binary instruction, so the FakeDef is needed to keep the
+ // live range analysis consistent.
Context.insert(InstFakeDef::create(Func, ExtractedElementR));
_movss(ExtractedElementR, T);
}
@@ -2408,8 +2402,8 @@
assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);
// Spill the value to a stack slot and do the extraction in memory.
//
- // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when
- // support for legalizing to mem is implemented.
+ // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when support
+ // for legalizing to mem is implemented.
Variable *Slot = Func->makeVariable(Ty);
Slot->setMustNotHaveReg();
_movp(Slot, legalizeToReg(SourceVectNotLegalized));
@@ -2589,9 +2583,9 @@
Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
- // SSE2 only has signed comparison operations. Transform unsigned
- // inputs in a manner that allows for the use of signed comparison
- // operations by flipping the high order bits.
+ // SSE2 only has signed comparison operations. Transform unsigned inputs in
+ // a manner that allows for the use of signed comparison operations by
+ // flipping the high order bits.
if (Condition == InstIcmp::Ugt || Condition == InstIcmp::Uge ||
Condition == InstIcmp::Ult || Condition == InstIcmp::Ule) {
Variable *T0 = makeReg(Ty);
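The sign-bit flip mentioned above can be checked with a small sketch (not the CL's code):

#include <cstdint>

// Illustrative only: a <u b holds exactly when
// (a ^ 0x80000000) <s (b ^ 0x80000000), which lets signed SSE2 compares
// implement the unsigned predicates.
static bool ultViaSigned(uint32_t A, uint32_t B) {
  const int32_t AS = static_cast<int32_t>(A ^ 0x80000000u);
  const int32_t BS = static_cast<int32_t>(B ^ 0x80000000u);
  return AS < BS; // same result as (A < B) on unsigned values
}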
@@ -2726,8 +2720,8 @@
Type InVectorElementTy = Traits::getInVectorElementType(Ty);
if (ElementTy == IceType_i1) {
- // Expand the element to the appropriate size for it to be inserted
- // in the vector.
+ // Expand the element to the appropriate size for it to be inserted in the
+ // vector.
Variable *Expanded = Func->makeVariable(InVectorElementTy);
InstCast *Cast = InstCast::create(Func, InstCast::Zext, Expanded,
ElementToInsertNotLegalized);
@@ -2773,14 +2767,13 @@
return;
}
- // shufps treats the source and desination operands as vectors of
- // four doublewords. The destination's two high doublewords are
- // selected from the source operand and the two low doublewords are
- // selected from the (original value of) the destination operand.
- // An insertelement operation can be effected with a sequence of two
- // shufps operations with appropriate masks. In all cases below,
- // Element[0] is being inserted into SourceVectOperand. Indices are
- // ordered from left to right.
+ // shufps treats the source and destination operands as vectors of four
+ // doublewords. The destination's two high doublewords are selected from
+ // the source operand and the two low doublewords are selected from the
+ // (original value of) the destination operand. An insertelement operation
+ // can be effected with a sequence of two shufps operations with
+ // appropriate masks. In all cases below, Element[0] is being inserted into
+ // SourceVectOperand. Indices are ordered from left to right.
//
// insertelement into index 1 (result is stored in ElementR):
// ElementR := ElementR[0, 0] SourceVectRM[0, 0]
@@ -2814,11 +2807,10 @@
}
} else {
assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);
- // Spill the value to a stack slot and perform the insertion in
- // memory.
+ // Spill the value to a stack slot and perform the insertion in memory.
//
- // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when
- // support for legalizing to mem is implemented.
+ // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when support
+ // for legalizing to mem is implemented.
Variable *Slot = Func->makeVariable(Ty);
Slot->setMustNotHaveReg();
_movp(Slot, legalizeToReg(SourceVectNotLegalized));
@@ -2864,25 +2856,25 @@
_mfence();
return;
case Intrinsics::AtomicFenceAll:
- // NOTE: FenceAll should prevent and load/store from being moved
- // across the fence (both atomic and non-atomic). The InstX8632Mfence
- // instruction is currently marked coarsely as "HasSideEffects".
+    // NOTE: FenceAll should prevent any load/store from being moved across the
+ // fence (both atomic and non-atomic). The InstX8632Mfence instruction is
+ // currently marked coarsely as "HasSideEffects".
_mfence();
return;
case Intrinsics::AtomicIsLockFree: {
// X86 is always lock free for 8/16/32/64 bit accesses.
- // TODO(jvoung): Since the result is constant when given a constant
- // byte size, this opens up DCE opportunities.
+ // TODO(jvoung): Since the result is constant when given a constant byte
+ // size, this opens up DCE opportunities.
Operand *ByteSize = Instr->getArg(0);
Variable *Dest = Instr->getDest();
if (ConstantInteger32 *CI = llvm::dyn_cast<ConstantInteger32>(ByteSize)) {
Constant *Result;
switch (CI->getValue()) {
default:
- // Some x86-64 processors support the cmpxchg16b intruction, which
- // can make 16-byte operations lock free (when used with the LOCK
- // prefix). However, that's not supported in 32-bit mode, so just
- // return 0 even for large sizes.
+ // Some x86-64 processors support the cmpxchg16b instruction, which can
+ // make 16-byte operations lock free (when used with the LOCK prefix).
+ // However, that's not supported in 32-bit mode, so just return 0 even
+ // for large sizes.
Result = Ctx->getConstantZero(IceType_i32);
break;
case 1:
@@ -2900,8 +2892,8 @@
return;
}
case Intrinsics::AtomicLoad: {
- // We require the memory address to be naturally aligned.
- // Given that is the case, then normal loads are atomic.
+ // We require the memory address to be naturally aligned. Given that is the
+    // case, normal loads are atomic.
if (!Intrinsics::isMemoryOrderValid(
ID, getConstantMemoryOrder(Instr->getArg(1)))) {
Func->setError("Unexpected memory ordering for AtomicLoad");
@@ -2910,10 +2902,10 @@
Variable *Dest = Instr->getDest();
if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
// Follow what GCC does and use a movq instead of what lowerLoad()
- // normally does (split the load into two).
- // Thus, this skips load/arithmetic op folding. Load/arithmetic folding
- // can't happen anyway, since this is x86-32 and integer arithmetic only
- // happens on 32-bit quantities.
+ // normally does (split the load into two). Thus, this skips
+ // load/arithmetic op folding. Load/arithmetic folding can't happen
+ // anyway, since this is x86-32 and integer arithmetic only happens on
+ // 32-bit quantities.
Variable *T = makeReg(IceType_f64);
typename Traits::X86OperandMem *Addr =
formMemoryOperand(Instr->getArg(0), IceType_f64);
@@ -2929,8 +2921,8 @@
InstLoad *Load = InstLoad::create(Func, Dest, Instr->getArg(0));
lowerLoad(Load);
// Make sure the atomic load isn't elided when unused, by adding a FakeUse.
- // Since lowerLoad may fuse the load w/ an arithmetic instruction,
- // insert the FakeUse on the last-inserted instruction's dest.
+ // Since lowerLoad may fuse the load w/ an arithmetic instruction, insert
+ // the FakeUse on the last-inserted instruction's dest.
Context.insert(
InstFakeUse::create(Func, Context.getLastInserted()->getDest()));
return;
@@ -2953,15 +2945,15 @@
Func->setError("Unexpected memory ordering for AtomicStore");
return;
}
- // We require the memory address to be naturally aligned.
- // Given that is the case, then normal stores are atomic.
- // Add a fence after the store to make it visible.
+    // We require the memory address to be naturally aligned. Given that is the
+    // case, normal stores are atomic. Add a fence after the store to make it
+    // visible.
Operand *Value = Instr->getArg(0);
Operand *Ptr = Instr->getArg(1);
if (!Traits::Is64Bit && Value->getType() == IceType_i64) {
- // Use a movq instead of what lowerStore() normally does
- // (split the store into two), following what GCC does.
- // Cast the bits from int -> to an xmm register first.
+ // Use a movq instead of what lowerStore() normally does (split the store
+      // into two), following what GCC does. Cast the bits from int to an xmm
+      // register first.
Variable *T = makeReg(IceType_f64);
InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, T, Value);
lowerCast(Cast);
@@ -2980,8 +2972,8 @@
case Intrinsics::Bswap: {
Variable *Dest = Instr->getDest();
Operand *Val = Instr->getArg(0);
- // In 32-bit mode, bswap only works on 32-bit arguments, and the
- // argument must be a register. Use rotate left for 16-bit bswap.
+ // In 32-bit mode, bswap only works on 32-bit arguments, and the argument
+ // must be a register. Use rotate left for 16-bit bswap.
if (!Traits::Is64Bit && Val->getType() == IceType_i64) {
Val = legalizeUndef(Val);
Variable *T_Lo = legalizeToReg(loOperand(Val));
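For the 16-bit case mentioned above, the rotate-based byte swap amounts to (illustrative sketch, not the CL's code):

#include <cstdint>

// Illustrative only: a 16-bit byte swap is a rotate by 8 bits (rol r16, 8).
static uint16_t bswap16ViaRotate(uint16_t V) {
  return static_cast<uint16_t>((V << 8) | (V >> 8));
}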
@@ -3070,8 +3062,8 @@
return;
}
case Intrinsics::Ctlz: {
- // The "is zero undef" parameter is ignored and we always return
- // a well-defined value.
+ // The "is zero undef" parameter is ignored and we always return a
+ // well-defined value.
Operand *Val = legalize(Instr->getArg(0));
Operand *FirstVal;
Operand *SecondVal = nullptr;
@@ -3087,8 +3079,8 @@
return;
}
case Intrinsics::Cttz: {
- // The "is zero undef" parameter is ignored and we always return
- // a well-defined value.
+ // The "is zero undef" parameter is ignored and we always return a
+ // well-defined value.
Operand *Val = legalize(Instr->getArg(0));
Operand *FirstVal;
Operand *SecondVal = nullptr;
@@ -3108,8 +3100,8 @@
Type Ty = Src->getType();
Variable *Dest = Instr->getDest();
Variable *T = makeVectorOfFabsMask(Ty);
- // The pand instruction operates on an m128 memory operand, so if
- // Src is an f32 or f64, we need to make sure it's in a register.
+ // The pand instruction operates on an m128 memory operand, so if Src is an
+ // f32 or f64, we need to make sure it's in a register.
if (isVectorType(Ty)) {
if (llvm::isa<typename Traits::X86OperandMem>(Src))
Src = legalizeToReg(Src);
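The mask's effect can be shown in scalar form (illustrative sketch, not the CL's code): fabs is a bitwise AND that clears the sign bit, which is what the pand with the mask from makeVectorOfFabsMask() does per lane.

#include <cstdint>
#include <cstring>

// Illustrative only: clearing the sign bit with 0x7fffffff yields fabs(F).
static float fabsViaMask(float F) {
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(Bits));
  Bits &= 0x7fffffffu; // the f32 lane value of the fabs mask
  std::memcpy(&F, &Bits, sizeof(F));
  return F;
}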
@@ -3694,8 +3686,8 @@
Variable *Reg;
// Copy the data into registers as the source and destination could overlap
- // so make sure not to clobber the memory. This also means overlapping moves
- // can be used as we are taking a safe snapshot of the memory.
+ // so make sure not to clobber the memory. This also means overlapping
+ // moves can be used as we are taking a safe snapshot of the memory.
Type Ty = largestTypeInSize(CountValue);
uint32_t TyWidth = typeWidthInBytes(Ty);
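The "safe snapshot" idea above can be sketched as follows (illustrative only, not the CL's code): all loads happen before any store, so overlapping Dest/Src ranges cannot clobber bytes that have not been read yet.

#include <cstdint>
#include <cstring>

// Illustrative only: copy 8 possibly-overlapping bytes by loading both halves
// into temporaries (the "registers") first, then storing.
static void move8(uint8_t *Dest, const uint8_t *Src) {
  uint32_t Lo, Hi;
  std::memcpy(&Lo, Src, 4);
  std::memcpy(&Hi, Src + 4, 4);
  std::memcpy(Dest, &Lo, 4);
  std::memcpy(Dest + 4, &Hi, 4);
}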
@@ -3896,8 +3888,7 @@
inline bool matchTransitiveAssign(const VariablesMetadata *VMetadata,
Variable *&Var, const Inst *&Reason) {
- // Var originates from Var=SrcVar ==>
- // set Var:=SrcVar
+ // Var originates from Var=SrcVar ==> set Var:=SrcVar
if (Var == nullptr)
return false;
if (const Inst *VarAssign = VMetadata->getSingleDefinition(Var)) {
@@ -4059,10 +4050,10 @@
(void)Offset; // TODO: pattern-match for non-zero offsets.
if (Base == nullptr)
return;
- // If the Base has more than one use or is live across multiple
- // blocks, then don't go further. Alternatively (?), never consider
- // a transformation that would change a variable that is currently
- // *not* live across basic block boundaries into one that *is*.
+ // If the Base has more than one use or is live across multiple blocks, then
+ // don't go further. Alternatively (?), never consider a transformation that
+ // would change a variable that is currently *not* live across basic block
+ // boundaries into one that *is*.
if (Func->getVMetadata()->isMultiBlock(Base) /* || Base->getUseCount() > 1*/)
return;
@@ -4232,8 +4223,8 @@
Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem);
Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem);
if (InstructionSet >= Traits::SSE4_1) {
- // TODO(wala): If the condition operand is a constant, use blendps
- // or pblendw.
+ // TODO(wala): If the condition operand is a constant, use blendps or
+ // pblendw.
//
// Use blendvps or pblendvb to implement select.
if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 ||
@@ -4310,8 +4301,8 @@
_cmp(CmpOpnd0, CmpOpnd1);
if (typeWidthInBytes(DestTy) == 1 || isFloatingType(DestTy)) {
- // The cmov instruction doesn't allow 8-bit or FP operands, so
- // we need explicit control flow.
+ // The cmov instruction doesn't allow 8-bit or FP operands, so we need
+ // explicit control flow.
// d=cmp e,f; a=d?b:c ==> cmp e,f; a=b; jne L1; a=c; L1:
typename Traits::Insts::Label *Label =
Traits::Insts::Label::create(Func, this);
@@ -4324,8 +4315,8 @@
return;
}
// mov t, SrcF; cmov_cond t, SrcT; mov dest, t
- // But if SrcT is immediate, we might be able to do better, as
- // the cmov instruction doesn't allow an immediate operand:
+ // But if SrcT is immediate, we might be able to do better, as the cmov
+ // instruction doesn't allow an immediate operand:
// mov t, SrcT; cmov_!cond t, SrcF; mov dest, t
if (llvm::isa<Constant>(SrcT) && !llvm::isa<Constant>(SrcF)) {
std::swap(SrcT, SrcF);
@@ -4686,8 +4677,8 @@
/// %cmp.ext = sext <n x i1> %cmp to <n x ty>
///
/// We can eliminate the sext operation by copying the result of pcmpeqd,
-/// pcmpgtd, or cmpps (which produce sign extended results) to the result
-/// of the sext operation.
+/// pcmpgtd, or cmpps (which produce sign extended results) to the result of the
+/// sext operation.
template <class Machine>
void TargetX86Base<Machine>::eliminateNextVectorSextInstruction(
Variable *SignExtendedResult) {
@@ -4712,13 +4703,12 @@
template <class Machine>
void TargetX86Base<Machine>::lowerRMW(
const typename Traits::Insts::FakeRMW *RMW) {
- // If the beacon variable's live range does not end in this
- // instruction, then it must end in the modified Store instruction
- // that follows. This means that the original Store instruction is
- // still there, either because the value being stored is used beyond
- // the Store instruction, or because dead code elimination did not
- // happen. In either case, we cancel RMW lowering (and the caller
- // deletes the RMW instruction).
+ // If the beacon variable's live range does not end in this instruction, then
+ // it must end in the modified Store instruction that follows. This means
+ // that the original Store instruction is still there, either because the
+ // value being stored is used beyond the Store instruction, or because dead
+ // code elimination did not happen. In either case, we cancel RMW lowering
+ // (and the caller deletes the RMW instruction).
if (!RMW->isLastUse(RMW->getBeacon()))
return;
Operand *Src = RMW->getData();
@@ -4800,10 +4790,9 @@
}
}
-/// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to
-/// preserve integrity of liveness analysis. Undef values are also
-/// turned into zeroes, since loOperand() and hiOperand() don't expect
-/// Undef input.
+/// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to preserve
+/// integrity of liveness analysis. Undef values are also turned into zeroes,
+/// since loOperand() and hiOperand() don't expect Undef input.
template <class Machine> void TargetX86Base<Machine>::prelowerPhis() {
if (Traits::Is64Bit) {
// On x86-64 we don't need to prelower phis -- the architecture can handle
@@ -4811,25 +4800,25 @@
return;
}
- // Pause constant blinding or pooling, blinding or pooling will be done
- // later during phi lowering assignments
+  // Pause constant blinding or pooling; blinding or pooling will be done later
+  // during phi lowering assignments.
BoolFlagSaver B(RandomizationPoolingPaused, true);
PhiLowering::prelowerPhis32Bit<TargetX86Base<Machine>>(
this, Context.getNode(), Func);
}
-// There is no support for loading or emitting vector constants, so the
-// vector values returned from makeVectorOfZeros, makeVectorOfOnes,
-// etc. are initialized with register operations.
+// There is no support for loading or emitting vector constants, so the vector
+// values returned from makeVectorOfZeros, makeVectorOfOnes, etc. are
+// initialized with register operations.
//
-// TODO(wala): Add limited support for vector constants so that
-// complex initialization in registers is unnecessary.
+// TODO(wala): Add limited support for vector constants so that complex
+// initialization in registers is unnecessary.
template <class Machine>
Variable *TargetX86Base<Machine>::makeVectorOfZeros(Type Ty, int32_t RegNum) {
Variable *Reg = makeReg(Ty, RegNum);
- // Insert a FakeDef, since otherwise the live range of Reg might
- // be overestimated.
+ // Insert a FakeDef, since otherwise the live range of Reg might be
+ // overestimated.
Context.insert(InstFakeDef::create(Func, Reg));
_pxor(Reg, Reg);
return Reg;
@@ -4875,12 +4864,12 @@
}
}
-/// Construct a mask in a register that can be and'ed with a
-/// floating-point value to mask off its sign bit. The value will be
-/// <4 x 0x7fffffff> for f32 and v4f32, and <2 x 0x7fffffffffffffff>
-/// for f64. Construct it as vector of ones logically right shifted
-/// one bit. TODO(stichnot): Fix the wala TODO above, to represent
-/// vector constants in memory.
+/// Construct a mask in a register that can be and'ed with a floating-point
+/// value to mask off its sign bit. The value will be <4 x 0x7fffffff> for f32
+/// and v4f32, and <2 x 0x7fffffffffffffff> for f64. Construct it as a vector
+/// of ones logically right shifted one bit.
+/// TODO(stichnot): Fix the wala TODO above, to represent vector constants in
+/// memory.
template <class Machine>
Variable *TargetX86Base<Machine>::makeVectorOfFabsMask(Type Ty,
int32_t RegNum) {
@@ -4897,9 +4886,9 @@
assert(Slot->mustNotHaveReg());
assert(Slot->getRegNum() == Variable::NoRegister);
// Compute the location of Loc in memory.
- // TODO(wala,stichnot): lea should not be required. The address of
- // the stack slot is known at compile time (although not until after
- // addProlog()).
+  // TODO(wala,stichnot): lea should not be required. The address of the
+  // stack slot is known at compile time (although not until after
+  // addProlog()).
const Type PointerType = IceType_i32;
Variable *Loc = makeReg(PointerType);
_lea(Loc, Slot);
@@ -4925,20 +4914,19 @@
Operand *TargetX86Base<Machine>::legalize(Operand *From, LegalMask Allowed,
int32_t RegNum) {
Type Ty = From->getType();
- // Assert that a physical register is allowed. To date, all calls
- // to legalize() allow a physical register. If a physical register
- // needs to be explicitly disallowed, then new code will need to be
- // written to force a spill.
+ // Assert that a physical register is allowed. To date, all calls to
+ // legalize() allow a physical register. If a physical register needs to be
+ // explicitly disallowed, then new code will need to be written to force a
+ // spill.
assert(Allowed & Legal_Reg);
- // If we're asking for a specific physical register, make sure we're
- // not allowing any other operand kinds. (This could be future
- // work, e.g. allow the shl shift amount to be either an immediate
- // or in ecx.)
+ // If we're asking for a specific physical register, make sure we're not
+ // allowing any other operand kinds. (This could be future work, e.g. allow
+ // the shl shift amount to be either an immediate or in ecx.)
assert(RegNum == Variable::NoRegister || Allowed == Legal_Reg);
if (auto Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(From)) {
- // Before doing anything with a Mem operand, we need to ensure
- // that the Base and Index components are in physical registers.
+ // Before doing anything with a Mem operand, we need to ensure that the
+ // Base and Index components are in physical registers.
Variable *Base = Mem->getBase();
Variable *Index = Mem->getIndex();
Variable *RegBase = nullptr;
@@ -4983,8 +4971,8 @@
}
}
- // If the operand is an 32 bit constant integer, we should check
- // whether we need to randomize it or pool it.
+    // If the operand is a 32 bit constant integer, we should check whether we
+ // need to randomize it or pool it.
if (ConstantInteger32 *C = llvm::dyn_cast<ConstantInteger32>(Const)) {
Operand *NewConst = randomizeOrPoolImmediate(C, RegNum);
if (NewConst != Const) {
@@ -4992,8 +4980,8 @@
}
}
- // Convert a scalar floating point constant into an explicit
- // memory operand.
+ // Convert a scalar floating point constant into an explicit memory
+ // operand.
if (isScalarFloatingType(Ty)) {
Variable *Base = nullptr;
std::string Buffer;
@@ -5016,9 +5004,9 @@
return From;
}
if (auto Var = llvm::dyn_cast<Variable>(From)) {
- // Check if the variable is guaranteed a physical register. This
- // can happen either when the variable is pre-colored or when it is
- // assigned infinite weight.
+ // Check if the variable is guaranteed a physical register. This can happen
+ // either when the variable is pre-colored or when it is assigned infinite
+ // weight.
bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg());
// We need a new physical register for the operand if:
// Mem is not allowed and Var isn't guaranteed a physical
@@ -5046,16 +5034,16 @@
Type Ty = From->getType();
if (llvm::isa<ConstantUndef>(From)) {
// Lower undefs to zero. Another option is to lower undefs to an
- // uninitialized register; however, using an uninitialized register
- // results in less predictable code.
+ // uninitialized register; however, using an uninitialized register results
+ // in less predictable code.
//
- // If in the future the implementation is changed to lower undef
- // values to uninitialized registers, a FakeDef will be needed:
+ // If in the future the implementation is changed to lower undef values to
+ // uninitialized registers, a FakeDef will be needed:
// Context.insert(InstFakeDef::create(Func, Reg));
// This is in order to ensure that the live range of Reg is not
- // overestimated. If the constant being lowered is a 64 bit value,
- // then the result should be split and the lo and hi components will
- // need to go in uninitialized registers.
+ // overestimated. If the constant being lowered is a 64 bit value, then
+ // the result should be split and the lo and hi components will need to go
+ // in uninitialized registers.
if (isVectorType(Ty))
return makeVectorOfZeros(Ty, RegNum);
return Ctx->getConstantZero(Ty);
@@ -5063,12 +5051,11 @@
return From;
}
-/// For the cmp instruction, if Src1 is an immediate, or known to be a
-/// physical register, we can allow Src0 to be a memory operand.
-/// Otherwise, Src0 must be copied into a physical register.
-/// (Actually, either Src0 or Src1 can be chosen for the physical
-/// register, but unfortunately we have to commit to one or the other
-/// before register allocation.)
+/// For the cmp instruction, if Src1 is an immediate, or known to be a physical
+/// register, we can allow Src0 to be a memory operand. Otherwise, Src0 must be
+/// copied into a physical register. (Actually, either Src0 or Src1 can be
+/// chosen for the physical register, but unfortunately we have to commit to one
+/// or the other before register allocation.)
template <class Machine>
Operand *TargetX86Base<Machine>::legalizeSrc0ForCmp(Operand *Src0,
Operand *Src1) {
@@ -5095,11 +5082,10 @@
Constant *Offset = llvm::dyn_cast<Constant>(Opnd);
assert(Base || Offset);
if (Offset) {
- // During memory operand building, we do not blind or pool
- // the constant offset, we will work on the whole memory
- // operand later as one entity later, this save one instruction.
- // By turning blinding and pooling off, we guarantee
- // legalize(Offset) will return a Constant*.
+    // During memory operand building, we do not blind or pool the constant
+    // offset; we will work on the whole memory operand later as one entity,
+    // which saves one instruction. By turning blinding and pooling off, we
+    // guarantee legalize(Offset) will return a Constant*.
{
BoolFlagSaver B(RandomizationPoolingPaused, true);
@@ -5111,8 +5097,8 @@
}
Mem = Traits::X86OperandMem::create(Func, Ty, Base, Offset);
}
- // Do legalization, which contains randomization/pooling
- // or do randomization/pooling.
+  // Do legalization, which contains randomization/pooling, or do
+  // randomization/pooling only.
return llvm::cast<typename Traits::X86OperandMem>(
DoLegalize ? legalize(Mem) : randomizeOrPoolImmediate(Mem));
}
@@ -5235,11 +5221,10 @@
// insert: lea -cookie[Reg], Reg
// => Reg
// If we have already assigned a phy register, we must come from
- // andvancedPhiLowering()=>lowerAssign(). In this case we should reuse
- // the assigned register as this assignment is that start of its use-def
- // chain. So we add RegNum argument here.
- // Note we use 'lea' instruction instead of 'xor' to avoid affecting
- // the flags.
+      // advancedPhiLowering()=>lowerAssign(). In this case we should reuse the
+      // assigned register, as this assignment is the start of its use-def
+      // chain. So we add the RegNum argument here. Note we use the 'lea'
+      // instruction instead of 'xor' to avoid affecting the flags.
Variable *Reg = makeReg(IceType_i32, RegNum);
ConstantInteger32 *Integer = llvm::cast<ConstantInteger32>(Immediate);
uint32_t Value = Integer->getValue();
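The blinding scheme suggested by the "lea -cookie[Reg], Reg" comment above can be sketched arithmetically (illustrative only, not the CL's code); lea is used so EFLAGS stays untouched.

#include <cstdint>

// Illustrative only: the emitted constant is Value + Cookie, and the lea with
// displacement -Cookie recovers the original value in the register.
static uint32_t blindThenRecover(uint32_t Value, uint32_t Cookie) {
  const uint32_t Blinded = Value + Cookie; // mov $(Value + Cookie), Reg
  return Blinded - Cookie;                 // lea -Cookie(Reg), Reg
}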
@@ -5268,8 +5253,8 @@
assert(Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool);
Immediate->setShouldBePooled(true);
// if we have already assigned a phy register, we must come from
- // andvancedPhiLowering()=>lowerAssign(). In this case we should reuse
- // the assigned register as this assignment is that start of its use-def
+ // advancedPhiLowering()=>lowerAssign(). In this case we should reuse the
+      // assigned register, as this assignment is the start of its use-def
// chain. So we add RegNum argument here.
Variable *Reg = makeReg(Immediate->getType(), RegNum);
IceString Label;
@@ -5302,8 +5287,8 @@
return MemOperand;
}
- // If this memory operand is already a randommized one, we do
- // not randomize it again.
+ // If this memory operand is already a randomized one, we do not randomize it
+ // again.
if (MemOperand->getRandomized())
return MemOperand;
@@ -5338,9 +5323,8 @@
Variable *RegTemp = makeReg(MemOperand->getOffset()->getType(), RegNum);
_lea(RegTemp, TempMemOperand);
// As source operand doesn't use the dstreg, we don't need to add
- // _set_dest_nonkillable().
- // But if we use the same Dest Reg, that is, with RegNum
- // assigned, we should add this _set_dest_nonkillable()
+        // _set_dest_nonkillable(). But if we use the same Dest Reg, that is,
+        // with RegNum assigned, we should add this _set_dest_nonkillable().
if (RegNum != Variable::NoRegister)
_set_dest_nonkillable();
@@ -5366,12 +5350,11 @@
// =>[RegTemp, index, shift]
assert(Ctx->getFlags().getRandomizeAndPoolImmediatesOption() ==
RPI_Pool);
- // Memory operand should never exist as source operands in phi
- // lowering assignments, so there is no need to reuse any registers
- // here. For phi lowering, we should not ask for new physical
- // registers in general.
- // However, if we do meet Memory Operand during phi lowering, we
- // should not blind or pool the immediates for now.
+        // Memory operands should never appear as source operands in phi
+        // lowering assignments, so there is no need to reuse any registers
+        // here. For phi lowering, we should not ask for new physical registers
+        // in general. However, if we do encounter a memory operand during phi
+        // lowering, we should not blind or pool the immediates for now.
if (RegNum != Variable::NoRegister)
return MemOperand;
Variable *RegTemp = makeReg(IceType_i32);
diff --git a/src/IceThreading.h b/src/IceThreading.h
index f59f46e..b0bcc01 100644
--- a/src/IceThreading.h
+++ b/src/IceThreading.h
@@ -22,31 +22,29 @@
namespace Ice {
-/// BoundedProducerConsumerQueue is a work queue that allows multiple
-/// producers and multiple consumers. A producer adds entries using
-/// blockingPush(), and may block if the queue is "full". A producer
-/// uses notifyEnd() to indicate that no more entries will be added. A
-/// consumer removes an item using blockingPop(), which will return
-/// nullptr if notifyEnd() has been called and the queue is empty (it
-/// never returns nullptr if the queue contained any items).
+/// BoundedProducerConsumerQueue is a work queue that allows multiple producers
+/// and multiple consumers. A producer adds entries using blockingPush(), and
+/// may block if the queue is "full". A producer uses notifyEnd() to indicate
+/// that no more entries will be added. A consumer removes an item using
+/// blockingPop(), which will return nullptr if notifyEnd() has been called and
+/// the queue is empty (it never returns nullptr if the queue contained any
+/// items).
///
-/// The MaxSize ctor arg controls the maximum size the queue can grow
-/// to (subject to a hard limit of MaxStaticSize-1). The Sequential
-/// arg indicates purely sequential execution in which the single
-/// thread should never wait().
+/// The MaxSize ctor arg controls the maximum size the queue can grow to
+/// (subject to a hard limit of MaxStaticSize-1). The Sequential arg indicates
+/// purely sequential execution in which the single thread should never wait().
///
-/// Two condition variables are used in the implementation.
-/// GrewOrEnded signals a waiting worker that a producer has changed
-/// the state of the queue. Shrunk signals a blocked producer that a
-/// consumer has changed the state of the queue.
+/// Two condition variables are used in the implementation. GrewOrEnded signals
+/// a waiting worker that a producer has changed the state of the queue. Shrunk
+/// signals a blocked producer that a consumer has changed the state of the
+/// queue.
///
-/// The methods begin with Sequential-specific code to be most clear.
-/// The lock and condition variables are not used in the Sequential
-/// case.
+/// The methods begin with Sequential-specific code to be most clear. The lock
+/// and condition variables are not used in the Sequential case.
///
/// Internally, the queue is implemented as a circular array of size
-/// MaxStaticSize, where the queue boundaries are denoted by the Front
-/// and Back fields. Front==Back indicates an empty queue.
+/// MaxStaticSize, where the queue boundaries are denoted by the Front and Back
+/// fields. Front==Back indicates an empty queue.
template <typename T, size_t MaxStaticSize = 128>
class BoundedProducerConsumerQueue {
BoundedProducerConsumerQueue() = delete;
@@ -60,8 +58,8 @@
void blockingPush(T *Item) {
{
std::unique_lock<GlobalLockType> L(Lock);
- // If the work queue is already "full", wait for a consumer to
- // grab an element and shrink the queue.
+ // If the work queue is already "full", wait for a consumer to grab an
+ // element and shrink the queue.
Shrunk.wait(L, [this] { return size() < MaxSize || Sequential; });
push(Item);
}
@@ -103,27 +101,23 @@
GlobalLockType Lock;
ICE_CACHELINE_BOUNDARY;
- /// GrewOrEnded is written by the producers and read by the
- /// consumers. It is notified (by the producer) when something is
- /// added to the queue, in case consumers are waiting for a non-empty
- /// queue.
+ /// GrewOrEnded is written by the producers and read by the consumers. It is
+ /// notified (by the producer) when something is added to the queue, in case
+ /// consumers are waiting for a non-empty queue.
std::condition_variable GrewOrEnded;
- /// Back is the index into WorkItems[] of where the next element will
- /// be pushed. (More precisely, Back&MaxStaticSize is the index.)
- /// It is written by the producers, and read by all via size() and
- /// empty().
+ /// Back is the index into WorkItems[] of where the next element will be
+ /// pushed. (More precisely, Back&MaxStaticSize is the index.) It is written
+ /// by the producers, and read by all via size() and empty().
size_t Back = 0;
ICE_CACHELINE_BOUNDARY;
- /// Shrunk is notified (by the consumer) when something is removed
- /// from the queue, in case a producer is waiting for the queue to
- /// drop below maximum capacity. It is written by the consumers and
- /// read by the producers.
+ /// Shrunk is notified (by the consumer) when something is removed from the
+ /// queue, in case a producer is waiting for the queue to drop below maximum
+ /// capacity. It is written by the consumers and read by the producers.
std::condition_variable Shrunk;
- /// Front is the index into WorkItems[] of the oldest element,
- /// i.e. the next to be popped. (More precisely Front&MaxStaticSize
- /// is the index.) It is written by the consumers, and read by all
- /// via size() and empty().
+ /// Front is the index into WorkItems[] of the oldest element, i.e. the next
+ /// to be popped. (More precisely Front&MaxStaticSize is the index.) It is
+ /// written by the consumers, and read by all via size() and empty().
size_t Front = 0;
ICE_CACHELINE_BOUNDARY;
@@ -131,8 +125,7 @@
/// MaxSize and Sequential are read by all and written by none.
const size_t MaxSize;
const bool Sequential;
- /// IsEnded is read by the consumers, and only written once by the
- /// producer.
+ /// IsEnded is read by the consumers, and only written once by the producer.
bool IsEnded = false;
/// The lock must be held when the following methods are called.
@@ -148,15 +141,14 @@
}
};
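A usage sketch of this queue (not part of this CL); the constructor argument order and the element type here are assumptions based only on the description above.

#include <thread>

static void queueUsageSketch() {
  // Assumed constructor order (Sequential, MaxSize); adjust to the real API.
  BoundedProducerConsumerQueue<int> Queue(/*Sequential=*/false, /*MaxSize=*/16);
  std::thread Consumer([&Queue] {
    // blockingPop() returns nullptr only after notifyEnd() and a drained queue.
    while (int *Item = Queue.blockingPop())
      delete Item; // hypothetical handling of a work item
  });
  for (int I = 0; I < 100; ++I)
    Queue.blockingPush(new int(I)); // may block while the queue is "full"
  Queue.notifyEnd();                // no more items will be pushed
  Consumer.join();
}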
-/// EmitterWorkItem is a simple wrapper around a pointer that
-/// represents a work item to be emitted, i.e. a function or a set of
-/// global declarations and initializers, and it includes a sequence
-/// number so that work items can be emitted in a particular order for
-/// deterministic output. It acts like an interface class, but instead
-/// of making the classes of interest inherit from EmitterWorkItem, it
-/// wraps pointers to these classes. Some space is wasted compared to
-/// storing the pointers in a union, but not too much due to the work
-/// granularity.
+/// EmitterWorkItem is a simple wrapper around a pointer that represents a work
+/// item to be emitted, i.e. a function or a set of global declarations and
+/// initializers, and it includes a sequence number so that work items can be
+/// emitted in a particular order for deterministic output. It acts like an
+/// interface class, but instead of making the classes of interest inherit from
+/// EmitterWorkItem, it wraps pointers to these classes. Some space is wasted
+/// compared to storing the pointers in a union, but not too much due to the
+/// work granularity.
class EmitterWorkItem {
EmitterWorkItem() = delete;
EmitterWorkItem(const EmitterWorkItem &) = delete;
@@ -165,20 +157,19 @@
public:
/// ItemKind can be one of the following:
///
- /// WI_Nop: No actual work. This is a placeholder to maintain
- /// sequence numbers in case there is a translation error.
+ /// WI_Nop: No actual work. This is a placeholder to maintain sequence numbers
+ /// in case there is a translation error.
///
/// WI_GlobalInits: A list of global declarations and initializers.
///
- /// WI_Asm: A function that has already had emitIAS() called on it.
- /// The work is transferred via the Assembler buffer, and the
- /// originating Cfg has been deleted (to recover lots of memory).
+ /// WI_Asm: A function that has already had emitIAS() called on it. The work
+ /// is transferred via the Assembler buffer, and the originating Cfg has been
+ /// deleted (to recover lots of memory).
///
- /// WI_Cfg: A Cfg that has not yet had emit() or emitIAS() called on
- /// it. This is only used as a debugging configuration when we want
- /// to emit "readable" assembly code, possibly annotated with
- /// liveness and other information only available in the Cfg and not
- /// in the Assembler buffer.
+ /// WI_Cfg: A Cfg that has not yet had emit() or emitIAS() called on it. This
+ /// is only used as a debugging configuration when we want to emit "readable"
+ /// assembly code, possibly annotated with liveness and other information only
+ /// available in the Cfg and not in the Assembler buffer.
enum ItemKind { WI_Nop, WI_GlobalInits, WI_Asm, WI_Cfg };
/// Constructor for a WI_Nop work item.
explicit EmitterWorkItem(uint32_t Seq);
diff --git a/src/IceTimerTree.cpp b/src/IceTimerTree.cpp
index dc4622d..133cd41 100644
--- a/src/IceTimerTree.cpp
+++ b/src/IceTimerTree.cpp
@@ -8,8 +8,8 @@
//===----------------------------------------------------------------------===//
///
/// \file
-/// This file defines the TimerTree class, which tracks flat and
-/// cumulative execution time collection of call chains.
+/// This file defines the TimerTree class, which tracks flat and cumulative
+/// execution time collection of call chains.
///
//===----------------------------------------------------------------------===//
@@ -41,8 +41,7 @@
#undef STR
}
-// Returns the unique timer ID for the given Name, creating a new ID
-// if needed.
+// Returns the unique timer ID for the given Name, creating a new ID if needed.
TimerIdT TimerStack::getTimerID(const IceString &Name) {
if (!BuildDefs::dump())
return 0;
@@ -55,9 +54,9 @@
return IDsIndex[Name];
}
-// Creates a mapping from TimerIdT (leaf) values in the Src timer
-// stack into TimerIdT values in this timer stack. Creates new
-// entries in this timer stack as needed.
+// Creates a mapping from TimerIdT (leaf) values in the Src timer stack into
+// TimerIdT values in this timer stack. Creates new entries in this timer stack
+// as needed.
TimerStack::TranslationType
TimerStack::translateIDsFrom(const TimerStack &Src) {
size_t Size = Src.IDs.size();
@@ -68,8 +67,8 @@
return Mapping;
}
-// Merges two timer stacks, by combining and summing corresponding
-// entries. This timer stack is updated from Src.
+// Merges two timer stacks, by combining and summing corresponding entries.
+// This timer stack is updated from Src.
void TimerStack::mergeFrom(const TimerStack &Src) {
if (!BuildDefs::dump())
return;
@@ -78,11 +77,11 @@
for (const TimerTreeNode &SrcNode : Src.Nodes) {
// The first node is reserved as a sentinel, so avoid it.
if (SrcIndex > 0) {
- // Find the full path to the Src node, translated to path
- // components corresponding to this timer stack.
+ // Find the full path to the Src node, translated to path components
+ // corresponding to this timer stack.
PathType MyPath = Src.getPath(SrcIndex, Mapping);
- // Find a node in this timer stack corresponding to the given
- // path, creating new interior nodes as necessary.
+ // Find a node in this timer stack corresponding to the given path,
+ // creating new interior nodes as necessary.
TTindex MyIndex = findPath(MyPath);
Nodes[MyIndex].Time += SrcNode.Time;
Nodes[MyIndex].UpdateCount += SrcNode.UpdateCount;
@@ -96,10 +95,9 @@
StateChangeCount += Src.StateChangeCount;
}
-// Constructs a path consisting of the sequence of leaf values leading
-// to a given node, with the Mapping translation applied to the leaf
-// values. The path ends up being in "reverse" order, i.e. from leaf
-// to root.
+// Constructs a path consisting of the sequence of leaf values leading to a
+// given node, with the Mapping translation applied to the leaf values. The
+// path ends up being in "reverse" order, i.e. from leaf to root.
TimerStack::PathType TimerStack::getPath(TTindex Index,
const TranslationType &Mapping) const {
PathType Path;
@@ -111,8 +109,8 @@
return Path;
}
-// Given a parent node and a leaf ID, returns the index of the
-// parent's child ID, creating a new node for the child as necessary.
+// Given a parent node and a leaf ID, returns the index of the parent's child
+// ID, creating a new node for the child as necessary.
TimerStack::TTindex TimerStack::getChildIndex(TimerStack::TTindex Parent,
TimerIdT ID) {
if (Nodes[Parent].Children.size() <= ID)
@@ -127,12 +125,12 @@
return Nodes[Parent].Children[ID];
}
-// Finds a node in the timer stack corresponding to the given path,
-// creating new interior nodes as necessary.
+// Finds a node in the timer stack corresponding to the given path, creating
+// new interior nodes as necessary.
TimerStack::TTindex TimerStack::findPath(const PathType &Path) {
TTindex CurIndex = 0;
- // The path is in reverse order (leaf to root), so it needs to be
- // followed in reverse.
+ // The path is in reverse order (leaf to root), so it needs to be followed in
+ // reverse.
for (TTindex Index : reverse_range(Path)) {
CurIndex = getChildIndex(CurIndex, Index);
}
@@ -150,8 +148,8 @@
assert(StackTop);
}
-// Pops the top marker from the timer stack. Validates via assert()
-// that the expected marker is popped.
+// Pops the top marker from the timer stack. Validates via assert() that the
+// expected marker is popped.
void TimerStack::pop(TimerIdT ID) {
if (!BuildDefs::dump())
return;
@@ -167,15 +165,15 @@
StackTop = Nodes[StackTop].Parent;
}
-// At a state change (e.g. push or pop), updates the flat and
-// cumulative timings for everything on the timer stack.
+// At a state change (e.g. push or pop), updates the flat and cumulative
+// timings for everything on the timer stack.
void TimerStack::update(bool UpdateCounts) {
if (!BuildDefs::dump())
return;
++StateChangeCount;
- // Whenever the stack is about to change, we grab the time delta
- // since the last change and add it to all active cumulative
- // elements and to the flat element for the top of the stack.
+ // Whenever the stack is about to change, we grab the time delta since the
+ // last change and add it to all active cumulative elements and to the flat
+ // element for the top of the stack.
double Current = timestamp();
double Delta = Current - LastTimestamp;
if (StackTop) {
@@ -198,10 +196,10 @@
assert(Next < Prefix);
Prefix = Next;
}
- // Capture the next timestamp *after* the updates are finished.
- // This minimizes how much the timer can perturb the reported
- // timing. The numbers may not sum to 100%, and the missing amount
- // is indicative of the overhead of timing.
+ // Capture the next timestamp *after* the updates are finished. This
+ // minimizes how much the timer can perturb the reported timing. The numbers
+ // may not sum to 100%, and the missing amount is indicative of the overhead
+ // of timing.
LastTimestamp = timestamp();
}
@@ -234,8 +232,8 @@
}
}
-// Write a printf() format string into Buf[], in the format "[%5lu] ",
-// where "5" is actually the number of digits in MaxVal. E.g.,
+// Write a printf() format string into Buf[], in the format "[%5lu] ", where
+// "5" is actually the number of digits in MaxVal. E.g.,
// MaxVal=0 ==> "[%1lu] "
// MaxVal=5 ==> "[%1lu] "
// MaxVal=9876 ==> "[%4lu] "
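A sketch (not the CL's implementation) of building such a format string from MaxVal:

#include <cstddef>
#include <cstdio>

// Illustrative only: the field width is the number of decimal digits in
// MaxVal, so MaxVal=9876 produces "[%4lu] ".
static void makeDumpFormat(char *Buf, std::size_t BufLen, unsigned long MaxVal) {
  int Digits = 1;
  while (MaxVal >= 10) {
    ++Digits;
    MaxVal /= 10;
  }
  std::snprintf(Buf, BufLen, "[%%%dlu] ", Digits);
}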
diff --git a/src/IceTimerTree.def b/src/IceTimerTree.def
index 6db9fbc..6e12219 100644
--- a/src/IceTimerTree.def
+++ b/src/IceTimerTree.def
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
//
-// This file lists predefined timing tags. New tags can be added to
-// avoid a runtime string lookup.
+// This file lists predefined timing tags. New tags can be added to avoid a
+// runtime string lookup.
//
//===----------------------------------------------------------------------===//
diff --git a/src/IceTimerTree.h b/src/IceTimerTree.h
index 98bbdda..796ce8c 100644
--- a/src/IceTimerTree.h
+++ b/src/IceTimerTree.h
@@ -8,8 +8,8 @@
//===----------------------------------------------------------------------===//
///
/// \file
-/// This file declares the TimerTree class, which allows flat and
-/// cumulative execution time collection of call chains.
+/// This file declares the TimerTree class, which allows flat and cumulative
+/// execution time collection of call chains.
///
//===----------------------------------------------------------------------===//
@@ -26,23 +26,22 @@
TimerStack() = delete;
TimerStack &operator=(const TimerStack &) = delete;
- /// Timer tree index type. A variable of this type is used to access
- /// an interior, not-necessarily-leaf node of the tree.
+ /// Timer tree index type. A variable of this type is used to access an
+ /// interior, not-necessarily-leaf node of the tree.
using TTindex = std::vector<class TimerTreeNode>::size_type;
- /// Representation of a path of leaf values leading to a particular
- /// node. The representation happens to be in "reverse" order,
- /// i.e. from leaf/interior to root, for implementation efficiency.
+ /// Representation of a path of leaf values leading to a particular node. The
+ /// representation happens to be in "reverse" order, i.e. from leaf/interior
+ /// to root, for implementation efficiency.
using PathType = llvm::SmallVector<TTindex, 8>;
- /// Representation of a mapping of leaf node indexes from one timer
- /// stack to another.
+ /// Representation of a mapping of leaf node indexes from one timer stack to
+ /// another.
using TranslationType = std::vector<TimerIdT>;
- /// TimerTreeNode represents an interior or leaf node in the call tree.
- /// It contains a list of children, a pointer to its parent, and the
- /// timer ID for the node. It also holds the cumulative time spent at
- /// this node and below. The children are always at a higher index in
- /// the TimerTreeNode::Nodes array, and the parent is always at a lower
- /// index.
+ /// TimerTreeNode represents an interior or leaf node in the call tree. It
+ /// contains a list of children, a pointer to its parent, and the timer ID for
+ /// the node. It also holds the cumulative time spent at this node and below.
+ /// The children are always at a higher index in the TimerTreeNode::Nodes
+ /// array, and the parent is always at a lower index.
class TimerTreeNode {
TimerTreeNode &operator=(const TimerTreeNode &) = delete;
diff --git a/src/IceTranslator.cpp b/src/IceTranslator.cpp
index e3a32dc..68f2b1f 100644
--- a/src/IceTranslator.cpp
+++ b/src/IceTranslator.cpp
@@ -8,8 +8,8 @@
//===----------------------------------------------------------------------===//
///
/// \file
-/// This file defines the general driver class for translating ICE to
-/// machine code.
+/// This file defines the general driver class for translating ICE to machine
+/// code.
///
//===----------------------------------------------------------------------===//
diff --git a/src/IceTranslator.h b/src/IceTranslator.h
index 449b216..415965a 100644
--- a/src/IceTranslator.h
+++ b/src/IceTranslator.h
@@ -8,8 +8,8 @@
//===----------------------------------------------------------------------===//
///
/// \file
-/// This file declares the general driver class for translating ICE to
-/// machine code.
+/// This file declares the general driver class for translating ICE to machine
+/// code.
///
//===----------------------------------------------------------------------===//
@@ -30,7 +30,7 @@
class VariableDeclaration;
class GlobalContext;
-/// Base class for translating ICE to machine code. Derived classes convert
+/// Base class for translating ICE to machine code. Derived classes convert
/// other intermediate representations down to ICE, and then call the
/// appropriate (inherited) methods to convert ICE into machine instructions.
class Translator {
@@ -48,21 +48,21 @@
const ClFlags &getFlags() const { return Ctx->getFlags(); }
- /// Translates the constructed ICE function Fcn to machine code.
- /// Takes ownership of Func.
+ /// Translates the constructed ICE function Fcn to machine code. Takes
+ /// ownership of Func.
void translateFcn(std::unique_ptr<Cfg> Func);
- /// Lowers the given list of global addresses to target. Generates
- /// list of corresponding variable declarations.
+  /// Lowers the given list of global addresses to the target. Generates a
+  /// list of corresponding variable declarations.
void
lowerGlobals(std::unique_ptr<VariableDeclarationList> VariableDeclarations);
/// Creates a name using the given prefix and corresponding index.
std::string createUnnamedName(const IceString &Prefix, SizeT Index);
- /// Reports if there is a (potential) conflict between Name, and using
- /// Prefix to name unnamed names. Errors are put on Ostream.
- /// Returns true if there isn't a potential conflict.
+  /// Reports if there is a (potential) conflict between Name and using Prefix
+ /// to name unnamed names. Errors are put on Ostream. Returns true if there
+ /// isn't a potential conflict.
bool checkIfUnnamedNameSafe(const IceString &Name, const char *Kind,
const IceString &Prefix);
diff --git a/src/IceTypeConverter.h b/src/IceTypeConverter.h
index cb3536c..c61423c 100644
--- a/src/IceTypeConverter.h
+++ b/src/IceTypeConverter.h
@@ -8,8 +8,8 @@
//===----------------------------------------------------------------------===//
///
/// \file
-/// This file defines how to convert LLVM types to ICE types, and ICE types
-/// to LLVM types.
+/// This file defines how to convert LLVM types to ICE types, and ICE types to
+/// LLVM types.
///
//===----------------------------------------------------------------------===//
@@ -40,8 +40,8 @@
/// Context is the context to use to build llvm types.
explicit TypeConverter(llvm::LLVMContext &Context);
- /// Converts LLVM type LLVMTy to an ICE type. Returns
- /// Ice::IceType_NUM if unable to convert.
+ /// Converts LLVM type LLVMTy to an ICE type. Returns Ice::IceType_NUM if
+ /// unable to convert.
Type convertToIceType(llvm::Type *LLVMTy) const {
auto Pos = LLVM2IceMap.find(LLVMTy);
if (Pos == LLVM2IceMap.end())
diff --git a/src/IceTypes.cpp b/src/IceTypes.cpp
index dd06b1e..6cc79b7 100644
--- a/src/IceTypes.cpp
+++ b/src/IceTypes.cpp
@@ -58,8 +58,7 @@
ICETYPE_PROPS_TABLE
#undef X
-// Show vector definitions match in ICETYPE_TABLE and
-// ICETYPE_PROPS_TABLE.
+// Show vector definitions match in ICETYPE_TABLE and ICETYPE_PROPS_TABLE.
// Define constants for each element size in ICETYPE_TABLE.
enum {
diff --git a/src/IceTypes.def b/src/IceTypes.def
index 94877a2..b86dba8 100644
--- a/src/IceTypes.def
+++ b/src/IceTypes.def
@@ -7,8 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file defines properties of ICE primitive types in the form of
-// x-macros.
+// This file defines properties of ICE primitive types in the form of x-macros.
//
//===----------------------------------------------------------------------===//
@@ -16,10 +15,10 @@
#define SUBZERO_SRC_ICETYPES_DEF
// Attributes of each target architecture.
-// NOTE on is_elf64 -- At some point NaCl would like to use ELF32 for all
-// ILP32 sandboxes, but for now the 64-bit architectures use ELF64:
-// https://code.google.com/p/nativeclient/issues/detail?id=349
-// TODO: Whoever adds AArch64 will need to set ABI e_flags.
+// NOTE on is_elf64 -- At some point NaCl would like to use ELF32 for all ILP32
+// sandboxes, but for now the 64-bit architectures use ELF64:
+// https://code.google.com/p/nativeclient/issues/detail?id=349
+// TODO: Whoever adds AArch64 will need to set ABI e_flags.
#define TARGETARCH_TABLE \
/* enum value, printable string, is_elf64, e_machine, e_flags */ \
X(Target_X8632, "x86-32", false, EM_386, 0) \
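For readers new to the x-macro idiom used by these .def files: each consumer
defines X to select the columns it needs and then expands the table. A minimal
sketch of the pattern (illustrative only; the real consumers live in IceTypes.h
and IceTypes.cpp, and targetArchString is the printable-name accessor used
there):

  // Expand TARGETARCH_TABLE once to build the enum of targets...
  enum TargetArch {
  #define X(tag, str, is_elf64, e_machine, e_flags) tag,
    TARGETARCH_TABLE
  #undef X
    TargetArch_NUM // hypothetical end marker, for array sizing
  };

  // ...and a second time to build a parallel table of printable names.
  inline const char *targetArchString(TargetArch Arch) {
    static const char *const Names[] = {
  #define X(tag, str, is_elf64, e_machine, e_flags) str,
        TARGETARCH_TABLE
  #undef X
    };
    return Names[Arch];
  }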
diff --git a/src/IceTypes.h b/src/IceTypes.h
index 3c87f68..f176e9b 100644
--- a/src/IceTypes.h
+++ b/src/IceTypes.h
@@ -8,9 +8,8 @@
//===----------------------------------------------------------------------===//
///
/// \file
-/// This file declares a few properties of the primitive types allowed
-/// in Subzero. Every Subzero source file is expected to include
-/// IceTypes.h.
+/// This file declares a few properties of the primitive types allowed in
+/// Subzero. Every Subzero source file is expected to include IceTypes.h.
///
//===----------------------------------------------------------------------===//
@@ -42,8 +41,8 @@
return Stream << targetArchString(Arch);
}
-/// The list of all target instruction sets. Individual targets will
-/// map this to include only what is valid for the target.
+/// The list of all target instruction sets. Individual targets will map this to
+/// include only what is valid for the target.
enum TargetInstructionSet {
// Represents baseline that can be assumed for a target (usually "Begin").
BaseInstructionSet,
@@ -97,8 +96,8 @@
return result;
}
-/// Check if Ty is byte sized and specifically i8. Assert that it's not
-/// byte sized due to being an i1.
+/// Check if Ty is byte sized and specifically i8. Assert that it's not byte
+/// sized due to being an i1.
inline bool isByteSizedArithType(Type Ty) {
assert(Ty != IceType_i1);
return Ty == IceType_i8;
@@ -131,8 +130,8 @@
public:
using ArgListType = std::vector<Type>;
- /// Creates a function signature type with the given return type.
- /// Parameter types should be added using calls to appendArgType.
+ /// Creates a function signature type with the given return type. Parameter
+ /// types should be added using calls to appendArgType.
FuncSigType() = default;
FuncSigType(const FuncSigType &Ty) = default;
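A brief usage sketch for this class, hedged on the exact API: appendArgType is
named in the comment above, and setReturnType is called on a signature later in
this patch (PNaClTranslator.cpp), so the following is assumed rather than
verified:

  // Describes the signature "i32 f(i32, float)".
  Ice::FuncSigType Sig;
  Sig.setReturnType(Ice::IceType_i32);
  Sig.appendArgType(Ice::IceType_i32);
  Sig.appendArgType(Ice::IceType_f32);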
diff --git a/src/IceUtils.h b/src/IceUtils.h
index f07a566..9387671 100644
--- a/src/IceUtils.h
+++ b/src/IceUtils.h
@@ -19,11 +19,10 @@
namespace Ice {
-/// Similar to bit_cast, but allows copying from types of unrelated
-/// sizes. This method was introduced to enable the strict aliasing
-/// optimizations of GCC 4.4. Basically, GCC mindlessly relies on
-/// obscure details in the C++ standard that make reinterpret_cast
-/// virtually useless.
+/// Similar to bit_cast, but allows copying from types of unrelated sizes. This
+/// method was introduced to enable the strict aliasing optimizations of GCC
+/// 4.4. Basically, GCC mindlessly relies on obscure details in the C++ standard
+/// that make reinterpret_cast virtually useless.
template <class D, class S> inline D bit_copy(const S &source) {
D destination;
// This use of memcpy is safe: source and destination cannot overlap.
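As a concrete illustration of the point about strict aliasing: reading a
float's bits through a reinterpret_cast'd pointer is undefined behavior, while
the memcpy-based copy is well defined. A self-contained sketch of the same
pattern (not the Subzero header itself):

  #include <cstdint>
  #include <cstring>

  // Same shape as bit_copy above: copy sizeof(D) bytes out of Source. The
  // caller must ensure Source is at least as large as D.
  template <class D, class S> D bit_copy_sketch(const S &Source) {
    D Destination;
    std::memcpy(&Destination, &Source, sizeof(Destination));
    return Destination;
  }

  // Example: the IEEE-754 bit pattern of 1.0f.
  // uint32_t Bits = bit_copy_sketch<uint32_t>(1.0f); // Bits == 0x3F800000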
@@ -63,8 +62,8 @@
return IsUint(N, Value);
}
- /// Return true if the addition X + Y will cause integer overflow for
- /// integers of type T.
+ /// Return true if the addition X + Y will cause integer overflow for integers
+ /// of type T.
template <typename T> static inline bool WouldOverflowAdd(T X, T Y) {
return ((X > 0 && Y > 0 && (X > std::numeric_limits<T>::max() - Y)) ||
(X < 0 && Y < 0 && (X < std::numeric_limits<T>::min() - Y)));
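The check above is self-contained, so a worked example may help: for int8_t,
max() is 127 and min() is -128, so 100 + 100 is flagged while -100 + 50 is not.
A standalone copy of the same predicate:

  #include <cstdint>
  #include <limits>

  // Signed addition X + Y overflows T exactly when both operands push the sum
  // past the representable range in the same direction.
  template <typename T> bool wouldOverflowAdd(T X, T Y) {
    return ((X > 0 && Y > 0 && (X > std::numeric_limits<T>::max() - Y)) ||
            (X < 0 && Y < 0 && (X < std::numeric_limits<T>::min() - Y)));
  }

  // wouldOverflowAdd<int8_t>(100, 100) -> true  (200 > 127)
  // wouldOverflowAdd<int8_t>(-100, 50) -> false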
diff --git a/src/PNaClTranslator.cpp b/src/PNaClTranslator.cpp
index 023a433..d8d1860 100644
--- a/src/PNaClTranslator.cpp
+++ b/src/PNaClTranslator.cpp
@@ -41,11 +41,10 @@
namespace {
using namespace llvm;
-// Models elements in the list of types defined in the types block.
-// These elements can be undefined, a (simple) type, or a function type
-// signature. Note that an extended type is undefined on construction.
-// Use methods setAsSimpleType and setAsFuncSigType to define
-// the extended type.
+// Models elements in the list of types defined in the types block. These
+// elements can be undefined, a (simple) type, or a function type signature.
+// Note that an extended type is undefined on construction. Use methods
+// setAsSimpleType and setAsFuncSigType to define the extended type.
class ExtendedType {
ExtendedType &operator=(const ExtendedType &Ty) = delete;
@@ -61,8 +60,7 @@
ExtendedType::TypeKind getKind() const { return Kind; }
void dump(Ice::Ostream &Stream) const;
- /// Changes the extended type to a simple type with the given
- /// value.
+  /// Changes the extended type to a simple type with the given value.
void setAsSimpleType(Ice::Type Ty) {
assert(Kind == Undefined);
Kind = Simple;
@@ -76,8 +74,8 @@
}
protected:
- // Note: For simple types, the return type of the signature will
- // be used to hold the simple type.
+ // Note: For simple types, the return type of the signature will be used to
+ // hold the simple type.
Ice::FuncSigType Signature;
private:
@@ -180,16 +178,15 @@
BlockParser = NewBlockParser;
}
- /// Generates error with given Message, occurring at BitPosition
- /// within the bitcode file. Always returns true.
+ /// Generates error with given Message, occurring at BitPosition within the
+ /// bitcode file. Always returns true.
bool ErrorAt(naclbitc::ErrorLevel Level, uint64_t BitPosition,
const std::string &Message) final;
/// Generates error message with respect to the current block parser.
bool blockError(const std::string &Message);
- /// Returns the number of errors found while parsing the bitcode
- /// file.
+ /// Returns the number of errors found while parsing the bitcode file.
unsigned getNumErrors() const { return NumErrors; }
/// Changes the size of the type list to the given size.
@@ -202,11 +199,11 @@
return Translator.getFlags().getDisableIRGeneration();
}
- /// Returns the undefined type associated with type ID.
- /// Note: Returns extended type ready to be defined.
+ /// Returns the undefined type associated with type ID. Note: Returns extended
+ /// type ready to be defined.
ExtendedType *getTypeByIDForDefining(NaClBcIndexSize_t ID) {
- // Get corresponding element, verifying the value is still undefined
- // (and hence allowed to be defined).
+ // Get corresponding element, verifying the value is still undefined (and
+ // hence allowed to be defined).
ExtendedType *Ty = getTypeByIDAsKind(ID, ExtendedType::Undefined);
if (Ty)
return Ty;
@@ -248,9 +245,9 @@
FunctionDeclarations.push_back(Fcn);
}
- /// Returns the value id that should be associated with the the
- /// current function block. Increments internal counters during call
- /// so that it will be in correct position for next function block.
+  /// Returns the value id that should be associated with the current
+ /// function block. Increments internal counters during call so that it will
+ /// be in correct position for next function block.
NaClBcIndexSize_t getNextFunctionBlockValueID() {
size_t NumDeclaredFunctions = FunctionDeclarations.size();
while (NextDefiningFunctionID < NumDeclaredFunctions &&
@@ -274,9 +271,9 @@
return ValueIDConstants[ID];
}
- /// Install names for all global values without names. Called after
- /// the global value symbol table is processed, but before any
- /// function blocks are processed.
+ /// Install names for all global values without names. Called after the global
+ /// value symbol table is processed, but before any function blocks are
+ /// processed.
void installGlobalNames() {
assert(VariableDeclarations);
installGlobalVarNames();
@@ -294,8 +291,8 @@
/// Returns the number of function declarations in the bitcode file.
size_t getNumFunctionIDs() const { return FunctionDeclarations.size(); }
- /// Returns the number of global declarations (i.e. IDs) defined in
- /// the bitcode file.
+ /// Returns the number of global declarations (i.e. IDs) defined in the
+ /// bitcode file.
size_t getNumGlobalIDs() const {
if (VariableDeclarations) {
return FunctionDeclarations.size() + VariableDeclarations->size();
@@ -319,8 +316,8 @@
return reportGetGlobalVariableByIDError(Index);
}
- /// Returns the global declaration (variable or function) with the
- /// given Index.
+ /// Returns the global declaration (variable or function) with the given
+ /// Index.
Ice::GlobalDeclaration *getGlobalDeclarationByID(NaClBcIndexSize_t Index) {
size_t NumFunctionIds = FunctionDeclarations.size();
if (Index < NumFunctionIds)
@@ -329,13 +326,12 @@
return getGlobalVariableByID(Index - NumFunctionIds);
}
- /// Returns the list of parsed global variable
- /// declarations. Releases ownership of the current list of global
- /// variables. Note: only returns non-null pointer on first
- /// call. All successive calls return a null pointer.
+ /// Returns the list of parsed global variable declarations. Releases
+  /// ownership of the current list of global variables. Note: only returns a
+  /// non-null pointer on the first call. All successive calls return a null
+ /// pointer.
std::unique_ptr<Ice::VariableDeclarationList> getGlobalVariables() {
- // Before returning, check that ValidIDConstants has already been
- // built.
+    // Before returning, check that ValueIDConstants has already been built.
assert(!VariableDeclarations ||
VariableDeclarations->size() <= ValueIDConstants.size());
return std::move(VariableDeclarations);
@@ -364,16 +360,14 @@
Ice::ConstantList ValueIDConstants;
// Error recovery value to use when getFuncSigTypeByID fails.
Ice::FuncSigType UndefinedFuncSigType;
- // The block parser currently being applied. Used for error
- // reporting.
+ // The block parser currently being applied. Used for error reporting.
BlockParserBaseClass *BlockParser = nullptr;
bool ParseBlock(unsigned BlockID) override;
- // Gets extended type associated with the given index, assuming the
- // extended type is of the WantedKind. Generates error message if
- // corresponding extended type of WantedKind can't be found, and
- // returns nullptr.
+ // Gets extended type associated with the given index, assuming the extended
+ // type is of the WantedKind. Generates error message if corresponding
+ // extended type of WantedKind can't be found, and returns nullptr.
ExtendedType *getTypeByIDAsKind(NaClBcIndexSize_t ID,
ExtendedType::TypeKind WantedKind) {
ExtendedType *Ty = nullptr;
@@ -387,12 +381,11 @@
return nullptr;
}
- // Gives Decl a name if it doesn't already have one. Prefix and
- // NameIndex are used to generate the name. NameIndex is
- // automatically incremented if a new name is created. DeclType is
- // literal text describing the type of name being created. Also
- // generates warning if created names may conflict with named
- // declarations.
+ // Gives Decl a name if it doesn't already have one. Prefix and NameIndex are
+ // used to generate the name. NameIndex is automatically incremented if a new
+ // name is created. DeclType is literal text describing the type of name
+ // being created. Also generates warning if created names may conflict with
+ // named declarations.
void installDeclarationName(Ice::GlobalDeclaration *Decl,
const Ice::IceString &Prefix,
const char *DeclType,
@@ -431,7 +424,7 @@
}
// Builds a constant symbol named Name, suppressing name mangling if
- // SuppressMangling. IsExternal is true iff the symbol is external.
+ // SuppressMangling. IsExternal is true iff the symbol is external.
Ice::Constant *getConstantSym(const Ice::IceString &Name,
bool SuppressMangling, bool IsExternal) const {
if (IsExternal) {
@@ -471,17 +464,17 @@
void reportBadTypeIDAs(NaClBcIndexSize_t ID, const ExtendedType *Ty,
ExtendedType::TypeKind WantedType);
- // Reports that there is no function declaration for ID. Returns an
- // error recovery value to use.
+ // Reports that there is no function declaration for ID. Returns an error
+ // recovery value to use.
Ice::FunctionDeclaration *reportGetFunctionByIDError(NaClBcIndexSize_t ID);
- // Reports that there is not global variable declaration for
- // ID. Returns an error recovery value to use.
+  // Reports that there is no global variable declaration for ID. Returns an
+ // error recovery value to use.
Ice::VariableDeclaration *
reportGetGlobalVariableByIDError(NaClBcIndexSize_t Index);
- // Reports that there is no corresponding ICE type for LLVMTy, and
- // returns Ice::IceType_void.
+ // Reports that there is no corresponding ICE type for LLVMTy, and returns
+ // Ice::IceType_void.
Ice::Type convertToIceTypeError(Type *LLVMTy);
};
@@ -549,10 +542,9 @@
return Ice::IceType_void;
}
-// Base class for parsing blocks within the bitcode file. Note:
-// Because this is the base class of block parsers, we generate error
-// messages if ParseBlock or ParseRecord is not overridden in derived
-// classes.
+// Base class for parsing blocks within the bitcode file. Note: Because this is
+// the base class of block parsers, we generate error messages if ParseBlock or
+// ParseRecord is not overridden in derived classes.
class BlockParserBaseClass : public NaClBitcodeParser {
BlockParserBaseClass() = delete;
BlockParserBaseClass(const BlockParserBaseClass &) = delete;
@@ -595,16 +587,15 @@
return getTranslator().getFlags().getDisableIRGeneration();
}
- // Default implementation. Reports that block is unknown and skips
- // its contents.
+ // Default implementation. Reports that block is unknown and skips its
+ // contents.
bool ParseBlock(unsigned BlockID) override;
- // Default implementation. Reports that the record is not
- // understood.
+ // Default implementation. Reports that the record is not understood.
void ProcessRecord() override;
- // Checks if the size of the record is Size. Return true if valid.
- // Otherwise generates an error and returns false.
+  // Checks if the size of the record is Size. Returns true if valid. Otherwise
+ // generates an error and returns false.
bool isValidRecordSize(size_t Size, const char *RecordName) {
const NaClBitcodeRecord::RecordVector &Values = Record.GetValues();
if (Values.size() == Size)
@@ -613,9 +604,8 @@
return false;
}
- // Checks if the size of the record is at least as large as the
- // LowerLimit. Returns true if valid. Otherwise generates an error
- // and returns false.
+ // Checks if the size of the record is at least as large as the LowerLimit.
+ // Returns true if valid. Otherwise generates an error and returns false.
bool isValidRecordSizeAtLeast(size_t LowerLimit, const char *RecordName) {
const NaClBitcodeRecord::RecordVector &Values = Record.GetValues();
if (Values.size() >= LowerLimit)
@@ -625,8 +615,8 @@
}
// Checks if the size of the record is no larger than the
- // UpperLimit. Returns true if valid. Otherwise generates an error
- // and returns false.
+ // UpperLimit. Returns true if valid. Otherwise generates an error and
+ // returns false.
bool isValidRecordSizeAtMost(size_t UpperLimit, const char *RecordName) {
const NaClBitcodeRecord::RecordVector &Values = Record.GetValues();
if (Values.size() <= UpperLimit)
@@ -635,9 +625,9 @@
return false;
}
- // Checks if the size of the record is at least as large as the
- // LowerLimit, and no larger than the UpperLimit. Returns true if
- // valid. Otherwise generates an error and returns false.
+ // Checks if the size of the record is at least as large as the LowerLimit,
+ // and no larger than the UpperLimit. Returns true if valid. Otherwise
+ // generates an error and returns false.
bool isValidRecordSizeInRange(size_t LowerLimit, size_t UpperLimit,
const char *RecordName) {
return isValidRecordSizeAtLeast(LowerLimit, RecordName) ||
@@ -645,11 +635,10 @@
}
private:
- /// Generates a record size error. ExpectedSize is the number
- /// of elements expected. RecordName is the name of the kind of
- /// record that has incorrect size. ContextMessage (if not nullptr)
- /// is appended to "record expects" to describe how ExpectedSize
- /// should be interpreted.
+ /// Generates a record size error. ExpectedSize is the number of elements
+ /// expected. RecordName is the name of the kind of record that has incorrect
+ /// size. ContextMessage (if not nullptr) is appended to "record expects" to
+ /// describe how ExpectedSize should be interpreted.
void reportRecordSizeError(size_t ExpectedSize, const char *RecordName,
const char *ContextMessage);
};
@@ -666,9 +655,9 @@
const std::string &Message) {
std::string Buffer;
raw_string_ostream StrBuf(Buffer);
- // Note: If dump routines have been turned off, the error messages
- // will not be readable. Hence, replace with simple error. We also
- // use the simple form for unit tests.
+ // Note: If dump routines have been turned off, the error messages will not
+    // be readable. Hence, we replace them with a simple error. We also use the
+    // simple form for unit tests.
if (getFlags().getGenerateUnitTestMessages()) {
StrBuf << "Invalid " << getBlockName() << " record: <" << Record.GetCode();
for (const uint64_t Val : Record.GetValues()) {
@@ -700,8 +689,8 @@
}
bool BlockParserBaseClass::ParseBlock(unsigned BlockID) {
- // If called, derived class doesn't know how to handle block.
- // Report error and skip.
+ // If called, derived class doesn't know how to handle block. Report error
+ // and skip.
std::string Buffer;
raw_string_ostream StrBuf(Buffer);
StrBuf << "Don't know how to parse block id: " << BlockID;
@@ -742,8 +731,8 @@
private:
Ice::TimerMarker Timer;
- // The type ID that will be associated with the next type defining
- // record in the types block.
+ // The type ID that will be associated with the next type defining record in
+ // the types block.
NaClBcIndexSize_t NextTypeId = 0;
// The expected number of types, based on record TYPE_CODE_NUMENTRY.
@@ -773,13 +762,11 @@
Error(StrBuf.str());
ExpectedNumTypes = NaClBcIndexSize_t_Max;
}
- // The code double checks that Expected size and the actual size
- // at the end of the block. To reduce allocations we preallocate
- // the space.
+    // The code double-checks the expected size against the actual size at the
+    // end of the block. To reduce allocations we preallocate the space.
//
- // However, if the number is large, we suspect that the number
- // is (possibly) incorrect. In that case, we preallocate a
- // smaller space.
+    // However, if the number is large, we suspect that it is (possibly)
+    // incorrect. In that case, we preallocate a smaller space.
constexpr uint64_t DefaultLargeResizeValue = 1000000;
Context->resizeTypeIDValues(std::min(Size, DefaultLargeResizeValue));
ExpectedNumTypes = Size;
@@ -902,9 +889,9 @@
FuncSigExtendedType *FuncTy = cast<FuncSigExtendedType>(Ty);
FuncTy->setReturnType(Context->getSimpleTypeByID(Values[1]));
for (size_t i = 2, e = Values.size(); i != e; ++i) {
- // Check that type void not used as argument type.
- // Note: PNaCl restrictions can't be checked until we
- // know the name, because we have to check for intrinsic signatures.
+ // Check that type void not used as argument type. Note: PNaCl
+ // restrictions can't be checked until we know the name, because we have
+ // to check for intrinsic signatures.
Ice::Type ArgTy = Context->getSimpleTypeByID(Values[i]);
if (ArgTy == Ice::IceType_void) {
std::string Buffer;
@@ -956,8 +943,8 @@
// Holds the number of defined function IDs.
NaClBcIndexSize_t NumFunctionIDs;
- // Holds the specified number of global variables by the count record in
- // the global variables block.
+  // Holds the number of global variables specified by the count record in the
+  // global variables block.
NaClBcIndexSize_t SpecifiedNumberVars = 0;
// Keeps track of how many initializers are expected for the global variable
@@ -967,9 +954,8 @@
// The index of the next global variable declaration.
NaClBcIndexSize_t NextGlobalID = 0;
- // Dummy global variable declaration to guarantee CurGlobalVar is
- // always defined (allowing code to not need to check if
- // CurGlobalVar is nullptr).
+ // Dummy global variable declaration to guarantee CurGlobalVar is always
+ // defined (allowing code to not need to check if CurGlobalVar is nullptr).
Ice::VariableDeclaration *DummyGlobalVar;
// Holds the current global variable declaration being built.
@@ -1230,14 +1216,13 @@
getTranslator().getContext()->pushTimer(TimerID, StackID);
}
- // Note: The Cfg is created, even when IR generation is disabled. This
- // is done to install a CfgLocalAllocator for various internal containers.
+ // Note: The Cfg is created, even when IR generation is disabled. This is
+ // done to install a CfgLocalAllocator for various internal containers.
Func = Ice::Cfg::create(getTranslator().getContext(),
getTranslator().getNextSequenceNumber());
Ice::Cfg::setCurrentCfg(Func.get());
- // TODO(kschimpf) Clean up API to add a function signature to
- // a CFG.
+ // TODO(kschimpf) Clean up API to add a function signature to a CFG.
const Ice::FuncSigType &Signature = FuncDecl->getSignature();
if (isIRGenerationDisabled()) {
CurrentNode = nullptr;
@@ -1257,17 +1242,17 @@
}
bool ParserResult = ParseThisBlock();
- // Temporarily end per-function timing, which will be resumed by
- // the translator function. This is because translation may be
- // done asynchronously in a separate thread.
+ // Temporarily end per-function timing, which will be resumed by the
+ // translator function. This is because translation may be done
+ // asynchronously in a separate thread.
if (TimeThisFunction)
getTranslator().getContext()->popTimer(TimerID, StackID);
Ice::Cfg::setCurrentCfg(nullptr);
- // Note: Once any errors have been found, we turn off all
- // translation of all remaining functions. This allows successive
- // parsing errors to be reported, without adding extra checks to
- // the translator for such parsing errors.
+ // Note: Once any errors have been found, we turn off all translation of
+ // all remaining functions. This allows successive parsing errors to be
+ // reported, without adding extra checks to the translator for such parsing
+ // errors.
if (Context->getNumErrors() == 0 && Func) {
getTranslator().translateFcn(std::move(Func));
// The translator now has ownership of Func.
@@ -1332,21 +1317,20 @@
Ice::FunctionDeclaration *FuncDecl;
// Holds the dividing point between local and global absolute value indices.
size_t CachedNumGlobalValueIDs;
- // Holds operands local to the function block, based on indices
- // defined in the bitcode file.
+ // Holds operands local to the function block, based on indices defined in
+ // the bitcode file.
Ice::OperandList LocalOperands;
- // Holds the index within LocalOperands corresponding to the next
- // instruction that generates a value.
+ // Holds the index within LocalOperands corresponding to the next instruction
+ // that generates a value.
NaClBcIndexSize_t NextLocalInstIndex;
- // True if the last processed instruction was a terminating
- // instruction.
+ // True if the last processed instruction was a terminating instruction.
bool InstIsTerminating = false;
// Upper limit of alignment power allowed by LLVM
static const uint32_t AlignPowerLimit = 29;
- // Extracts the corresponding Alignment to use, given the AlignPower
- // (i.e. 2**(AlignPower-1), or 0 if AlignPower == 0). InstName is the
- // name of the instruction the alignment appears in.
+ // Extracts the corresponding Alignment to use, given the AlignPower (i.e.
+ // 2**(AlignPower-1), or 0 if AlignPower == 0). InstName is the name of the
+ // instruction the alignment appears in.
void extractAlignment(const char *InstName, uint32_t AlignPower,
uint32_t &Alignment) {
if (AlignPower <= AlignPowerLimit + 1) {
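The AlignPower encoding mentioned above is worth spelling out: 0 means no
alignment was specified, and any other value N encodes an alignment of
2**(N-1) bytes. A minimal sketch of just that decoding step (the real
extractAlignment also range-checks against AlignPowerLimit and reports errors
using InstName):

  #include <cstdint>

  // AlignPower 0 -> 0, 1 -> 1 byte, 3 -> 4 bytes, 5 -> 16 bytes, ...
  inline uint32_t decodeAlignPower(uint32_t AlignPower) {
    return AlignPower == 0 ? 0 : (1u << (AlignPower - 1));
  }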
@@ -1396,10 +1380,9 @@
return Func->getNodes()[Index];
}
- // Returns the Index-th basic block in the list of basic blocks.
- // Assumes Index corresponds to a branch instruction. Hence, if
- // the branch references the entry block, it also generates a
- // corresponding error.
+ // Returns the Index-th basic block in the list of basic blocks. Assumes
+ // Index corresponds to a branch instruction. Hence, if the branch references
+ // the entry block, it also generates a corresponding error.
Ice::CfgNode *getBranchBasicBlock(NaClBcIndexSize_t Index) {
assert(!isIRGenerationDisabled());
if (Index == 0) {
@@ -1448,8 +1431,7 @@
return Var;
}
- // Converts a relative index (wrt to BaseIndex) to an absolute value
- // index.
+  // Converts a relative index (wrt BaseIndex) to an absolute value index.
NaClBcIndexSize_t convertRelativeToAbsIndex(NaClRelBcIndexSize_t Id,
NaClRelBcIndexSize_t BaseIndex) {
if (BaseIndex < Id) {
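The rest of the body is outside this hunk, but the conversion itself is a
subtraction guarded by the range check shown: PNaCl function blocks name values
by how far back they were defined, so the absolute index is BaseIndex minus the
relative Id. A sketch under that assumption, with illustrative typedefs
standing in for the NaCl index types:

  #include <cstdint>

  using RelIndex = uint64_t; // stand-in for NaClRelBcIndexSize_t (width assumed)
  using AbsIndex = uint32_t; // stand-in for NaClBcIndexSize_t (width assumed)

  // A relative Id of 1 names the most recently defined value.
  inline AbsIndex relativeToAbs(RelIndex Id, RelIndex BaseIndex) {
    // The caller reports an error for the BaseIndex < Id case shown above.
    return static_cast<AbsIndex>(BaseIndex - Id);
  }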
@@ -1508,8 +1490,8 @@
LocalOperands[LocalIndex] = Op;
}
- // Returns the relative operand (wrt to BaseIndex) referenced by
- // the given value Index.
+  // Returns the relative operand (wrt BaseIndex) referenced by the given
+ // value Index.
Ice::Operand *getRelativeOperand(NaClBcIndexSize_t Index,
NaClBcIndexSize_t BaseIndex) {
return getOperand(convertRelativeToAbsIndex(Index, BaseIndex));
@@ -1518,13 +1500,12 @@
// Returns the absolute index of the next value generating instruction.
NaClBcIndexSize_t getNextInstIndex() const { return NextLocalInstIndex; }
- // Generates type error message for binary operator Op
- // operating on Type OpTy.
+  // Generates a type error message for binary operator Op operating on type
+  // OpTy.
void reportInvalidBinaryOp(Ice::InstArithmetic::OpKind Op, Ice::Type OpTy);
- // Validates if integer logical Op, for type OpTy, is valid.
- // Returns true if valid. Otherwise generates error message and
- // returns false.
+ // Validates if integer logical Op, for type OpTy, is valid. Returns true if
+ // valid. Otherwise generates error message and returns false.
bool isValidIntegerLogicalOp(Ice::InstArithmetic::OpKind Op, Ice::Type OpTy) {
if (Ice::isIntegerType(OpTy))
return true;
@@ -1532,9 +1513,9 @@
return false;
}
- // Validates if integer (or vector of integers) arithmetic Op, for type
- // OpTy, is valid. Returns true if valid. Otherwise generates
- // error message and returns false.
+ // Validates if integer (or vector of integers) arithmetic Op, for type OpTy,
+ // is valid. Returns true if valid. Otherwise generates error message and
+ // returns false.
bool isValidIntegerArithOp(Ice::InstArithmetic::OpKind Op, Ice::Type OpTy) {
if (Ice::isIntegerArithmeticType(OpTy))
return true;
@@ -1542,9 +1523,8 @@
return false;
}
- // Checks if floating arithmetic Op, for type OpTy, is valid.
- // Returns true if valid. Otherwise generates an error message and
- // returns false;
+ // Checks if floating arithmetic Op, for type OpTy, is valid. Returns true if
+  // valid. Otherwise generates an error message and returns false.
bool isValidFloatingArithOp(Ice::InstArithmetic::OpKind Op, Ice::Type OpTy) {
if (Ice::isFloatingType(OpTy))
return true;
@@ -1552,9 +1532,9 @@
return false;
}
- // Checks if the type of operand Op is the valid pointer type, for
- // the given InstructionName. Returns true if valid. Otherwise
- // generates an error message and returns false.
+ // Checks if the type of operand Op is the valid pointer type, for the given
+ // InstructionName. Returns true if valid. Otherwise generates an error
+ // message and returns false.
bool isValidPointerType(Ice::Operand *Op, const char *InstructionName) {
Ice::Type PtrType = Ice::getPointerType();
if (Op->getType() == PtrType)
@@ -1567,9 +1547,8 @@
return false;
}
- // Checks if loading/storing a value of type Ty is allowed.
- // Returns true if Valid. Otherwise generates an error message and
- // returns false.
+ // Checks if loading/storing a value of type Ty is allowed. Returns true if
+ // Valid. Otherwise generates an error message and returns false.
bool isValidLoadStoreType(Ice::Type Ty, const char *InstructionName) {
if (isLoadStoreType(Ty))
return true;
@@ -1580,9 +1559,8 @@
return false;
}
- // Checks if loading/storing a value of type Ty is allowed for
- // the given Alignment. Otherwise generates an error message and
- // returns false.
+ // Checks if loading/storing a value of type Ty is allowed for the given
+ // Alignment. Otherwise generates an error message and returns false.
bool isValidLoadStoreAlignment(size_t Alignment, Ice::Type Ty,
const char *InstructionName) {
if (!isValidLoadStoreType(Ty, InstructionName))
@@ -1598,8 +1576,8 @@
}
// Defines if the given alignment is valid for the given type. Simplified
- // version of PNaClABIProps::isAllowedAlignment, based on API's offered
- // for Ice::Type.
+  // version of PNaClABIProps::isAllowedAlignment, based on the APIs offered for
+ // Ice::Type.
bool isAllowedAlignment(size_t Alignment, Ice::Type Ty) const {
return Alignment == typeAlignInBytes(Ty) ||
(Alignment == 1 && !isVectorType(Ty));
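Since the whole predicate is visible above, a few worked cases (assuming
typeAlignInBytes(IceType_i32) == 4, as the name suggests):

  // isAllowedAlignment(4, IceType_i32)    -> true   (natural alignment)
  // isAllowedAlignment(1, IceType_i32)    -> true   (byte-aligned scalar access)
  // isAllowedAlignment(2, IceType_i32)    -> false  (neither natural nor 1)
  // isAllowedAlignment(1, SomeVectorType) -> false  (alignment 1 is scalar-only)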
@@ -1655,9 +1633,9 @@
return VectorIndexValid;
}
- // Takes the PNaCl bitcode binary operator Opcode, and the opcode
- // type Ty, and sets Op to the corresponding ICE binary
- // opcode. Returns true if able to convert, false otherwise.
+ // Takes the PNaCl bitcode binary operator Opcode, and the opcode type Ty,
+ // and sets Op to the corresponding ICE binary opcode. Returns true if able
+ // to convert, false otherwise.
bool convertBinopOpcode(unsigned Opcode, Ice::Type Ty,
Ice::InstArithmetic::OpKind &Op) {
switch (Opcode) {
@@ -1736,9 +1714,9 @@
}
}
- /// Simplifies out vector types from Type1 and Type2, if both are vectors
- /// of the same size. Returns true iff both are vectors of the same size,
- /// or are both scalar types.
+ /// Simplifies out vector types from Type1 and Type2, if both are vectors of
+ /// the same size. Returns true iff both are vectors of the same size, or are
+ /// both scalar types.
static bool simplifyOutCommonVectorType(Ice::Type &Type1, Ice::Type &Type2) {
bool IsType1Vector = isVectorType(Type1);
bool IsType2Vector = isVectorType(Type2);
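A sketch of the simplification the comment describes, not the actual body: when
both operands are vectors with the same element count, the comparison can
continue on their element types, while a scalar/vector mix is rejected. It
assumes element-type and element-count helpers with the obvious meanings
(typeNumElements appears later in this patch; typeElementType is assumed):

  static bool simplifyOutCommonVectorTypeSketch(Ice::Type &T1, Ice::Type &T2) {
    const bool V1 = Ice::isVectorType(T1);
    const bool V2 = Ice::isVectorType(T2);
    if (V1 != V2)
      return false; // one vector, one scalar: never compatible
    if (!V1)
      return true; // both scalar: nothing to simplify
    if (Ice::typeNumElements(T1) != Ice::typeNumElements(T2))
      return false; // different element counts
    T1 = Ice::typeElementType(T1); // continue the check on the element types
    T2 = Ice::typeElementType(T2);
    return true;
  }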
@@ -1781,8 +1759,8 @@
return isFloatTruncCastValid(TargetType, SourceType);
}
- /// Returns true iff a cast from floating type SourceType to integer
- /// type TargetType is valid.
+ /// Returns true iff a cast from floating type SourceType to integer type
+ /// TargetType is valid.
static bool isFloatToIntCastValid(Ice::Type SourceType,
Ice::Type TargetType) {
if (!(Ice::isFloatingType(SourceType) && Ice::isIntegerType(TargetType)))
@@ -1797,15 +1775,15 @@
return true;
}
- /// Returns true iff a cast from integer type SourceType to floating
- /// type TargetType is valid.
+ /// Returns true iff a cast from integer type SourceType to floating type
+ /// TargetType is valid.
static bool isIntToFloatCastValid(Ice::Type SourceType,
Ice::Type TargetType) {
return isFloatToIntCastValid(TargetType, SourceType);
}
- /// Returns the number of bits used to model type Ty when defining the
- /// bitcast instruction.
+ /// Returns the number of bits used to model type Ty when defining the bitcast
+ /// instruction.
static Ice::SizeT bitcastSizeInBits(Ice::Type Ty) {
if (Ice::isVectorType(Ty))
return Ice::typeNumElements(Ty) *
@@ -1820,10 +1798,10 @@
return bitcastSizeInBits(SourceType) == bitcastSizeInBits(TargetType);
}
- /// Returns true iff the NaCl bitcode Opcode is a valid cast opcode
- /// for converting SourceType to TargetType. Updates CastKind to the
- /// corresponding instruction cast opcode. Also generates an error
- /// message when this function returns false.
+ /// Returns true iff the NaCl bitcode Opcode is a valid cast opcode for
+ /// converting SourceType to TargetType. Updates CastKind to the corresponding
+ /// instruction cast opcode. Also generates an error message when this
+ /// function returns false.
bool convertCastOpToIceOp(uint64_t Opcode, Ice::Type SourceType,
Ice::Type TargetType,
Ice::InstCast::OpKind &CastKind) {
@@ -1888,8 +1866,8 @@
return Result;
}
- // Converts PNaCl bitcode Icmp operator to corresponding ICE op.
- // Returns true if able to convert, false otherwise.
+ // Converts PNaCl bitcode Icmp operator to corresponding ICE op. Returns true
+ // if able to convert, false otherwise.
bool convertNaClBitcICmpOpToIce(uint64_t Op,
Ice::InstIcmp::ICond &Cond) const {
switch (Op) {
@@ -1930,8 +1908,8 @@
}
}
- // Converts PNaCl bitcode Fcmp operator to corresponding ICE op.
- // Returns true if able to convert, false otherwise.
+ // Converts PNaCl bitcode Fcmp operator to corresponding ICE op. Returns true
+ // if able to convert, false otherwise.
bool convertNaClBitcFCompOpToIce(uint64_t Op,
Ice::InstFcmp::FCond &Cond) const {
switch (Op) {
@@ -1990,15 +1968,14 @@
}
}
- // Creates an error instruction, generating a value of type Ty, and
- // adds a placeholder so that instruction indices line up.
- // Some instructions, such as a call, will not generate a value
- // if the return type is void. In such cases, a placeholder value
- // for the badly formed instruction is not needed. Hence, if Ty is
- // void, an error instruction is not appended.
+ // Creates an error instruction, generating a value of type Ty, and adds a
+ // placeholder so that instruction indices line up. Some instructions, such
+ // as a call, will not generate a value if the return type is void. In such
+ // cases, a placeholder value for the badly formed instruction is not needed.
+ // Hence, if Ty is void, an error instruction is not appended.
void appendErrorInstruction(Ice::Type Ty) {
- // Note: we don't worry about downstream translation errors because
- // the function will not be translated if any errors occur.
+ // Note: we don't worry about downstream translation errors because the
+ // function will not be translated if any errors occur.
if (Ty == Ice::IceType_void)
return;
Ice::Variable *Var = getNextInstVar(Ty);
@@ -2025,8 +2002,8 @@
}
if (isIRGenerationDisabled())
return;
- // Before translating, check for blocks without instructions, and
- // insert unreachable. This shouldn't happen, but be safe.
+ // Before translating, check for blocks without instructions, and insert
+ // unreachable. This shouldn't happen, but be safe.
size_t Index = 0;
for (Ice::CfgNode *Node : Func->getNodes()) {
if (Node->getInsts().empty()) {
@@ -2051,8 +2028,8 @@
}
void FunctionParser::ProcessRecord() {
- // Note: To better separate parse/IR generation times, when IR generation
- // is disabled we do the following:
+ // Note: To better separate parse/IR generation times, when IR generation is
+ // disabled we do the following:
// 1) Delay exiting until after we extract operands.
// 2) return before we access operands, since all operands will be a nullptr.
const NaClBitcodeRecord::RecordVector &Values = Record.GetValues();
@@ -2382,11 +2359,10 @@
// SWITCH: [Condty, Cond, BbIndex, NumCases Case ...]
// where Case = [1, 1, Value, BbIndex].
//
- // Note: Unlike most instructions, we don't infer the type of
- // Cond, but provide it as a separate field. There are also
- // unnecesary data fields (i.e. constants 1). These were not
- // cleaned up in PNaCl bitcode because the bitcode format was
- // already frozen when the problem was noticed.
+ // Note: Unlike most instructions, we don't infer the type of Cond, but
+ // provide it as a separate field. There are also unnecessary data fields
+ // (i.e. constants 1). These were not cleaned up in PNaCl bitcode because
+ // the bitcode format was already frozen when the problem was noticed.
InstIsTerminating = true;
if (!isValidRecordSizeAtLeast(4, "switch"))
return;
@@ -2591,13 +2567,12 @@
// CALL: [cc, fnid, arg0, arg1...]
// CALL_INDIRECT: [cc, fn, returnty, args...]
//
- // Note: The difference between CALL and CALL_INDIRECT is that
- // CALL has a reference to an explicit function declaration, while
- // the CALL_INDIRECT is just an address. For CALL, we can infer
- // the return type by looking up the type signature associated
- // with the function declaration. For CALL_INDIRECT we can only
- // infer the type signature via argument types, and the
- // corresponding return type stored in CALL_INDIRECT record.
+ // Note: The difference between CALL and CALL_INDIRECT is that CALL has a
+ // reference to an explicit function declaration, while the CALL_INDIRECT
+ // is just an address. For CALL, we can infer the return type by looking up
+ // the type signature associated with the function declaration. For
+ // CALL_INDIRECT we can only infer the type signature via argument types,
+ // and the corresponding return type stored in CALL_INDIRECT record.
Ice::SizeT ParamsStartIndex = 2;
if (Record.GetCode() == naclbitc::FUNC_CODE_INST_CALL) {
if (!isValidRecordSizeAtLeast(2, "call"))
@@ -2763,8 +2738,8 @@
Ice::GlobalContext *getContext() { return getTranslator().getContext(); }
- // Returns true if the type to use for succeeding constants is defined.
- // If false, also generates an error message.
+ // Returns true if the type to use for succeeding constants is defined. If
+ // false, also generates an error message.
bool isValidNextConstantType() {
if (NextConstantType != Ice::IceType_void)
return true;
@@ -2887,8 +2862,8 @@
void setValueName(NaClBcIndexSize_t Index, StringType &Name) override;
void setBbName(NaClBcIndexSize_t Index, StringType &Name) override;
- // Reports that the assignment of Name to the value associated with
- // index is not possible, for the given Context.
+ // Reports that the assignment of Name to the value associated with index is
+ // not possible, for the given Context.
void reportUnableToAssign(const char *Context, NaClBcIndexSize_t Index,
StringType &Name) {
std::string Buffer;
@@ -2976,10 +2951,10 @@
// and have generated global constant initializers.
bool GlobalDeclarationNamesAndInitializersInstalled = false;
- // Generates names for unnamed global addresses (i.e. functions and
- // global variables). Then lowers global variable declaration
- // initializers to the target. May be called multiple times. Only
- // the first call will do the installation.
+ // Generates names for unnamed global addresses (i.e. functions and global
+ // variables). Then lowers global variable declaration initializers to the
+ // target. May be called multiple times. Only the first call will do the
+ // installation.
void installGlobalNamesAndGlobalVarInitializers() {
if (!GlobalDeclarationNamesAndInitializersInstalled) {
Context->installGlobalNames();
@@ -3130,11 +3105,11 @@
void PNaClTranslator::translate(const std::string &IRFilename,
std::unique_ptr<MemoryObject> &&MemObj) {
- // On error, we report_fatal_error to avoid destroying the MemObj.
- // That may still be in use by IceBrowserCompileServer. Otherwise,
- // we need to change the MemObj to be ref-counted, or have a wrapper,
- // or simply leak. We also need a hook to tell the IceBrowserCompileServer
- // to unblock its QueueStreamer.
+ // On error, we report_fatal_error to avoid destroying the MemObj. That may
+ // still be in use by IceBrowserCompileServer. Otherwise, we need to change
+ // the MemObj to be ref-counted, or have a wrapper, or simply leak. We also
+ // need a hook to tell the IceBrowserCompileServer to unblock its
+ // QueueStreamer.
// https://code.google.com/p/nativeclient/issues/detail?id=4163
Ostream &ErrStream = getContext()->getStrError();
// Read header and verify it is good.
diff --git a/src/PNaClTranslator.h b/src/PNaClTranslator.h
index 24a627b..8a045ad 100644
--- a/src/PNaClTranslator.h
+++ b/src/PNaClTranslator.h
@@ -37,8 +37,8 @@
~PNaClTranslator() override = default;
/// Reads the PNaCl bitcode file and translates to ICE, which is then
- /// converted to machine code. Sets ErrorStatus to 1 if any errors
- /// occurred. Takes ownership of the MemoryObject.
+ /// converted to machine code. Sets ErrorStatus to 1 if any errors occurred.
+ /// Takes ownership of the MemoryObject.
void translate(const std::string &IRFilename,
std::unique_ptr<llvm::MemoryObject> &&MemoryObject);
diff --git a/src/README.SIMD.rst b/src/README.SIMD.rst
index 58f25d9..f8cf08f 100644
--- a/src/README.SIMD.rst
+++ b/src/README.SIMD.rst
@@ -1,13 +1,14 @@
Missing support
===============
-* The PNaCl LLVM backend expands shufflevector operations into
- sequences of insertelement and extractelement operations. For
- instance:
+* The PNaCl LLVM backend expands shufflevector operations into sequences of
+ insertelement and extractelement operations. For instance:
define <4 x i32> @shuffle(<4 x i32> %arg1, <4 x i32> %arg2) {
entry:
- %res = shufflevector <4 x i32> %arg1, <4 x i32> %arg2, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
+ %res = shufflevector <4 x i32> %arg1,
+ <4 x i32> %arg2,
+ <4 x i32> <i32 4, i32 5, i32 0, i32 1>
ret <4 x i32> %res
}
@@ -30,38 +31,34 @@
shuffle operations where appropriate.
* Add support for vector constants in the backend. The current code
- materializes the vector constants it needs (eg. for performing icmp
- on unsigned operands) using register operations, but this should be
- changed to loading them from a constant pool if the register
- initialization is too complicated (such as in
- TargetX8632::makeVectorOfHighOrderBits()).
+  materializes the vector constants it needs (e.g. for performing icmp on
+ unsigned operands) using register operations, but this should be changed to
+ loading them from a constant pool if the register initialization is too
+ complicated (such as in TargetX8632::makeVectorOfHighOrderBits()).
-* [x86 specific] llvm-mc does not allow lea to take a mem128 memory
- operand when assembling x86-32 code. The current
- InstX8632Lea::emit() code uses Variable::asType() to convert any
- mem128 Variables into a compatible memory operand type. However, the
- emit code does not do any conversions of OperandX8632Mem, so if an
- OperandX8632Mem is passed to lea as mem128 the resulting code will
- not assemble. One way to fix this is by implementing
+* [x86 specific] llvm-mc does not allow lea to take a mem128 memory operand
+ when assembling x86-32 code. The current InstX8632Lea::emit() code uses
+ Variable::asType() to convert any mem128 Variables into a compatible memory
+ operand type. However, the emit code does not do any conversions of
+ OperandX8632Mem, so if an OperandX8632Mem is passed to lea as mem128 the
+ resulting code will not assemble. One way to fix this is by implementing
OperandX8632Mem::asType().
-* [x86 specific] Lower shl with <4 x i32> using some clever float
- conversion:
+* [x86 specific] Lower shl with <4 x i32> using some clever float conversion:
http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20100726/105087.html
-* [x86 specific] Add support for using aligned mov operations
- (movaps). This will require passing alignment information to loads
- and stores.
+* [x86 specific] Add support for using aligned mov operations (movaps). This
+ will require passing alignment information to loads and stores.
x86 SIMD Diversification
========================
-* Vector "bitwise" operations have several variant instructions: the
- AND operation can be implemented with pand, andpd, or andps. This
- pattern also holds for ANDN, OR, and XOR.
+* Vector "bitwise" operations have several variant instructions: the AND
+ operation can be implemented with pand, andpd, or andps. This pattern also
+ holds for ANDN, OR, and XOR.
-* Vector "mov" instructions can be diversified (eg. movdqu instead of
- movups) at the cost of a possible performance penalty.
+* Vector "mov" instructions can be diversified (eg. movdqu instead of movups)
+ at the cost of a possible performance penalty.
-* Scalar FP arithmetic can be diversified by performing the operations
- with the vector version of the instructions.
+* Scalar FP arithmetic can be diversified by performing the operations with the
+ vector version of the instructions.
diff --git a/src/main.cpp b/src/main.cpp
index a6cb490..fa5bb06 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -8,8 +8,8 @@
//===----------------------------------------------------------------------===//
///
/// \file
-/// This file defines the entry point for translating PNaCl bitcode into
-/// native code.
+/// This file defines the entry point for translating PNaCl bitcode into native
+/// code.
///
//===----------------------------------------------------------------------===//
@@ -22,8 +22,8 @@
Ice::Compiler Comp;
// Can only compile the BrowserCompileServer w/ the NaCl compiler.
#if PNACL_BROWSER_TRANSLATOR
- // There are no real commandline arguments in the browser case.
- // They are supplied via IPC.
+ // There are no real commandline arguments in the browser case. They are
+ // supplied via IPC.
assert(argc == 1);
(void)argc;
(void)argv;