Reflow comments to use the full width. BUG= R=stichnot@chromium.org Review URL: https://codereview.chromium.org/1341423002 .
diff --git a/src/IceAPFloat.h b/src/IceAPFloat.h index ccfb7f3..9aed889 100644 --- a/src/IceAPFloat.h +++ b/src/IceAPFloat.h
@@ -11,8 +11,8 @@ /// \brief This file implements a class to represent Subzero float and double /// values. /// -/// Note: This is a simplified version of -/// llvm/include/llvm/ADT/APFloat.h for use with Subzero. +/// Note: This is a simplified version of llvm/include/llvm/ADT/APFloat.h for +/// use with Subzero. //===----------------------------------------------------------------------===// #ifndef SUBZERO_SRC_ICEAPFLOAT_H
diff --git a/src/IceAssembler.cpp b/src/IceAssembler.cpp index 5c1760b..9c77dce 100644 --- a/src/IceAssembler.cpp +++ b/src/IceAssembler.cpp
@@ -48,13 +48,13 @@ } void AssemblerBuffer::EnsureCapacity::validate(AssemblerBuffer *buffer) { - // In debug mode, we save the assembler buffer along with the gap - // size before we start emitting to the buffer. This allows us to - // check that any single generated instruction doesn't overflow the - // limit implied by the minimum gap size. + // In debug mode, we save the assembler buffer along with the gap size before + // we start emitting to the buffer. This allows us to check that any single + // generated instruction doesn't overflow the limit implied by the minimum + // gap size. Gap = computeGap(); - // Make sure that extending the capacity leaves a big enough gap - // for any kind of instruction. + // Make sure that extending the capacity leaves a big enough gap for any kind + // of instruction. assert(Gap >= kMinimumGap); // Mark the buffer as having ensured the capacity. assert(!buffer->hasEnsuredCapacity()); // Cannot nest. @@ -64,8 +64,8 @@ AssemblerBuffer::EnsureCapacity::~EnsureCapacity() { // Unmark the buffer, so we cannot emit after this. Buffer->HasEnsuredCapacity = false; - // Make sure the generated instruction doesn't take up more - // space than the minimum gap. + // Make sure the generated instruction doesn't take up more space than the + // minimum gap. intptr_t delta = Gap - computeGap(); (void)delta; assert(delta <= kMinimumGap); @@ -133,9 +133,9 @@ } Str << "\t.long "; // For PCRel fixups, we write the pc-offset from a symbol into the Buffer - // (e.g., -4), but we don't represent that in the fixup's offset. - // Otherwise the fixup holds the true offset, and so does the Buffer. - // Just load the offset from the buffer. + // (e.g., -4), but we don't represent that in the fixup's offset. Otherwise + // the fixup holds the true offset, and so does the Buffer. Just load the + // offset from the buffer. NextFixup->emit(Ctx, Buffer.load<RelocOffsetT>(NextFixupLoc)); if (fixupIsPCRel(NextFixup->kind())) Str << " - .";
diff --git a/src/IceAssembler.h b/src/IceAssembler.h index 5b07975..8247e66 100644 --- a/src/IceAssembler.h +++ b/src/IceAssembler.h
@@ -15,9 +15,9 @@ //===----------------------------------------------------------------------===// /// /// \file -/// This file declares the Assembler base class. Instructions are assembled -/// by architecture-specific assemblers that derive from this base class. -/// This base class manages buffers and fixups for emitting code, etc. +/// This file declares the Assembler base class. Instructions are assembled by +/// architecture-specific assemblers that derive from this base class. This base +/// class manages buffers and fixups for emitting code, etc. /// //===----------------------------------------------------------------------===// @@ -55,7 +55,7 @@ } /// Returns the position of an earlier branch instruction that was linked to - /// this label (branches that use this are considered forward branches). The + /// this label (branches that use this are considered forward branches). The /// linked instructions form a linked list, of sorts, using the instruction's /// displacement field for the location of the next instruction that is also /// linked to this label. @@ -200,8 +200,8 @@ return (Limit - Contents) + kMinimumGap; } - /// Compute the limit based on the data area and the capacity. See - /// description of kMinimumGap for the reasoning behind the value. + /// Compute the limit based on the data area and the capacity. See description + /// of kMinimumGap for the reasoning behind the value. static uintptr_t computeLimit(uintptr_t Data, intptr_t Capacity) { return Data + Capacity - kMinimumGap; } @@ -226,12 +226,12 @@ /// Allocate a chunk of bytes using the per-Assembler allocator. uintptr_t allocateBytes(size_t bytes) { - // For now, alignment is not related to NaCl bundle alignment, since - // the buffer's GetPosition is relative to the base. So NaCl bundle - // alignment checks can be relative to that base. 
Later, the buffer - // will be copied out to a ".text" section (or an in memory-buffer - // that can be mprotect'ed with executable permission), and that - // second buffer should be aligned for NaCl. + // For now, alignment is not related to NaCl bundle alignment, since the + // buffer's GetPosition is relative to the base. So NaCl bundle alignment + // checks can be relative to that base. Later, the buffer will be copied + // out to a ".text" section (or an in memory-buffer that can be mprotect'ed + // with executable permission), and that second buffer should be aligned + // for NaCl. const size_t Alignment = 16; return reinterpret_cast<uintptr_t>(Allocator.Allocate(bytes, Alignment)); } @@ -257,8 +257,8 @@ /// Get the label for a CfgNode. virtual Label *getCfgNodeLabel(SizeT NodeNumber) = 0; - /// Mark the current text location as the start of a CFG node - /// (represented by NodeNumber). + /// Mark the current text location as the start of a CFG node (represented by + /// NodeNumber). virtual void bindCfgNodeLabel(SizeT NodeNumber) = 0; virtual bool fixupIsPCRel(FixupKind Kind) const = 0; @@ -293,15 +293,15 @@ const AssemblerKind Kind; ArenaAllocator<32 * 1024> Allocator; - /// FunctionName and IsInternal are transferred from the original Cfg - /// object, since the Cfg object may be deleted by the time the - /// assembler buffer is emitted. + /// FunctionName and IsInternal are transferred from the original Cfg object, + /// since the Cfg object may be deleted by the time the assembler buffer is + /// emitted. IceString FunctionName = ""; bool IsInternal = false; - /// Preliminary indicates whether a preliminary pass is being made - /// for calculating bundle padding (Preliminary=true), versus the - /// final pass where all changes to label bindings, label links, and - /// relocation fixups are fully committed (Preliminary=false). 
+ /// Preliminary indicates whether a preliminary pass is being made for + /// calculating bundle padding (Preliminary=true), versus the final pass where + /// all changes to label bindings, label links, and relocation fixups are + /// fully committed (Preliminary=false). bool Preliminary = false; protected:
diff --git a/src/IceAssemblerX86Base.h b/src/IceAssemblerX86Base.h index ab1a29d..7c4be6f 100644 --- a/src/IceAssemblerX86Base.h +++ b/src/IceAssemblerX86Base.h
@@ -195,8 +195,8 @@ Type, typename Traits::GPRRegister, typename Traits::GPRRegister, const Immediate &); struct GPREmitterShiftD { - // Technically AddrGPR and AddrGPRImm are also allowed, but in practice - // we always normalize Dest to a Register first. + // Technically AddrGPR and AddrGPRImm are also allowed, but in practice we + // always normalize Dest to a Register first. TypedEmitGPRGPR GPRGPR; TypedEmitGPRGPRImm GPRGPRImm; }; @@ -252,8 +252,8 @@ TypedEmitAddr RegAddr; }; - // Three operand (potentially) cross Xmm/GPR instructions. - // The last operand must be an immediate. + // Three operand (potentially) cross Xmm/GPR instructions. The last operand + // must be an immediate. template <typename DReg_t, typename SReg_t> struct ThreeOpImmEmitter { using TypedEmitRegRegImm = void (AssemblerX86Base::*)(Type, DReg_t, SReg_t, const Immediate &); @@ -906,8 +906,8 @@ Label *getOrCreateLabel(SizeT Number, LabelVector &Labels); - // The arith_int() methods factor out the commonality between the encodings of - // add(), Or(), adc(), sbb(), And(), sub(), Xor(), and cmp(). The Tag + // The arith_int() methods factor out the commonality between the encodings + // of add(), Or(), adc(), sbb(), And(), sub(), Xor(), and cmp(). The Tag // parameter is statically asserted to be less than 8. template <uint32_t Tag> void arith_int(Type Ty, typename Traits::GPRRegister reg, @@ -957,10 +957,10 @@ isByteSizedType(Ty); } - // assembleAndEmitRex is used for determining which (if any) rex prefix should - // be emitted for the current instruction. It allows different types for Reg - // and Rm because they could be of different types (e.g., in mov[sz]x - // instrutions.) If Addr is not nullptr, then Rm is ignored, and Rex.B is + // assembleAndEmitRex is used for determining which (if any) rex prefix + // should be emitted for the current instruction. It allows different types + // for Reg and Rm because they could be of different types (e.g., in mov[sz]x + // instructions.) 
If Addr is not nullptr, then Rm is ignored, and Rex.B is // determined by Addr instead. TyRm is still used to determine Addr's size. template <typename RegType, typename RmType, typename T = Traits> typename std::enable_if<T::Is64Bit, void>::type @@ -1005,9 +1005,9 @@ assembleAndEmitRex(TyReg, Reg, TyRm, Rm); } - // emitRexB is used for emitting a Rex prefix if one is needed on encoding the - // Reg field in an x86 instruction. It is invoked by the template when Reg is - // the single register operand in the instruction (e.g., push Reg.) + // emitRexB is used for emitting a Rex prefix if one is needed on encoding + // the Reg field in an x86 instruction. It is invoked by the template when + // Reg is the single register operand in the instruction (e.g., push Reg.) template <typename RmType> void emitRexB(const Type Ty, const RmType Rm) { emitRexRB(Ty, RexRegIrrelevant, Ty, Rm); }
diff --git a/src/IceAssemblerX86BaseImpl.h b/src/IceAssemblerX86BaseImpl.h index f449dae..b1013d6 100644 --- a/src/IceAssemblerX86BaseImpl.h +++ b/src/IceAssemblerX86BaseImpl.h
@@ -1068,9 +1068,8 @@ } // {add,sub,mul,div}ps are given a Ty parameter for consistency with -// {add,sub,mul,div}ss. In the future, when the PNaCl ABI allows -// addpd, etc., we can use the Ty parameter to decide on adding -// a 0x66 prefix. +// {add,sub,mul,div}ss. In the future, when the PNaCl ABI allows addpd, etc., +// we can use the Ty parameter to decide on adding a 0x66 prefix. template <class Machine> void AssemblerX86Base<Machine>::addps(Type /* Ty */, typename Traits::XmmRegister dst, @@ -1836,8 +1835,8 @@ emitUint8(0x0F); emitUint8(0x3A); emitUint8(isByteSizedType(Ty) ? 0x14 : 0x16); - // SSE 4.1 versions are "MRI" because dst can be mem, while - // pextrw (SSE2) is RMI because dst must be reg. + // SSE 4.1 versions are "MRI" because dst can be mem, while pextrw (SSE2) + // is RMI because dst must be reg. emitXmmRegisterOperand(src, dst); emitUint8(imm.value()); } @@ -2147,11 +2146,11 @@ void AssemblerX86Base<Machine>::test(Type Ty, typename Traits::GPRRegister reg, const Immediate &immediate) { AssemblerBuffer::EnsureCapacity ensured(&Buffer); - // For registers that have a byte variant (EAX, EBX, ECX, and EDX) - // we only test the byte register to keep the encoding short. - // This is legal even if the register had high bits set since - // this only sets flags registers based on the "AND" of the two operands, - // and the immediate had zeros at those high bits. + // For registers that have a byte variant (EAX, EBX, ECX, and EDX) we only + // test the byte register to keep the encoding short. This is legal even if + // the register had high bits set since this only sets flags registers based + // on the "AND" of the two operands, and the immediate had zeros at those + // high bits. if (immediate.is_uint8() && reg <= Traits::Last8BitGPR) { // Use zero-extended 8-bit immediate. 
emitRexB(Ty, reg); @@ -2183,8 +2182,8 @@ const typename Traits::Address &addr, const Immediate &immediate) { AssemblerBuffer::EnsureCapacity ensured(&Buffer); - // If the immediate is short, we only test the byte addr to keep the - // encoding short. + // If the immediate is short, we only test the byte addr to keep the encoding + // short. if (immediate.is_uint8()) { // Use zero-extended 8-bit immediate. emitRex(Ty, addr, RexRegIrrelevant); @@ -3016,10 +3015,10 @@ // TODO(stichnot): Here and in jmp(), we may need to be more // conservative about the backward branch distance if the branch // instruction is within a bundle_lock sequence, because the - // distance may increase when padding is added. This isn't an - // issue for branches outside a bundle_lock, because if padding - // is added, the retry may change it to a long backward branch - // without affecting any of the bookkeeping. + // distance may increase when padding is added. This isn't an issue for + // branches outside a bundle_lock, because if padding is added, the retry + // may change it to a long backward branch without affecting any of the + // bookkeeping. emitUint8(0x70 + condition); emitUint8((offset - kShortSize) & 0xFF); } else {
diff --git a/src/IceBrowserCompileServer.cpp b/src/IceBrowserCompileServer.cpp index 03127a9..4d5705f 100644 --- a/src/IceBrowserCompileServer.cpp +++ b/src/IceBrowserCompileServer.cpp
@@ -106,14 +106,13 @@ BrowserCompileServer *Server = reinterpret_cast<BrowserCompileServer *>(UserData); Server->setFatalError(Reason); - // Only kill the current thread instead of the whole process. - // We need the server thread to remain alive in order to respond with the - // error message. + // Only kill the current thread instead of the whole process. We need the + // server thread to remain alive in order to respond with the error message. // We could also try to pthread_kill all other worker threads, but - // pthread_kill / raising signals is not supported by NaCl. - // We'll have to assume that the worker/emitter threads will be well behaved - // after a fatal error in other threads, and either get stuck waiting - // on input from a previous stage, or also call report_fatal_error. + // pthread_kill / raising signals is not supported by NaCl. We'll have to + // assume that the worker/emitter threads will be well behaved after a fatal + // error in other threads, and either get stuck waiting on input from a + // previous stage, or also call report_fatal_error. pthread_exit(0); } @@ -143,8 +142,8 @@ } bool BrowserCompileServer::pushInputBytes(const void *Data, size_t NumBytes) { - // If there was an earlier error, do not attempt to push bytes to - // the QueueStreamer. Otherwise the thread could become blocked. + // If there was an earlier error, do not attempt to push bytes to the + // QueueStreamer. Otherwise the thread could become blocked. if (HadError.load()) return true; return InputStream->PutBytes( @@ -163,8 +162,8 @@ ErrorCode &BrowserCompileServer::getErrorCode() { if (HadError.load()) { // HadError means report_fatal_error is called. Make sure that the - // LastError is not EC_None. We don't know the type of error so - // just pick some error category. + // LastError is not EC_None. We don't know the type of error so just pick + // some error category. LastError.assign(EC_Translation); } return LastError;
diff --git a/src/IceBrowserCompileServer.h b/src/IceBrowserCompileServer.h index e76b820..f23ab17 100644 --- a/src/IceBrowserCompileServer.h +++ b/src/IceBrowserCompileServer.h
@@ -31,12 +31,11 @@ namespace Ice { -/// The browser variant of the compile server. -/// Compared to the commandline version, this version gets compile -/// requests over IPC. Each compile request will have a slimmed down -/// version of argc, argv while other flags are set to defaults that -/// make sense in the browser case. The output file is specified via -/// a posix FD, and input bytes are pushed to the server. +/// The browser variant of the compile server. Compared to the commandline +/// version, this version gets compile requests over IPC. Each compile request +/// will have a slimmed down version of argc, argv while other flags are set to +/// defaults that make sense in the browser case. The output file is specified +/// via a posix FD, and input bytes are pushed to the server. class BrowserCompileServer : public CompileServer { BrowserCompileServer() = delete; BrowserCompileServer(const BrowserCompileServer &) = delete; @@ -56,12 +55,12 @@ /// Parse and set up the flags for compile jobs. void getParsedFlags(uint32_t NumThreads, int argc, char **argv); - /// Creates the streams + context and starts the compile thread, - /// handing off the streams + context. + /// Creates the streams + context and starts the compile thread, handing off + /// the streams + context. void startCompileThread(int OutFD); - /// Call to push more bytes to the current input stream. - /// Returns false on success and true on error. + /// Call to push more bytes to the current input stream. Returns false on + /// success and true on error. bool pushInputBytes(const void *Data, size_t NumBytes); /// Notify the input stream of EOF. @@ -72,9 +71,8 @@ CompileThread.join(); if (Ctx->getErrorStatus()->value()) LastError.assign(Ctx->getErrorStatus()->value()); - // Reset some state. The InputStream is deleted by the compiler - // so only reset this to nullptr. Free and flush the rest - // of the streams. + // Reset some state. 
The InputStream is deleted by the compiler so only + // reset this to nullptr. Free and flush the rest of the streams. InputStream = nullptr; EmitStream.reset(nullptr); ELFStream.reset(nullptr); @@ -95,12 +93,12 @@ std::string Buffer; llvm::raw_string_ostream StrBuf; }; - /// This currently only handles a single compile request, hence one copy - /// of the state. + /// This currently only handles a single compile request, hence one copy of + /// the state. std::unique_ptr<GlobalContext> Ctx; - /// A borrowed reference to the current InputStream. The compiler owns - /// the actual reference so the server must be careful not to access - /// after the compiler is done. + /// A borrowed reference to the current InputStream. The compiler owns the + /// actual reference so the server must be careful not to access after the + /// compiler is done. llvm::QueueStreamer *InputStream = nullptr; std::unique_ptr<Ostream> LogStream; std::unique_ptr<llvm::raw_fd_ostream> EmitStream;
diff --git a/src/IceCfg.cpp b/src/IceCfg.cpp index ed60abb..4c703cf 100644 --- a/src/IceCfg.cpp +++ b/src/IceCfg.cpp
@@ -8,8 +8,7 @@ //===----------------------------------------------------------------------===// /// /// \file -/// This file implements the Cfg class, including constant pool -/// management. +/// This file implements the Cfg class, including constant pool management. /// //===----------------------------------------------------------------------===// @@ -46,8 +45,8 @@ TargetAssembler(TargetLowering::createAssembler( Ctx->getFlags().getTargetArch(), this)) { if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Randomize) { - // If -randomize-pool-immediates=randomize, create a random number generator - // to generate a cookie for constant blinding. + // If -randomize-pool-immediates=randomize, create a random number + // generator to generate a cookie for constant blinding. RandomNumberGenerator RNG(Ctx->getFlags().getRandomSeed(), RPE_ConstantBlinding, this->SequenceNumber); ConstantBlindingCookie = @@ -86,8 +85,8 @@ ImplicitArgs.push_back(Arg); } -// Returns whether the stack frame layout has been computed yet. This -// is used for dumping the stack frame location of Variables. +// Returns whether the stack frame layout has been computed yet. This is used +// for dumping the stack frame location of Variables. bool Cfg::hasComputedFrame() const { return getTarget()->hasComputedFrame(); } namespace { @@ -157,8 +156,8 @@ void Cfg::translate() { if (hasError()) return; - // FunctionTimer conditionally pushes/pops a TimerMarker if - // TimeEachFunction is enabled. + // FunctionTimer conditionally pushes/pops a TimerMarker if TimeEachFunction + // is enabled. std::unique_ptr<TimerMarker> FunctionTimer; if (BuildDefs::dump()) { const IceString &TimingFocusOn = @@ -180,16 +179,16 @@ if (getContext()->getFlags().getEnableBlockProfile()) { profileBlocks(); - // TODO(jpp): this is fragile, at best. Figure out a better way of detecting - // exit functions. + // TODO(jpp): this is fragile, at best. Figure out a better way of + // detecting exit functions. 
if (GlobalContext::matchSymbolName(getFunctionName(), "exit")) { addCallToProfileSummary(); } dump("Profiled CFG"); } - // The set of translation passes and their order are determined by - // the target. + // The set of translation passes and their order are determined by the + // target. getTarget()->translate(); dump("Final output"); @@ -273,8 +272,8 @@ for (Variable *Var : Variables) { Var->getLiveRange().reset(); } - // This splits edges and appends new nodes to the end of the node - // list. This can invalidate iterators, so don't use an iterator. + // This splits edges and appends new nodes to the end of the node list. This + // can invalidate iterators, so don't use an iterator. SizeT NumNodes = getNumNodes(); SizeT NumVars = getNumVariables(); for (SizeT I = 0; I < NumNodes; ++I) @@ -282,8 +281,8 @@ TimerMarker TT(TimerStack::TT_lowerPhiAssignments, this); if (true) { - // The following code does an in-place update of liveness and live ranges as - // a result of adding the new phi edge split nodes. + // The following code does an in-place update of liveness and live ranges + // as a result of adding the new phi edge split nodes. getLiveness()->initPhiEdgeSplits(Nodes.begin() + NumNodes, Variables.begin() + NumVars); TimerMarker TTT(TimerStack::TT_liveness, this); @@ -297,7 +296,7 @@ } } else { // The following code does a brute-force recalculation of live ranges as a - // result of adding the new phi edge split nodes. The liveness calculation + // result of adding the new phi edge split nodes. The liveness calculation // is particularly expensive because the new nodes are not yet in a proper // topological order and so convergence is slower. // @@ -310,27 +309,25 @@ Target->regAlloc(RAK_Phi); } -// Find a reasonable placement for nodes that have not yet been -// placed, while maintaining the same relative ordering among already -// placed nodes. 
+// Find a reasonable placement for nodes that have not yet been placed, while +// maintaining the same relative ordering among already placed nodes. void Cfg::reorderNodes() { - // TODO(ascull): it would be nice if the switch tests were always followed - // by the default case to allow for fall through. + // TODO(ascull): it would be nice if the switch tests were always followed by + // the default case to allow for fall through. using PlacedList = std::list<CfgNode *>; PlacedList Placed; // Nodes with relative placement locked down PlacedList Unreachable; // Unreachable nodes PlacedList::iterator NoPlace = Placed.end(); - // Keep track of where each node has been tentatively placed so that - // we can manage insertions into the middle. + // Keep track of where each node has been tentatively placed so that we can + // manage insertions into the middle. std::vector<PlacedList::iterator> PlaceIndex(Nodes.size(), NoPlace); for (CfgNode *Node : Nodes) { - // The "do ... while(0);" construct is to factor out the - // --PlaceIndex and assert() statements before moving to the next - // node. + // The "do ... while(0);" construct is to factor out the --PlaceIndex and + // assert() statements before moving to the next node. do { if (Node != getEntryNode() && Node->getInEdges().empty()) { - // The node has essentially been deleted since it is not a - // successor of any other node. + // The node has essentially been deleted since it is not a successor of + // any other node. Unreachable.push_back(Node); PlaceIndex[Node->getIndex()] = Unreachable.end(); Node->setNeedsPlacement(false); @@ -343,10 +340,10 @@ continue; } Node->setNeedsPlacement(false); - // Assume for now that the unplaced node is from edge-splitting - // and therefore has 1 in-edge and 1 out-edge (actually, possibly - // more than 1 in-edge if the predecessor node was contracted). - // If this changes in the future, rethink the strategy. 
+ // Assume for now that the unplaced node is from edge-splitting and + // therefore has 1 in-edge and 1 out-edge (actually, possibly more than 1 + // in-edge if the predecessor node was contracted). If this changes in + // the future, rethink the strategy. assert(Node->getInEdges().size() >= 1); assert(Node->getOutEdges().size() == 1); @@ -363,8 +360,8 @@ // Otherwise, place it after the (first) predecessor. CfgNode *Pred = Node->getInEdges().front(); auto PredPosition = PlaceIndex[Pred->getIndex()]; - // It shouldn't be the case that PredPosition==NoPlace, but if - // that somehow turns out to be true, we just insert Node before + // It shouldn't be the case that PredPosition==NoPlace, but if that + // somehow turns out to be true, we just insert Node before // PredPosition=NoPlace=Placed.end() . if (PredPosition != NoPlace) ++PredPosition; @@ -475,9 +472,9 @@ LA.computeLoopNestDepth(); } -// This is a lightweight version of live-range-end calculation. Marks the last +// This is a lightweight version of live-range-end calculation. Marks the last // use of only those variables whose definition and uses are completely with a -// single block. It is a quick single pass and doesn't need to iterate until +// single block. It is a quick single pass and doesn't need to iterate until // convergence. void Cfg::livenessLightweight() { TimerMarker T(TimerStack::TT_livenessLightweight, this); @@ -513,9 +510,9 @@ for (Variable *Var : Variables) Var->resetLiveRange(); } - // Make a final pass over each node to delete dead instructions, - // collect the first and last instruction numbers, and add live - // range segments for that node. + // Make a final pass over each node to delete dead instructions, collect the + // first and last instruction numbers, and add live range segments for that + // node. 
for (CfgNode *Node : Nodes) { InstNumberT FirstInstNum = Inst::NumberSentinel; InstNumberT LastInstNum = Inst::NumberSentinel; @@ -538,19 +535,18 @@ } } if (Mode == Liveness_Intervals) { - // Special treatment for live in-args. Their liveness needs to - // extend beyond the beginning of the function, otherwise an arg - // whose only use is in the first instruction will end up having - // the trivial live range [2,2) and will *not* interfere with - // other arguments. So if the first instruction of the method - // is "r=arg1+arg2", both args may be assigned the same - // register. This is accomplished by extending the entry - // block's instruction range from [2,n) to [1,n) which will - // transform the problematic [2,2) live ranges into [1,2). + // Special treatment for live in-args. Their liveness needs to extend + // beyond the beginning of the function, otherwise an arg whose only use + // is in the first instruction will end up having the trivial live range + // [2,2) and will *not* interfere with other arguments. So if the first + // instruction of the method is "r=arg1+arg2", both args may be assigned + // the same register. This is accomplished by extending the entry block's + // instruction range from [2,n) to [1,n) which will transform the + // problematic [2,2) live ranges into [1,2). if (Node == getEntryNode()) { - // TODO(stichnot): Make it a strict requirement that the entry - // node gets the lowest instruction numbers, so that extending - // the live range for in-args is guaranteed to work. + // TODO(stichnot): Make it a strict requirement that the entry node + // gets the lowest instruction numbers, so that extending the live + // range for in-args is guaranteed to work. FirstInstNum = Inst::NumberExtended; } Node->livenessAddIntervals(getLiveness(), FirstInstNum, LastInstNum); @@ -558,8 +554,8 @@ } } -// Traverse every Variable of every Inst and verify that it -// appears within the Variable's computed live range. 
+// Traverse every Variable of every Inst and verify that it appears within the +// Variable's computed live range. bool Cfg::validateLiveness() const { TimerMarker T(TimerStack::TT_validateLiveness, this); bool Valid = true; @@ -579,13 +575,12 @@ const bool IsDest = true; if (!Dest->getLiveRange().containsValue(InstNumber, IsDest)) Invalid = true; - // Check that this instruction actually *begins* Dest's live - // range, by checking that Dest is not live in the previous - // instruction. As a special exception, we don't check this - // for the first instruction of the block, because a Phi - // temporary may be live at the end of the previous block, - // and if it is also assigned in the first instruction of - // this block, the adjacent live ranges get merged. + // Check that this instruction actually *begins* Dest's live range, + // by checking that Dest is not live in the previous instruction. As + // a special exception, we don't check this for the first instruction + // of the block, because a Phi temporary may be live at the end of + // the previous block, and if it is also assigned in the first + // instruction of this block, the adjacent live ranges get merged. if (static_cast<class Inst *>(&Inst) != FirstInst && !Inst.isDestNonKillable() && Dest->getLiveRange().containsValue(InstNumber - 1, IsDest)) @@ -642,9 +637,9 @@ // ======================== Dump routines ======================== // -// emitTextHeader() is not target-specific (apart from what is -// abstracted by the Assembler), so it is defined here rather than in -// the target lowering class. +// emitTextHeader() is not target-specific (apart from what is abstracted by +// the Assembler), so it is defined here rather than in the target lowering +// class. 
void Cfg::emitTextHeader(const IceString &MangledName, GlobalContext *Ctx, const Assembler *Asm) { if (!BuildDefs::dump()) @@ -674,8 +669,8 @@ switch (Ctx->getFlags().getOutFileType()) { case FT_Elf: case FT_Iasm: { - // The emission needs to be delayed until the after the text section so save - // the offsets in the global context. + // The emission needs to be delayed until after the text section so + // save the offsets in the global context. IceString MangledName = Ctx->mangleName(getFunctionName()); for (const InstJumpTable *JumpTable : JumpTables) { SizeT NumTargets = JumpTable->getNumTargets(); @@ -726,8 +721,8 @@ void Cfg::emitIAS() { TimerMarker T(TimerStack::TT_emit, this); - // The emitIAS() routines emit into the internal assembler buffer, - // so there's no need to lock the streams. + // The emitIAS() routines emit into the internal assembler buffer, so there's + // no need to lock the streams. deleteJumpTableInsts(); const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing(); for (CfgNode *Node : Nodes) {
diff --git a/src/IceCfg.h b/src/IceCfg.h index ca9d706..4147dd9 100644 --- a/src/IceCfg.h +++ b/src/IceCfg.h
@@ -8,8 +8,8 @@ //===----------------------------------------------------------------------===// /// /// \file -/// This file declares the Cfg class, which represents the control flow -/// graph and the overall per-function compilation context. +/// This file declares the Cfg class, which represents the control flow graph +/// and the overall per-function compilation context. /// //===----------------------------------------------------------------------===// @@ -50,9 +50,9 @@ GlobalContext *getContext() const { return Ctx; } uint32_t getSequenceNumber() const { return SequenceNumber; } - /// Returns true if any of the specified options in the verbose mask - /// are set. If the argument is omitted, it checks if any verbose - /// options at all are set. + /// Returns true if any of the specified options in the verbose mask are set. + /// If the argument is omitted, it checks if any verbose options at all are + /// set. bool isVerbose(VerboseMask Mask = IceV_All) const { return VMask & Mask; } void setVerbose(VerboseMask Mask) { VMask = Mask; } @@ -72,11 +72,10 @@ /// \name Manage errors. /// @{ - /// Translation error flagging. If support for some construct is - /// known to be missing, instead of an assertion failure, setError() - /// should be called and the error should be propagated back up. - /// This way, we can gracefully fail to translate and let a fallback - /// translator handle the function. + /// Translation error flagging. If support for some construct is known to be + /// missing, instead of an assertion failure, setError() should be called and + /// the error should be propagated back up. This way, we can gracefully fail + /// to translate and let a fallback translator handle the function. 
void setError(const IceString &Message); bool hasError() const { return HasError; } IceString getError() const { return ErrorMessage; } @@ -98,11 +97,10 @@ /// @} using IdentifierIndexType = int32_t; - /// Adds a name to the list and returns its index, suitable for the - /// argument to getIdentifierName(). No checking for duplicates is - /// done. This is generally used for node names and variable names - /// to avoid embedding a std::string inside an arena-allocated - /// object. + /// Adds a name to the list and returns its index, suitable for the argument + /// to getIdentifierName(). No checking for duplicates is done. This is + /// generally used for node names and variable names to avoid embedding a + /// std::string inside an arena-allocated object. IdentifierIndexType addIdentifierName(const IceString &Name) { IdentifierIndexType Index = IdentifierNames.size(); IdentifierNames.push_back(Name); @@ -122,8 +120,8 @@ /// \name Manage Variables. /// @{ - /// Create a new Variable with a particular type and an optional - /// name. The Node argument is the node where the variable is defined. + /// Create a new Variable with a particular type and an optional name. The + /// Node argument is the node where the variable is defined. // TODO(jpp): untemplate this with separate methods: makeVariable, // makeSpillVariable, and makeStackVariable. template <typename T = Variable> T *makeVariable(Type Ty) { @@ -176,9 +174,9 @@ /// Passes over the CFG. void translate(); - /// After the CFG is fully constructed, iterate over the nodes and - /// compute the predecessor and successor edges, in the form of - /// CfgNode::InEdges[] and CfgNode::OutEdges[]. + /// After the CFG is fully constructed, iterate over the nodes and compute the + /// predecessor and successor edges, in the form of CfgNode::InEdges[] and + /// CfgNode::OutEdges[]. 
void computeInOutEdges(); void renumberInstructions(); void placePhiLoads(); @@ -277,16 +275,15 @@ std::unique_ptr<VariableDeclarationList> GlobalInits; std::vector<InstJumpTable *> JumpTables; - /// CurrentNode is maintained during dumping/emitting just for - /// validating Variable::DefNode. Normally, a traversal over - /// CfgNodes maintains this, but before global operations like - /// register allocation, resetCurrentNode() should be called to avoid - /// spurious validation failures. + /// CurrentNode is maintained during dumping/emitting just for validating + /// Variable::DefNode. Normally, a traversal over CfgNodes maintains this, but + /// before global operations like register allocation, resetCurrentNode() + /// should be called to avoid spurious validation failures. const CfgNode *CurrentNode = nullptr; - /// Maintain a pointer in TLS to the current Cfg being translated. - /// This is primarily for accessing its allocator statelessly, but - /// other uses are possible. + /// Maintain a pointer in TLS to the current Cfg being translated. This is + /// primarily for accessing its allocator statelessly, but other uses are + /// possible. ICE_TLS_DECLARE_FIELD(const Cfg *, CurrentCfg); public:
diff --git a/src/IceCfgNode.cpp b/src/IceCfgNode.cpp index 0ccc6ea..31a6e8a 100644 --- a/src/IceCfgNode.cpp +++ b/src/IceCfgNode.cpp
@@ -8,8 +8,8 @@ //===----------------------------------------------------------------------===// /// /// \file -/// This file implements the CfgNode class, including the complexities -/// of instruction insertion and in-edge calculation. +/// This file implements the CfgNode class, including the complexities of +/// instruction insertion and in-edge calculation. /// //===----------------------------------------------------------------------===// @@ -29,17 +29,16 @@ CfgNode::CfgNode(Cfg *Func, SizeT LabelNumber) : Func(Func), Number(LabelNumber), LabelNumber(LabelNumber) {} -// Returns the name the node was created with. If no name was given, -// it synthesizes a (hopefully) unique name. +// Returns the name the node was created with. If no name was given, it +// synthesizes a (hopefully) unique name. IceString CfgNode::getName() const { if (NameIndex >= 0) return Func->getIdentifierName(NameIndex); return "__" + std::to_string(LabelNumber); } -// Adds an instruction to either the Phi list or the regular -// instruction list. Validates that all Phis are added before all -// regular instructions. +// Adds an instruction to either the Phi list or the regular instruction list. +// Validates that all Phis are added before all regular instructions. void CfgNode::appendInst(Inst *Inst) { ++InstCountEstimate; if (InstPhi *Phi = llvm::dyn_cast<InstPhi>(Inst)) { @@ -53,11 +52,10 @@ } } -// Renumbers the non-deleted instructions in the node. This needs to -// be done in preparation for live range analysis. The instruction -// numbers in a block must be monotonically increasing. The range of -// instruction numbers in a block, from lowest to highest, must not -// overlap with the range of any other block. +// Renumbers the non-deleted instructions in the node. This needs to be done in +// preparation for live range analysis. The instruction numbers in a block must +// be monotonically increasing. 
The range of instruction numbers in a block, +// from lowest to highest, must not overlap with the range of any other block. void CfgNode::renumberInstructions() { InstNumberT FirstNumber = Func->getNextInstNumber(); for (Inst &I : Phis) @@ -67,10 +65,9 @@ InstCountEstimate = Func->getNextInstNumber() - FirstNumber; } -// When a node is created, the OutEdges are immediately known, but the -// InEdges have to be built up incrementally. After the CFG has been -// constructed, the computePredecessors() pass finalizes it by -// creating the InEdges list. +// When a node is created, the OutEdges are immediately known, but the InEdges +// have to be built up incrementally. After the CFG has been constructed, the +// computePredecessors() pass finalizes it by creating the InEdges list. void CfgNode::computePredecessors() { for (CfgNode *Succ : OutEdges) Succ->InEdges.push_back(this); @@ -80,19 +77,19 @@ OutEdges = Insts.rbegin()->getTerminatorEdges(); } -// Validate each Phi instruction in the node with respect to control flow. For -// every phi argument, its label must appear in the predecessor list. For each -// predecessor, there must be a phi argument with that label. We don't check +// Validate each Phi instruction in the node with respect to control flow. For +// every phi argument, its label must appear in the predecessor list. For each +// predecessor, there must be a phi argument with that label. We don't check // that phi arguments with the same label have the same value. void CfgNode::validatePhis() { for (Inst &Instr : Phis) { auto *Phi = llvm::cast<InstPhi>(&Instr); - // We do a simple O(N^2) algorithm to check for consistency. Even so, it - // shows up as only about 0.2% of the total translation time. 
But if - // necessary, we could improve the complexity by using a hash table to count - // how many times each node is referenced in the Phi instruction, and how - // many times each node is referenced in the incoming edge list, and compare - // the two for equality. + // We do a simple O(N^2) algorithm to check for consistency. Even so, it + // shows up as only about 0.2% of the total translation time. But if + // necessary, we could improve the complexity by using a hash table to + // count how many times each node is referenced in the Phi instruction, and + // how many times each node is referenced in the incoming edge list, and + // compare the two for equality. for (SizeT i = 0; i < Phi->getSrcSize(); ++i) { CfgNode *Label = Phi->getLabel(i); bool Found = false; @@ -120,17 +117,17 @@ } } -// This does part 1 of Phi lowering, by creating a new dest variable -// for each Phi instruction, replacing the Phi instruction's dest with -// that variable, and adding an explicit assignment of the old dest to -// the new dest. For example, +// This does part 1 of Phi lowering, by creating a new dest variable for each +// Phi instruction, replacing the Phi instruction's dest with that variable, +// and adding an explicit assignment of the old dest to the new dest. For +// example, // a=phi(...) // changes to // "a_phi=phi(...); a=a_phi". // -// This is in preparation for part 2 which deletes the Phi -// instructions and appends assignment instructions to predecessor -// blocks. Note that this transformation preserves SSA form. +// This is in preparation for part 2 which deletes the Phi instructions and +// appends assignment instructions to predecessor blocks. Note that this +// transformation preserves SSA form. void CfgNode::placePhiLoads() { for (Inst &I : Phis) { auto Phi = llvm::dyn_cast<InstPhi>(&I); @@ -138,38 +135,35 @@ } } -// This does part 2 of Phi lowering. 
For each Phi instruction at each -// out-edge, create a corresponding assignment instruction, and add -// all the assignments near the end of this block. They need to be -// added before any branch instruction, and also if the block ends -// with a compare instruction followed by a branch instruction that we -// may want to fuse, it's better to insert the new assignments before -// the compare instruction. The tryOptimizedCmpxchgCmpBr() method -// assumes this ordering of instructions. +// This does part 2 of Phi lowering. For each Phi instruction at each out-edge, +// create a corresponding assignment instruction, and add all the assignments +// near the end of this block. They need to be added before any branch +// instruction, and also if the block ends with a compare instruction followed +// by a branch instruction that we may want to fuse, it's better to insert the +// new assignments before the compare instruction. The +// tryOptimizedCmpxchgCmpBr() method assumes this ordering of instructions. // -// Note that this transformation takes the Phi dest variables out of -// SSA form, as there may be assignments to the dest variable in -// multiple blocks. +// Note that this transformation takes the Phi dest variables out of SSA form, +// as there may be assignments to the dest variable in multiple blocks. void CfgNode::placePhiStores() { // Find the insertion point. InstList::iterator InsertionPoint = Insts.end(); - // Every block must end in a terminator instruction, and therefore - // must have at least one instruction, so it's valid to decrement - // InsertionPoint (but assert just in case). + // Every block must end in a terminator instruction, and therefore must have + // at least one instruction, so it's valid to decrement InsertionPoint (but + // assert just in case). assert(InsertionPoint != Insts.begin()); --InsertionPoint; - // Confirm that InsertionPoint is a terminator instruction. 
Calling - // getTerminatorEdges() on a non-terminator instruction will cause - // an llvm_unreachable(). + // Confirm that InsertionPoint is a terminator instruction. Calling + // getTerminatorEdges() on a non-terminator instruction will cause an + // llvm_unreachable(). (void)InsertionPoint->getTerminatorEdges(); // SafeInsertionPoint is always immediately before the terminator - // instruction. If the block ends in a compare and conditional - // branch, it's better to place the Phi store before the compare so - // as not to interfere with compare/branch fusing. However, if the - // compare instruction's dest operand is the same as the new - // assignment statement's source operand, this can't be done due to - // data dependences, so we need to fall back to the - // SafeInsertionPoint. To illustrate: + // instruction. If the block ends in a compare and conditional branch, it's + // better to place the Phi store before the compare so as not to interfere + // with compare/branch fusing. However, if the compare instruction's dest + // operand is the same as the new assignment statement's source operand, this + // can't be done due to data dependences, so we need to fall back to the + // SafeInsertionPoint. To illustrate: // ; <label>:95 // %97 = load i8* %96, align 1 // %98 = icmp ne i8 %97, 0 @@ -188,9 +182,8 @@ // %100 = %100_phi // %101 = %101_phi // - // TODO(stichnot): It may be possible to bypass this whole - // SafeInsertionPoint mechanism. If a source basic block ends in a - // conditional branch: + // TODO(stichnot): It may be possible to bypass this whole SafeInsertionPoint + // mechanism. If a source basic block ends in a conditional branch: // labelSource: // ... // br i1 %foo, label %labelTrue, label %labelFalse @@ -200,17 +193,17 @@ // then we actually know the constant i1 value of the Phi operand: // labelTrue: // %bar = phi i1 [ true, %labelSource ], ... 
- // It seems that this optimization should be done by clang or opt, - // but we could also do it here. + // It seems that this optimization should be done by clang or opt, but we + // could also do it here. InstList::iterator SafeInsertionPoint = InsertionPoint; - // Keep track of the dest variable of a compare instruction, so that - // we insert the new instruction at the SafeInsertionPoint if the - // compare's dest matches the Phi-lowered assignment's source. + // Keep track of the dest variable of a compare instruction, so that we + // insert the new instruction at the SafeInsertionPoint if the compare's dest + // matches the Phi-lowered assignment's source. Variable *CmpInstDest = nullptr; - // If the current insertion point is at a conditional branch - // instruction, and the previous instruction is a compare - // instruction, then we move the insertion point before the compare - // instruction so as not to interfere with compare/branch fusing. + // If the current insertion point is at a conditional branch instruction, and + // the previous instruction is a compare instruction, then we move the + // insertion point before the compare instruction so as not to interfere with + // compare/branch fusing. if (InstBr *Branch = llvm::dyn_cast<InstBr>(InsertionPoint)) { if (!Branch->isUnconditional()) { if (InsertionPoint != Insts.begin()) { @@ -249,13 +242,12 @@ I.setDeleted(); } -// Splits the edge from Pred to this node by creating a new node and -// hooking up the in and out edges appropriately. (The EdgeIndex -// parameter is only used to make the new node's name unique when -// there are multiple edges between the same pair of nodes.) The new -// node's instruction list is initialized to the empty list, with no -// terminator instruction. 
There must not be multiple edges from Pred -// to this node so all Inst::getTerminatorEdges implementations must +// Splits the edge from Pred to this node by creating a new node and hooking up +// the in and out edges appropriately. (The EdgeIndex parameter is only used to +// make the new node's name unique when there are multiple edges between the +// same pair of nodes.) The new node's instruction list is initialized to the +// empty list, with no terminator instruction. There must not be multiple edges +// from Pred to this node so all Inst::getTerminatorEdges implementations must // not contain duplicates. CfgNode *CfgNode::splitIncomingEdge(CfgNode *Pred, SizeT EdgeIndex) { CfgNode *NewNode = Func->makeNode(); @@ -267,8 +259,8 @@ if (BuildDefs::dump()) NewNode->setName("split_" + Pred->getName() + "_" + getName() + "_" + std::to_string(EdgeIndex)); - // The new node is added to the end of the node list, and will later - // need to be sorted into a reasonable topological order. + // The new node is added to the end of the node list, and will later need to + // be sorted into a reasonable topological order. NewNode->setNeedsPlacement(true); // Repoint Pred's out-edge. bool Found = false; @@ -319,31 +311,31 @@ } // end of anonymous namespace -// This the "advanced" version of Phi lowering for a basic block, in contrast to -// the simple version that lowers through assignments involving temporaries. +// This the "advanced" version of Phi lowering for a basic block, in contrast +// to the simple version that lowers through assignments involving temporaries. // // All Phi instructions in a basic block are conceptually executed in parallel. // However, if we lower Phis early and commit to a sequential ordering, we may // end up creating unnecessary interferences which lead to worse register -// allocation. 
Delaying Phi scheduling until after register allocation can help -// unless there are no free registers for shuffling registers or stack slots and -// spilling becomes necessary. +// allocation. Delaying Phi scheduling until after register allocation can help +// unless there are no free registers for shuffling registers or stack slots +// and spilling becomes necessary. // // The advanced Phi lowering starts by finding a topological sort of the Phi -// instructions, where "A=B" comes before "B=C" due to the anti-dependence on B. -// Preexisting register assignments are considered in the topological sort. If -// a topological sort is not possible due to a cycle, the cycle is broken by -// introducing a non-parallel temporary. For example, a cycle arising from a -// permutation like "A=B;B=C;C=A" can become "T=A;A=B;B=C;C=T". All else being +// instructions, where "A=B" comes before "B=C" due to the anti-dependence on +// B. Preexisting register assignments are considered in the topological sort. +// If a topological sort is not possible due to a cycle, the cycle is broken by +// introducing a non-parallel temporary. For example, a cycle arising from a +// permutation like "A=B;B=C;C=A" can become "T=A;A=B;B=C;C=T". All else being // equal, prefer to schedule assignments with register-allocated Src operands // earlier, in case that register becomes free afterwards, and prefer to // schedule assignments with register-allocated Dest variables later, to keep // that register free for longer. // // Once the ordering is determined, the Cfg edge is split and the assignment -// list is lowered by the target lowering layer. Since the assignment lowering +// list is lowered by the target lowering layer. Since the assignment lowering // may create new infinite-weight temporaries, a follow-on register allocation -// pass will be needed. To prepare for this, liveness (including live range +// pass will be needed. 
To prepare for this, liveness (including live range // calculation) of the split nodes needs to be calculated, and liveness of the // original node need to be updated to "undo" the effects of the phi // assignments. @@ -355,7 +347,7 @@ // allocation pass is run, focusing only on pre-colored and infinite-weight // variables, similar to Om1 register allocation (except without the need to // specially compute these variables' live ranges, since they have already been -// precisely calculated). The register allocator in this mode needs the ability +// precisely calculated). The register allocator in this mode needs the ability // to forcibly spill and reload registers in case none are naturally available. void CfgNode::advancedPhiLowering() { if (getPhis().empty()) @@ -403,17 +395,16 @@ Desc[I].Src = Src; Desc[I].Processed = false; Desc[I].NumPred = 0; - // Cherry-pick any trivial assignments, so that they don't - // contribute to the running complexity of the topological sort. + // Cherry-pick any trivial assignments, so that they don't contribute to + // the running complexity of the topological sort. if (sameVarOrReg(Dest, Src)) { Desc[I].Processed = true; --Remaining; if (Dest != Src) - // If Dest and Src are syntactically the same, don't bother - // adding the assignment, because in all respects it would - // be redundant, and if Dest/Src are on the stack, the - // target lowering may naively decide to lower it using a - // temporary register. + // If Dest and Src are syntactically the same, don't bother adding + // the assignment, because in all respects it would be redundant, and + // if Dest/Src are on the stack, the target lowering may naively + // decide to lower it using a temporary register. Split->appendInst(InstAssign::create(Func, Dest, Src)); } } @@ -427,8 +418,8 @@ if (Desc[J].Processed) continue; if (I != J) { - // There shouldn't be two Phis with the same Dest variable - // or register. 
+ // There shouldn't be two Phis with the same Dest variable or + // register. assert(!sameVarOrReg(Dest, Desc[J].Dest)); } const Operand *Src = Desc[J].Src; @@ -443,8 +434,7 @@ constexpr int32_t WeightNoPreds = 4; // Prefer Src as a register because the register might free up. constexpr int32_t WeightSrcIsReg = 2; - // Prefer Dest not as a register because the register stays free - // longer. + // Prefer Dest not as a register because the register stays free longer. constexpr int32_t WeightDestNotReg = 1; for (size_t I = 0; I < NumPhis; ++I) { @@ -461,11 +451,10 @@ Desc[I].Weight = Weight; } - // Repeatedly choose and process the best candidate in the - // topological sort, until no candidates remain. This - // implementation is O(N^2) where N is the number of Phi - // instructions, but with a small constant factor compared to a - // likely implementation of O(N) topological sort. + // Repeatedly choose and process the best candidate in the topological + // sort, until no candidates remain. This implementation is O(N^2) where N + // is the number of Phi instructions, but with a small constant factor + // compared to a likely implementation of O(N) topological sort. for (; Remaining; --Remaining) { size_t BestIndex = 0; int32_t BestWeight = -1; @@ -488,9 +477,9 @@ // Break a cycle by introducing a temporary. if (Desc[BestIndex].NumPred) { bool Found = false; - // If the target instruction "A=B" is part of a cycle, find - // the "X=A" assignment in the cycle because it will have to - // be rewritten as "X=tmp". + // If the target instruction "A=B" is part of a cycle, find the "X=A" + // assignment in the cycle because it will have to be rewritten as + // "X=tmp". for (size_t J = 0; !Found && J < NumPhis; ++J) { if (Desc[J].Processed) continue; @@ -510,9 +499,8 @@ // Now that a cycle (if any) has been broken, create the actual // assignment. 
Split->appendInst(InstAssign::create(Func, Dest, Src)); - // Update NumPred for all Phi assignments using this Phi's Src - // as their Dest variable. Also update Weight if NumPred - // dropped from 1 to 0. + // Update NumPred for all Phi assignments using this Phi's Src as their + // Dest variable. Also update Weight if NumPred dropped from 1 to 0. if (auto Var = llvm::dyn_cast<Variable>(Src)) { for (size_t I = 0; I < NumPhis; ++I) { if (Desc[I].Processed) @@ -532,10 +520,9 @@ } } -// Does address mode optimization. Pass each instruction to the -// TargetLowering object. If it returns a new instruction -// (representing the optimized address mode), then insert the new -// instruction and delete the old. +// Does address mode optimization. Pass each instruction to the TargetLowering +// object. If it returns a new instruction (representing the optimized address +// mode), then insert the new instruction and delete the old. void CfgNode::doAddressOpt() { TargetLowering *Target = Func->getTarget(); LoweringContext &Context = Target->getContext(); @@ -567,8 +554,8 @@ } } -// Drives the target lowering. Passes the current instruction and the -// next non-deleted instruction for target lowering. +// Drives the target lowering. Passes the current instruction and the next +// non-deleted instruction for target lowering. void CfgNode::genCode() { TargetLowering *Target = Func->getTarget(); LoweringContext &Context = Target->getContext(); @@ -603,24 +590,23 @@ } } -// Performs liveness analysis on the block. Returns true if the -// incoming liveness changed from before, false if it stayed the same. -// (If it changes, the node's predecessors need to be processed -// again.) +// Performs liveness analysis on the block. Returns true if the incoming +// liveness changed from before, false if it stayed the same. (If it changes, +// the node's predecessors need to be processed again.) 
bool CfgNode::liveness(Liveness *Liveness) { SizeT NumVars = Liveness->getNumVarsInNode(this); LivenessBV Live(NumVars); LiveBeginEndMap *LiveBegin = nullptr; LiveBeginEndMap *LiveEnd = nullptr; - // Mark the beginning and ending of each variable's live range - // with the sentinel instruction number 0. + // Mark the beginning and ending of each variable's live range with the + // sentinel instruction number 0. if (Liveness->getMode() == Liveness_Intervals) { LiveBegin = Liveness->getLiveBegin(this); LiveEnd = Liveness->getLiveEnd(this); LiveBegin->clear(); LiveEnd->clear(); - // Guess that the number of live ranges beginning is roughly the - // number of instructions, and same for live ranges ending. + // Guess that the number of live ranges beginning is roughly the number of + // instructions, and same for live ranges ending. LiveBegin->reserve(getInstCountEstimate()); LiveEnd->reserve(getInstCountEstimate()); } @@ -643,9 +629,8 @@ continue; I.liveness(I.getNumber(), Live, Liveness, LiveBegin, LiveEnd); } - // Process phis in forward order so that we can override the - // instruction number to be that of the earliest phi instruction in - // the block. + // Process phis in forward order so that we can override the instruction + // number to be that of the earliest phi instruction in the block. SizeT NumNonDeadPhis = 0; InstNumberT FirstPhiNumber = Inst::NumberSentinel; for (Inst &I : Phis) { @@ -657,18 +642,17 @@ ++NumNonDeadPhis; } - // When using the sparse representation, after traversing the - // instructions in the block, the Live bitvector should only contain - // set bits for global variables upon block entry. We validate this - // by shrinking the Live vector and then testing it against the - // pre-shrunk version. (The shrinking is required, but the - // validation is not.) 
+ // When using the sparse representation, after traversing the instructions in + // the block, the Live bitvector should only contain set bits for global + // variables upon block entry. We validate this by shrinking the Live vector + // and then testing it against the pre-shrunk version. (The shrinking is + // required, but the validation is not.) LivenessBV LiveOrig = Live; Live.resize(Liveness->getNumGlobalVars()); if (Live != LiveOrig) { if (BuildDefs::dump()) { - // This is a fatal liveness consistency error. Print some - // diagnostics and abort. + // This is a fatal liveness consistency error. Print some diagnostics and + // abort. Ostream &Str = Func->getContext()->getStrDump(); Func->resetCurrentNode(); Str << "LiveOrig-Live ="; @@ -697,13 +681,12 @@ return Changed; } -// Once basic liveness is complete, compute actual live ranges. It is -// assumed that within a single basic block, a live range begins at -// most once and ends at most once. This is certainly true for pure -// SSA form. It is also true once phis are lowered, since each -// assignment to the phi-based temporary is in a different basic -// block, and there is a single read that ends the live in the basic -// block that contained the actual phi instruction. +// Once basic liveness is complete, compute actual live ranges. It is assumed +// that within a single basic block, a live range begins at most once and ends +// at most once. This is certainly true for pure SSA form. It is also true once +// phis are lowered, since each assignment to the phi-based temporary is in a +// different basic block, and there is a single read that ends the live in the +// basic block that contained the actual phi instruction. void CfgNode::livenessAddIntervals(Liveness *Liveness, InstNumberT FirstInstNum, InstNumberT LastInstNum) { TimerMarker T1(TimerStack::TT_liveRange, Func); @@ -736,14 +719,13 @@ SizeT i1 = IBB == IBE ? NumVars : IBB->first; SizeT i2 = IEB == IEE ? 
NumVars : IEB->first; SizeT i = std::min(i1, i2); - // i1 is the Variable number of the next MapBegin entry, and i2 is - // the Variable number of the next MapEnd entry. If i1==i2, then - // the Variable's live range begins and ends in this block. If - // i1<i2, then i1's live range begins at instruction IBB->second - // and extends through the end of the block. If i1>i2, then i2's - // live range begins at the first instruction of the block and - // ends at IEB->second. In any case, we choose the lesser of i1 - // and i2 and proceed accordingly. + // i1 is the Variable number of the next MapBegin entry, and i2 is the + // Variable number of the next MapEnd entry. If i1==i2, then the Variable's + // live range begins and ends in this block. If i1<i2, then i1's live range + // begins at instruction IBB->second and extends through the end of the + // block. If i1>i2, then i2's live range begins at the first instruction of + // the block and ends at IEB->second. In any case, we choose the lesser of + // i1 and i2 and proceed accordingly. InstNumberT LB = i == i1 ? IBB->second : FirstInstNum; InstNumberT LE = i == i2 ? IEB->second : LastInstNum + 1; @@ -751,9 +733,9 @@ if (LB > LE) { Var->addLiveRange(FirstInstNum, LE); Var->addLiveRange(LB, LastInstNum + 1); - // Assert that Var is a global variable by checking that its - // liveness index is less than the number of globals. This - // ensures that the LiveInAndOut[] access is valid. + // Assert that Var is a global variable by checking that its liveness + // index is less than the number of globals. This ensures that the + // LiveInAndOut[] access is valid. assert(i < Liveness->getNumGlobalVars()); LiveInAndOut[i] = false; } else { @@ -774,8 +756,8 @@ } // If this node contains only deleted instructions, and ends in an -// unconditional branch, contract the node by repointing all its -// in-edges to its successor. +// unconditional branch, contract the node by repointing all its in-edges to +// its successor. 
void CfgNode::contractIfEmpty() { if (InEdges.empty()) return; @@ -795,10 +777,10 @@ Branch->setDeleted(); CfgNode *Successor = OutEdges.front(); - // Repoint all this node's in-edges to this node's successor, unless - // this node's successor is actually itself (in which case the - // statement "OutEdges.front()->InEdges.push_back(Pred)" could - // invalidate the iterator over this->InEdges). + // Repoint all this node's in-edges to this node's successor, unless this + // node's successor is actually itself (in which case the statement + // "OutEdges.front()->InEdges.push_back(Pred)" could invalidate the iterator + // over this->InEdges). if (Successor != this) { for (CfgNode *Pred : InEdges) { for (CfgNode *&I : Pred->OutEdges) { @@ -814,8 +796,8 @@ } // Remove the in-edge to the successor to allow node reordering to make - // better decisions. For example it's more helpful to place a node after - // a reachable predecessor than an unreachable one (like the one we just + // better decisions. For example it's more helpful to place a node after a + // reachable predecessor than an unreachable one (like the one we just // contracted). Successor->InEdges.erase( std::find(Successor->InEdges.begin(), Successor->InEdges.end(), this)); @@ -826,10 +808,10 @@ void CfgNode::doBranchOpt(const CfgNode *NextNode) { TargetLowering *Target = Func->getTarget(); // Find the first opportunity for branch optimization (which will be the last - // instruction in the block) and stop. This is sufficient unless there is some - // target lowering where we have the possibility of multiple optimizations per - // block. Take care with switch lowering as there are multiple unconditional - // branches and only the last can be deleted. + // instruction in the block) and stop. This is sufficient unless there is + // some target lowering where we have the possibility of multiple + // optimizations per block. 
Take care with switch lowering as there are + // multiple unconditional branches and only the last can be deleted. for (Inst &I : reverse_range(Insts)) { if (!I.isDeleted()) { Target->doBranchOpt(&I, NextNode); @@ -869,8 +851,8 @@ } } } - // Sort the variables by regnum so they are always printed in a - // familiar order. + // Sort the variables by regnum so they are always printed in a familiar + // order. std::sort(LiveRegs.begin(), LiveRegs.end(), [](const Variable *V1, const Variable *V2) { return V1->getRegNum() < V2->getRegNum(); @@ -892,11 +874,11 @@ return; bool First = true; Variable *Dest = Instr->getDest(); - // Normally we increment the live count for the dest register. But - // we shouldn't if the instruction's IsDestNonKillable flag is set, - // because this means that the target lowering created this - // instruction as a non-SSA assignment; i.e., a different, previous - // instruction started the dest variable's live range. + // Normally we increment the live count for the dest register. But we + // shouldn't if the instruction's IsDestNonKillable flag is set, because this + // means that the target lowering created this instruction as a non-SSA + // assignment; i.e., a different, previous instruction started the dest + // variable's live range. if (!Instr->isDestNonKillable() && Dest && Dest->hasReg()) ++LiveRegCount[Dest->getRegNum()]; FOREACH_VAR_IN_INST(Var, *Instr) { @@ -921,8 +903,8 @@ void updateStats(Cfg *Func, const Inst *I) { if (!BuildDefs::dump()) return; - // Update emitted instruction count, plus fill/spill count for - // Variable operands without a physical register. + // Update emitted instruction count, plus fill/spill count for Variable + // operands without a physical register. 
if (uint32_t Count = I->getEmitInstCount()) { Func->getContext()->statsUpdateEmitted(Count); if (Variable *Dest = I->getDest()) { @@ -949,10 +931,10 @@ bool DecorateAsm = Liveness && Func->getContext()->getFlags().getDecorateAsm(); Str << getAsmName() << ":\n"; - // LiveRegCount keeps track of the number of currently live - // variables that each register is assigned to. Normally that would - // be only 0 or 1, but the register allocator's AllowOverlap - // inference allows it to be greater than 1 for short periods. + // LiveRegCount keeps track of the number of currently live variables that + // each register is assigned to. Normally that would be only 0 or 1, but the + // register allocator's AllowOverlap inference allows it to be greater than 1 + // for short periods. std::vector<SizeT> LiveRegCount(Func->getTarget()->getNumRegisters()); if (DecorateAsm) { constexpr bool IsLiveIn = true; @@ -969,15 +951,14 @@ if (I.isDeleted()) continue; if (I.isRedundantAssign()) { - // Usually, redundant assignments end the live range of the src - // variable and begin the live range of the dest variable, with - // no net effect on the liveness of their register. However, if - // the register allocator infers the AllowOverlap condition, - // then this may be a redundant assignment that does not end the - // src variable's live range, in which case the active variable - // count for that register needs to be bumped. That normally - // would have happened as part of emitLiveRangesEnded(), but - // that isn't called for redundant assignments. + // Usually, redundant assignments end the live range of the src variable + // and begin the live range of the dest variable, with no net effect on + // the liveness of their register. However, if the register allocator + // infers the AllowOverlap condition, then this may be a redundant + // assignment that does not end the src variable's live range, in which + // case the active variable count for that register needs to be bumped. 
+ // That normally would have happened as part of emitLiveRangesEnded(), + // but that isn't called for redundant assignments. Variable *Dest = I.getDest(); if (DecorateAsm && Dest->hasReg() && !I.isLastUse(I.getSrc(0))) ++LiveRegCount[Dest->getRegNum()]; @@ -1010,41 +991,38 @@ BundleMaskLo(BundleSize - 1), BundleMaskHi(~BundleMaskLo) {} // Check whether we're currently within a bundle_lock region. bool isInBundleLockRegion() const { return BundleLockStart != End; } - // Check whether the current bundle_lock region has the align_to_end - // option. + // Check whether the current bundle_lock region has the align_to_end option. bool isAlignToEnd() const { assert(isInBundleLockRegion()); return llvm::cast<InstBundleLock>(getBundleLockStart())->getOption() == InstBundleLock::Opt_AlignToEnd; } - // Check whether the entire bundle_lock region falls within the same - // bundle. + // Check whether the entire bundle_lock region falls within the same bundle. bool isSameBundle() const { assert(isInBundleLockRegion()); return SizeSnapshotPre == SizeSnapshotPost || (SizeSnapshotPre & BundleMaskHi) == ((SizeSnapshotPost - 1) & BundleMaskHi); } - // Get the bundle alignment of the first instruction of the - // bundle_lock region. + // Get the bundle alignment of the first instruction of the bundle_lock + // region. intptr_t getPreAlignment() const { assert(isInBundleLockRegion()); return SizeSnapshotPre & BundleMaskLo; } - // Get the bundle alignment of the first instruction past the - // bundle_lock region. + // Get the bundle alignment of the first instruction past the bundle_lock + // region. intptr_t getPostAlignment() const { assert(isInBundleLockRegion()); return SizeSnapshotPost & BundleMaskLo; } - // Get the iterator pointing to the bundle_lock instruction, e.g. to - // roll back the instruction iteration to that point. + // Get the iterator pointing to the bundle_lock instruction, e.g. to roll + // back the instruction iteration to that point. 
InstList::const_iterator getBundleLockStart() const { assert(isInBundleLockRegion()); return BundleLockStart; } - // Set up bookkeeping when the bundle_lock instruction is first - // processed. + // Set up bookkeeping when the bundle_lock instruction is first processed. void enterBundleLock(InstList::const_iterator I) { assert(!isInBundleLockRegion()); BundleLockStart = I; @@ -1053,18 +1031,16 @@ Target->snapshotEmitState(); assert(isInBundleLockRegion()); } - // Update bookkeeping when the bundle_unlock instruction is - // processed. + // Update bookkeeping when the bundle_unlock instruction is processed. void enterBundleUnlock() { assert(isInBundleLockRegion()); SizeSnapshotPost = Asm->getBufferSize(); } - // Update bookkeeping when we are completely finished with the - // bundle_lock region. + // Update bookkeeping when we are completely finished with the bundle_lock + // region. void leaveBundleLockRegion() { BundleLockStart = End; } - // Check whether the instruction sequence fits within the current - // bundle, and if not, add nop padding to the end of the current - // bundle. + // Check whether the instruction sequence fits within the current bundle, and + // if not, add nop padding to the end of the current bundle. void padToNextBundle() { assert(isInBundleLockRegion()); if (!isSameBundle()) { @@ -1076,8 +1052,8 @@ assert(Asm->getBufferSize() == SizeSnapshotPre); } } - // If align_to_end is specified, add padding such that the - // instruction sequences ends precisely at a bundle boundary. + // If align_to_end is specified, add padding such that the instruction + // sequence ends precisely at a bundle boundary. void padForAlignToEnd() { assert(isInBundleLockRegion()); if (isAlignToEnd()) { @@ -1098,8 +1074,8 @@ private: Assembler *const Asm; TargetLowering *const Target; - // End is a sentinel value such that BundleLockStart==End implies - // that we are not in a bundle_lock region. 
+ // End is a sentinel value such that BundleLockStart==End implies that we are + // not in a bundle_lock region. const InstList::const_iterator End; InstList::const_iterator BundleLockStart; const intptr_t BundleSize; @@ -1116,9 +1092,9 @@ void CfgNode::emitIAS(Cfg *Func) const { Func->setCurrentNode(this); Assembler *Asm = Func->getAssembler<>(); - // TODO(stichnot): When sandboxing, defer binding the node label - // until just before the first instruction is emitted, to reduce the - // chance that a padding nop is a branch target. + // TODO(stichnot): When sandboxing, defer binding the node label until just + // before the first instruction is emitted, to reduce the chance that a + // padding nop is a branch target. Asm->bindCfgNodeLabel(getIndex()); for (const Inst &I : Phis) { if (I.isDeleted()) @@ -1138,33 +1114,33 @@ return; } - // The remainder of the function handles emission with sandboxing. - // There are explicit bundle_lock regions delimited by bundle_lock - // and bundle_unlock instructions. All other instructions are - // treated as an implicit one-instruction bundle_lock region. - // Emission is done twice for each bundle_lock region. The first - // pass is a preliminary pass, after which we can figure out what - // nop padding is needed, then roll back, and make the final pass. + // The remainder of the function handles emission with sandboxing. There are + // explicit bundle_lock regions delimited by bundle_lock and bundle_unlock + // instructions. All other instructions are treated as an implicit + // one-instruction bundle_lock region. Emission is done twice for each + // bundle_lock region. The first pass is a preliminary pass, after which we + // can figure out what nop padding is needed, then roll back, and make the + // final pass. 
// - // Ideally, the first pass would be speculative and the second pass - // would only be done if nop padding were needed, but the structure - // of the integrated assembler makes it hard to roll back the state - // of label bindings, label links, and relocation fixups. Instead, - // the first pass just disables all mutation of that state. + // Ideally, the first pass would be speculative and the second pass would + // only be done if nop padding were needed, but the structure of the + // integrated assembler makes it hard to roll back the state of label + // bindings, label links, and relocation fixups. Instead, the first pass just + // disables all mutation of that state. BundleEmitHelper Helper(Asm, Func->getTarget(), Insts); InstList::const_iterator End = Insts.end(); - // Retrying indicates that we had to roll back to the bundle_lock - // instruction to apply padding before the bundle_lock sequence. + // Retrying indicates that we had to roll back to the bundle_lock instruction + // to apply padding before the bundle_lock sequence. bool Retrying = false; for (InstList::const_iterator I = Insts.begin(); I != End; ++I) { if (I->isDeleted() || I->isRedundantAssign()) continue; if (llvm::isa<InstBundleLock>(I)) { - // Set up the initial bundle_lock state. This should not happen - // while retrying, because the retry rolls back to the - // instruction following the bundle_lock instruction. + // Set up the initial bundle_lock state. This should not happen while + // retrying, because the retry rolls back to the instruction following + // the bundle_lock instruction. assert(!Retrying); Helper.enterBundleLock(I); continue; @@ -1175,16 +1151,16 @@ if (Retrying) { // Make sure all instructions are in the same bundle. assert(Helper.isSameBundle()); - // If align_to_end is specified, make sure the next - // instruction begins the bundle. + // If align_to_end is specified, make sure the next instruction begins + // the bundle. 
assert(!Helper.isAlignToEnd() || Helper.getPostAlignment() == 0); Helper.leaveBundleLockRegion(); Retrying = false; } else { // This is the first pass, so roll back for the retry pass. Helper.rollback(); - // Pad to the next bundle if the instruction sequence crossed - // a bundle boundary. + // Pad to the next bundle if the instruction sequence crossed a bundle + // boundary. Helper.padToNextBundle(); // Insert additional padding to make AlignToEnd work. Helper.padForAlignToEnd(); @@ -1215,8 +1191,8 @@ } } - // Don't allow bundle locking across basic blocks, to keep the - // backtracking mechanism simple. + // Don't allow bundle locking across basic blocks, to keep the backtracking + // mechanism simple. assert(!Helper.isInBundleLockRegion()); assert(!Retrying); }
diff --git a/src/IceCfgNode.h b/src/IceCfgNode.h index a4744db..c6aa729 100644 --- a/src/IceCfgNode.h +++ b/src/IceCfgNode.h
@@ -8,9 +8,8 @@ //===----------------------------------------------------------------------===// /// /// \file -/// This file declares the CfgNode class, which represents a single -/// basic block as its instruction list, in-edge list, and out-edge -/// list. +/// This file declares the CfgNode class, which represents a single basic block +/// as its instruction list, in-edge list, and out-edge list. /// //===----------------------------------------------------------------------===// @@ -50,8 +49,8 @@ void setLoopNestDepth(SizeT NewDepth) { LoopNestDepth = NewDepth; } SizeT getLoopNestDepth() const { return LoopNestDepth; } - /// The HasReturn flag indicates that this node contains a return - /// instruction and therefore needs an epilog. + /// The HasReturn flag indicates that this node contains a return instruction + /// and therefore needs an epilog. void setHasReturn() { HasReturn = true; } bool getHasReturn() const { return HasReturn; } @@ -73,18 +72,17 @@ PhiList &getPhis() { return Phis; } void appendInst(Inst *Inst); void renumberInstructions(); - /// Rough and generally conservative estimate of the number of - /// instructions in the block. It is updated when an instruction is - /// added, but not when deleted. It is recomputed during - /// renumberInstructions(). + /// Rough and generally conservative estimate of the number of instructions in + /// the block. It is updated when an instruction is added, but not when + /// deleted. It is recomputed during renumberInstructions(). InstNumberT getInstCountEstimate() const { return InstCountEstimate; } /// @} /// \name Manage predecessors and successors. /// @{ - /// Add a predecessor edge to the InEdges list for each of this - /// node's successors. + /// Add a predecessor edge to the InEdges list for each of this node's + /// successors. void computePredecessors(); void computeSuccessors(); CfgNode *splitIncomingEdge(CfgNode *Pred, SizeT InEdgeIndex);
diff --git a/src/IceClFlags.cpp b/src/IceClFlags.cpp index 6c2e984..a27fb3f 100644 --- a/src/IceClFlags.cpp +++ b/src/IceClFlags.cpp
@@ -8,9 +8,9 @@ //===----------------------------------------------------------------------===// /// /// \file -/// This file defines commandline flags parsing. -/// This currently relies on llvm::cl to parse. In the future, the minimal -/// build can have a simpler parser. +/// This file defines commandline flags parsing. This currently relies on +/// llvm::cl to parse. In the future, the minimal build can have a simpler +/// parser. /// //===----------------------------------------------------------------------===// @@ -99,16 +99,15 @@ cl::opt<bool> MockBoundsCheck("mock-bounds-check", cl::desc("Mock bounds checking on loads/stores")); -// Number of translation threads (in addition to the parser thread and -// the emitter thread). The special case of 0 means purely -// sequential, i.e. parser, translator, and emitter all within the -// same single thread. (This may need a slight rework if we expand to -// multiple parser or emitter threads.) +// Number of translation threads (in addition to the parser thread and the +// emitter thread). The special case of 0 means purely sequential, i.e. parser, +// translator, and emitter all within the same single thread. (This may need a +// slight rework if we expand to multiple parser or emitter threads.) cl::opt<uint32_t> NumThreads( "threads", cl::desc("Number of translation threads (0 for purely sequential)"), - // TODO(stichnot): Settle on a good default. Consider - // something related to std::thread::hardware_concurrency(). + // TODO(stichnot): Settle on a good default. Consider something related to + // std::thread::hardware_concurrency(). cl::init(2)); cl::opt<Ice::OptLevel> OLevel(cl::desc("Optimization level"), @@ -125,9 +124,9 @@ cl::desc("Enable edge splitting for Phi lowering"), cl::init(true)); -// TODO(stichnot): See if we can easily use LLVM's -rng-seed option -// and implementation. I expect the implementation is different and -// therefore the tests would need to be changed. 
+// TODO(stichnot): See if we can easily use LLVM's -rng-seed option and +// implementation. I expect the implementation is different and therefore the +// tests would need to be changed. cl::opt<unsigned long long> RandomSeed("sz-seed", cl::desc("Seed the random number generator"), cl::init(1)); @@ -255,10 +254,10 @@ "exit-success", cl::desc("Exit with success status, even if errors found"), cl::init(false)); -// Note: While this flag isn't used in the minimal build, we keep this -// flag so that tests can set this command-line flag without concern -// to the type of build. We double check that this flag at runtime -// to make sure the consistency is maintained. +// Note: While this flag isn't used in the minimal build, we keep this flag so +// that tests can set this command-line flag without concern to the type of +// build. We double check this flag at runtime to make sure the +// consistency is maintained. cl::opt<bool> BuildOnRead("build-on-read", cl::desc("Build ICE instructions when reading bitcode"), @@ -413,8 +412,8 @@ ::DisableTranslation = true; Ice::VerboseMask VMask = Ice::IceV_None; - // Don't generate verbose messages if routines - // to dump messages are not available. + // Don't generate verbose messages if routines to dump messages are not + // available. if (BuildDefs::dump()) { for (unsigned i = 0; i != VerboseList.size(); ++i) VMask |= VerboseList[i];
diff --git a/src/IceClFlags.h b/src/IceClFlags.h index 89f5783..87e16cd 100644 --- a/src/IceClFlags.h +++ b/src/IceClFlags.h
@@ -79,8 +79,8 @@ void setFunctionSections(bool NewValue) { FunctionSections = NewValue; } bool getGenerateUnitTestMessages() const { - // Note: If dump routines have been turned off, the error messages - // will not be readable. Hence, turn off. + // Note: If dump routines have been turned off, the error messages will not + // be readable. Hence, turn off. return !BuildDefs::dump() || GenerateUnitTestMessages; } void setGenerateUnitTestMessages(bool NewValue) {
diff --git a/src/IceCompileServer.cpp b/src/IceCompileServer.cpp index b7bc72b..db0694b 100644 --- a/src/IceCompileServer.cpp +++ b/src/IceCompileServer.cpp
@@ -37,9 +37,8 @@ namespace { -// Define a SmallVector backed buffer as a data stream, so that it -// can hold the generated binary version of the textual bitcode in the -// input file. +// Define a SmallVector backed buffer as a data stream, so that it can hold the +// generated binary version of the textual bitcode in the input file. class TextDataStreamer : public llvm::DataStreamer { public: TextDataStreamer() = default; @@ -129,8 +128,8 @@ } ELFStr.reset(new ELFStreamer(*FdOs.get())); Os.reset(FdOs.release()); - // NaCl sets st_blksize to 0, and LLVM uses that to pick the - // default preferred buffer size. Set to something non-zero. + // NaCl sets st_blksize to 0, and LLVM uses that to pick the default + // preferred buffer size. Set to something non-zero. Os->SetBufferSize(1 << 14); } break; case FT_Asm:
diff --git a/src/IceCompileServer.h b/src/IceCompileServer.h index e027cbb..8d99927 100644 --- a/src/IceCompileServer.h +++ b/src/IceCompileServer.h
@@ -8,8 +8,8 @@ //===----------------------------------------------------------------------===// /// /// \file -/// This file declares the compile server. Given a compiler implementation, -/// it dispatches compile requests to the implementation. +/// This file declares the compile server. Given a compiler implementation, it +/// dispatches compile requests to the implementation. /// //===----------------------------------------------------------------------===// @@ -27,17 +27,17 @@ namespace Ice { -/// A CompileServer awaits compile requests, and dispatches the requests -/// to a given Compiler. Each request is paired with an input stream, -/// a context (which has the output stream), and a set of arguments. -/// The CompileServer takes over the current thread to listen to requests, -/// and compile requests are handled on separate threads. +/// A CompileServer awaits compile requests, and dispatches the requests to a +/// given Compiler. Each request is paired with an input stream, a context +/// (which has the output stream), and a set of arguments. The CompileServer +/// takes over the current thread to listen to requests, and compile requests +/// are handled on separate threads. /// /// Currently, this only handles a single request. /// -/// When run on the commandline, it receives and therefore dispatches -/// the request immediately. When run in the browser, it blocks waiting -/// for a request. +/// When run on the commandline, it receives and therefore dispatches the +/// request immediately. When run in the browser, it blocks waiting for a +/// request. class CompileServer { CompileServer() = delete; CompileServer(const CompileServer &) = delete;
diff --git a/src/IceCompiler.cpp b/src/IceCompiler.cpp index 4d3bbd4..b4b6c89 100644 --- a/src/IceCompiler.cpp +++ b/src/IceCompiler.cpp
@@ -52,8 +52,8 @@ {"minimal_build", BuildDefs::minimal()}, {"browser_mode", PNACL_BROWSER_TRANSLATOR}}; -// Validates values of build attributes. Prints them to Stream if -// Stream is non-null. +// Validates values of build attributes. Prints them to Stream if Stream is +// non-null. void validateAndGenerateBuildAttributes(Ostream *Stream) { // List the supported targets. if (Stream) { @@ -100,7 +100,7 @@ } // The Minimal build (specifically, when dump()/emit() are not implemented) - // allows only --filetype=obj. Check here to avoid cryptic error messages + // allows only --filetype=obj. Check here to avoid cryptic error messages // downstream. if (!BuildDefs::dump() && Ctx.getFlags().getOutFileType() != FT_Elf) { // TODO(stichnot): Access the actual command-line argument via
diff --git a/src/IceCompiler.h b/src/IceCompiler.h index e121dbb..6239b9f 100644 --- a/src/IceCompiler.h +++ b/src/IceCompiler.h
@@ -33,8 +33,8 @@ public: Compiler() = default; - /// Run the compiler with the given GlobalContext for compilation - /// state. Upon error, the Context's error status will be set. + /// Run the compiler with the given GlobalContext for compilation state. Upon + /// error, the Context's error status will be set. void run(const ClFlagsExtra &ExtraFlags, GlobalContext &Ctx, std::unique_ptr<llvm::DataStreamer> &&InputStream); };
diff --git a/src/IceConditionCodesARM32.h b/src/IceConditionCodesARM32.h index d897a44..d739310 100644 --- a/src/IceConditionCodesARM32.h +++ b/src/IceConditionCodesARM32.h
@@ -26,8 +26,8 @@ CondARM32 &operator=(const CondARM32 &) = delete; public: - /// An enum of codes used for conditional instructions. The enum value - /// should match the value used to encode operands in binary instructions. + /// An enum of codes used for conditional instructions. The enum value should + /// match the value used to encode operands in binary instructions. enum Cond { #define X(tag, encode, opp, emit) tag = encode, ICEINSTARM32COND_TABLE
diff --git a/src/IceConverter.cpp b/src/IceConverter.cpp index 4450a79..a4d4f53 100644 --- a/src/IceConverter.cpp +++ b/src/IceConverter.cpp
@@ -52,9 +52,9 @@ // Base class for converting LLVM to ICE. // TODO(stichnot): Redesign Converter, LLVM2ICEConverter, -// LLVM2ICEFunctionConverter, and LLVM2ICEGlobalsConverter with -// respect to Translator. In particular, the unique_ptr ownership -// rules in LLVM2ICEFunctionConverter. +// LLVM2ICEFunctionConverter, and LLVM2ICEGlobalsConverter with respect to +// Translator. In particular, the unique_ptr ownership rules in +// LLVM2ICEFunctionConverter. class LLVM2ICEConverter { LLVM2ICEConverter() = delete; LLVM2ICEConverter(const LLVM2ICEConverter &) = delete; @@ -73,11 +73,11 @@ const Ice::TypeConverter TypeConverter; }; -// Converter from LLVM functions to ICE. The entry point is the -// convertFunction method. +// Converter from LLVM functions to ICE. The entry point is the convertFunction +// method. // -// Note: this currently assumes that the given IR was verified to be -// valid PNaCl bitcode. Otherwise, the behavior is undefined. +// Note: this currently assumes that the given IR was verified to be valid +// PNaCl bitcode. Otherwise, the behavior is undefined. class LLVM2ICEFunctionConverter : LLVM2ICEConverter { LLVM2ICEFunctionConverter() = delete; LLVM2ICEFunctionConverter(const LLVM2ICEFunctionConverter &) = delete; @@ -107,10 +107,9 @@ Func->addArg(mapValueToIceVar(ArgI)); } - // Make an initial pass through the block list just to resolve the - // blocks in the original linearized order. Otherwise the ICE - // linearized order will be affected by branch targets in - // terminator instructions. + // Make an initial pass through the block list just to resolve the blocks + // in the original linearized order. Otherwise the ICE linearized order + // will be affected by branch targets in terminator instructions. 
for (const BasicBlock &BBI : *F) mapBasicBlockToNode(&BBI); for (const BasicBlock &BBI : *F) @@ -122,9 +121,8 @@ Converter.translateFcn(std::move(Func)); } - // convertConstant() does not use Func or require it to be a valid - // Ice::Cfg pointer. As such, it's suitable for e.g. constructing - // global initializers. + // convertConstant() does not use Func or require it to be a valid Ice::Cfg + // pointer. As such, it's suitable for e.g. constructing global initializers. Ice::Constant *convertConstant(const Constant *Const) { if (const auto GV = dyn_cast<GlobalValue>(Const)) { Ice::GlobalDeclaration *Decl = getConverter().getGlobalDeclaration(GV); @@ -197,9 +195,8 @@ return IceTy; } - // Given an LLVM instruction and an operand number, produce the - // Ice::Operand this refers to. If there's no such operand, return - // nullptr. + // Given an LLVM instruction and an operand number, produce the Ice::Operand + // this refers to. If there's no such operand, return nullptr. Ice::Operand *convertOperand(const Instruction *Inst, unsigned OpNum) { if (OpNum >= Inst->getNumOperands()) { return nullptr; @@ -551,8 +548,8 @@ Ice::Variable *Dest = mapValueToIceVar(Inst); Ice::Operand *CallTarget = convertValue(Inst->getCalledValue()); unsigned NumArgs = Inst->getNumArgOperands(); - // Note: Subzero doesn't (yet) do anything special with the Tail - // flag in the bitcode, i.e. CallInst::isTailCall(). + // Note: Subzero doesn't (yet) do anything special with the Tail flag in + // the bitcode, i.e. CallInst::isTailCall(). Ice::InstCall *NewInst = nullptr; const Ice::Intrinsics::FullIntrinsicInfo *Info = nullptr; @@ -649,8 +646,8 @@ // Converter from LLVM global variables to ICE. The entry point is the // convertGlobalsToIce method. // -// Note: this currently assumes that the given IR was verified to be -// valid PNaCl bitcode. Othewise, the behavior is undefined. +// Note: this currently assumes that the given IR was verified to be valid +// PNaCl bitcode. 
Otherwise, the behavior is undefined. class LLVM2ICEGlobalsConverter : public LLVM2ICEConverter { LLVM2ICEGlobalsConverter() = delete; LLVM2ICEGlobalsConverter(const LLVM2ICEGlobalsConverter &) = delete; @@ -661,15 +658,14 @@ explicit LLVM2ICEGlobalsConverter(Ice::Converter &Converter) : LLVM2ICEConverter(Converter) {} - /// Converts global variables, and their initializers into ICE - /// global variable declarations, for module Mod. Returns the set of - /// converted declarations. + /// Converts global variables, and their initializers into ICE global variable + /// declarations, for module Mod. Returns the set of converted declarations. std::unique_ptr<Ice::VariableDeclarationList> convertGlobalsToIce(Module *Mod); private: - // Adds the Initializer to the list of initializers for the Global - // variable declaraation. + // Adds the Initializer to the list of initializers for the Global variable + // declaration. void addGlobalInitializer(Ice::VariableDeclaration &Global, const Constant *Initializer) { const bool HasOffset = false; @@ -678,15 +674,14 @@ } // Adds Initializer to the list of initializers for Global variable - // declaration. HasOffset is true only if Initializer is a - // relocation initializer and Offset should be added to the - // relocation. + // declaration. HasOffset is true only if Initializer is a relocation + // initializer and Offset should be added to the relocation. void addGlobalInitializer(Ice::VariableDeclaration &Global, const Constant *Initializer, bool HasOffset, Ice::RelocOffsetT Offset); - // Converts the given constant C to the corresponding integer - // literal it contains. + // Converts the given constant C to the corresponding integer literal it + // contains. Ice::RelocOffsetT getIntegerLiteralConstant(const Value *C) { const auto CI = dyn_cast<ConstantInt>(C); if (CI && CI->getType()->isIntegerTy(32))
diff --git a/src/IceConverter.h b/src/IceConverter.h index 399e6f8..accc554 100644 --- a/src/IceConverter.h +++ b/src/IceConverter.h
@@ -42,8 +42,8 @@ llvm::Module *getModule() const { return Mod; } - /// Returns the global declaration associated with the corresponding - /// global value V. If no such global address, generates fatal error. + /// Returns the global declaration associated with the corresponding global + /// value V. If there is no such global address, generates a fatal error. GlobalDeclaration *getGlobalDeclaration(const llvm::GlobalValue *V); private: @@ -56,9 +56,8 @@ /// getFlags().DefaultGlobalPrefix, if the prefix is non-empty. void nameUnnamedGlobalVariables(llvm::Module *Mod); - /// Walks module and generates names for unnamed functions using - /// prefix getFlags().DefaultFunctionPrefix, if the prefix is - /// non-empty. + /// Walks module and generates names for unnamed functions using prefix + /// getFlags().DefaultFunctionPrefix, if the prefix is non-empty. void nameUnnamedFunctions(llvm::Module *Mod); /// Converts functions to ICE, and then machine code.
diff --git a/src/IceDefs.h b/src/IceDefs.h index d1ad81c..a38da03 100644 --- a/src/IceDefs.h +++ b/src/IceDefs.h
@@ -9,7 +9,7 @@ /// /// \file /// This file declares various useful types and classes that have widespread use -/// across Subzero. Every Subzero source file is expected to include IceDefs.h. +/// across Subzero. Every Subzero source file is expected to include IceDefs.h. /// //===----------------------------------------------------------------------===// @@ -99,8 +99,8 @@ } // makeUnique should be used when memory is expected to be allocated from the -// heap (as opposed to allocated from some Allocator.) It is intended to be used -// instead of new. +// heap (as opposed to allocated from some Allocator.) It is intended to be +// used instead of new. // // The expected usage is as follows // @@ -140,8 +140,8 @@ using IceString = std::string; using InstList = llvm::ilist<Inst>; -// Ideally PhiList would be llvm::ilist<InstPhi>, and similar for -// AssignList, but this runs into issues with SFINAE. +// Ideally PhiList would be llvm::ilist<InstPhi>, and similar for AssignList, +// but this runs into issues with SFINAE. using PhiList = InstList; using AssignList = InstList; @@ -155,18 +155,18 @@ using FunctionDeclarationList = std::vector<FunctionDeclaration *>; using VariableDeclarationList = std::vector<VariableDeclaration *>; -/// SizeT is for holding small-ish limits like number of source -/// operands in an instruction. It is used instead of size_t (which -/// may be 64-bits wide) when we want to save space. +/// SizeT is for holding small-ish limits like number of source operands in an +/// instruction. It is used instead of size_t (which may be 64-bits wide) when +/// we want to save space. using SizeT = uint32_t; -/// InstNumberT is for holding an instruction number. Instruction -/// numbers are used for representing Variable live ranges. +/// InstNumberT is for holding an instruction number. Instruction numbers are +/// used for representing Variable live ranges. 
using InstNumberT = int32_t; -/// A LiveBeginEndMapEntry maps a Variable::Number value to an -/// Inst::Number value, giving the instruction number that begins or -/// ends a variable's live range. +/// A LiveBeginEndMapEntry maps a Variable::Number value to an Inst::Number +/// value, giving the instruction number that begins or ends a variable's live +/// range. using LiveBeginEndMapEntry = std::pair<SizeT, InstNumberT>; using LiveBeginEndMap = std::vector<LiveBeginEndMapEntry, CfgLocalAllocator<LiveBeginEndMapEntry>>; @@ -175,9 +175,8 @@ using TimerStackIdT = uint32_t; using TimerIdT = uint32_t; -/// Use alignas(MaxCacheLineSize) to isolate variables/fields that -/// might be contended while multithreading. Assumes the maximum cache -/// line size is 64. +/// Use alignas(MaxCacheLineSize) to isolate variables/fields that might be +/// contended while multithreading. Assumes the maximum cache line size is 64. enum { MaxCacheLineSize = 64 }; // Use ICE_CACHELINE_BOUNDARY to force the next field in a declaration // list to be aligned to the next cache line. @@ -191,15 +190,15 @@ enum { RelocAddrSize = 4 }; enum LivenessMode { - /// Basic version of live-range-end calculation. Marks the last uses - /// of variables based on dataflow analysis. Records the set of - /// live-in and live-out variables for each block. Identifies and - /// deletes dead instructions (primarily stores). + /// Basic version of live-range-end calculation. Marks the last uses of + /// variables based on dataflow analysis. Records the set of live-in and + /// live-out variables for each block. Identifies and deletes dead + /// instructions (primarily stores). Liveness_Basic, - /// In addition to Liveness_Basic, also calculate the complete - /// live range for each variable in a form suitable for interference - /// calculation and register allocation. 
+ /// In addition to Liveness_Basic, also calculate the complete live range for + /// each variable in a form suitable for interference calculation and register + /// allocation. Liveness_Intervals }; @@ -244,10 +243,10 @@ enum ErrorCodes { EC_None = 0, EC_Args, EC_Bitcode, EC_Translation }; -/// Wrapper around std::error_code for allowing multiple errors to be -/// folded into one. The current implementation keeps track of the -/// first error, which is likely to be the most useful one, and this -/// could be extended to e.g. collect a vector of errors. +/// Wrapper around std::error_code for allowing multiple errors to be folded +/// into one. The current implementation keeps track of the first error, which +/// is likely to be the most useful one, and this could be extended to e.g. +/// collect a vector of errors. class ErrorCode : public std::error_code { ErrorCode(const ErrorCode &) = delete; ErrorCode &operator=(const ErrorCode &) = delete;
diff --git a/src/IceELFObjectWriter.cpp b/src/IceELFObjectWriter.cpp index 9e1d44e..7456856 100644 --- a/src/IceELFObjectWriter.cpp +++ b/src/IceELFObjectWriter.cpp
@@ -105,10 +105,9 @@ ELFRelocationSection * ELFObjectWriter::createRelocationSection(const ELFSection *RelatedSection) { - // Choice of RELA vs REL is actually separate from elf64 vs elf32, - // but in practice we've only had .rela for elf64 (x86-64). - // In the future, the two properties may need to be decoupled - // and the ShEntSize can vary more. + // Choice of RELA vs REL is actually separate from elf64 vs elf32, but in + // practice we've only had .rela for elf64 (x86-64). In the future, the two + // properties may need to be decoupled and the ShEntSize can vary more. const Elf64_Word ShType = ELF64 ? SHT_RELA : SHT_REL; IceString RelPrefix = ELF64 ? ".rela" : ".rel"; IceString RelSectionName = RelPrefix + RelatedSection->getName(); @@ -158,8 +157,8 @@ } void ELFObjectWriter::assignSectionNumbersInfo(SectionList &AllSections) { - // Go through each section, assigning them section numbers and - // and fill in the size for sections that aren't incrementally updated. + // Go through each section, assigning them section numbers and fill in + // the size for sections that aren't incrementally updated. assert(!SectionNumbersAssigned); SizeT CurSectionNumber = 0; NullSection->setNumber(CurSectionNumber++); @@ -233,8 +232,8 @@ RelSection = RelTextSections[0]; } RelocOffsetT OffsetInSection = Section->getCurrentSize(); - // Function symbols are set to 0 size in the symbol table, - // in contrast to data symbols which have a proper size. + // Function symbols are set to 0 size in the symbol table, in contrast to + // data symbols which have a proper size. SizeT SymbolSize = 0; Section->appendData(Str, Asm->getBufferView()); uint8_t SymbolType; @@ -268,9 +267,8 @@ return ELFObjectWriter::BSS; } -// Partition the Vars list by SectionType into VarsBySection. -// If TranslateOnly is non-empty, then only the TranslateOnly variable -// is kept for emission. +// Partition the Vars list by SectionType into VarsBySection. 
If TranslateOnly +// is non-empty, then only the TranslateOnly variable is kept for emission. void partitionGlobalsBySection(const VariableDeclarationList &Vars, VariableDeclarationList VarsBySection[], const IceString &TranslateOnly) { @@ -440,8 +438,8 @@ void ELFObjectWriter::writeELFHeaderInternal(Elf64_Off SectionHeaderOffset, SizeT SectHeaderStrIndex, SizeT NumSections) { - // Write the e_ident: magic number, class, etc. - // The e_ident is byte order and ELF class independent. + // Write the e_ident: magic number, class, etc. The e_ident is byte order and + // ELF class independent. Str.writeBytes(llvm::StringRef(ElfMagic, strlen(ElfMagic))); Str.write8(IsELF64 ? ELFCLASS64 : ELFCLASS32); Str.write8(ELFDATA2LSB); @@ -451,21 +449,21 @@ Str.write8(ELF_ABIVersion); Str.writeZeroPadding(EI_NIDENT - EI_PAD); - // TODO(jvoung): Handle and test > 64K sections. See the generic ABI doc: - // https://refspecs.linuxbase.org/elf/gabi4+/ch4.eheader.html - // e_shnum should be 0 and then actual number of sections is - // stored in the sh_size member of the 0th section. + // TODO(jvoung): Handle and test > 64K sections. See the generic ABI doc: + // https://refspecs.linuxbase.org/elf/gabi4+/ch4.eheader.html e_shnum should + // be 0 and then actual number of sections is stored in the sh_size member of + // the 0th section. assert(NumSections < SHN_LORESERVE); assert(SectHeaderStrIndex < SHN_LORESERVE); const TargetArch Arch = Ctx.getFlags().getTargetArch(); - // Write the rest of the file header, which does depend on byte order - // and ELF class. + // Write the rest of the file header, which does depend on byte order and ELF + // class. Str.writeLE16(ET_REL); // e_type Str.writeLE16(getELFMachine(Ctx.getFlags().getTargetArch())); // e_machine Str.writeELFWord<IsELF64>(1); // e_version - // Since this is for a relocatable object, there is no entry point, - // and no program headers. 
+ // Since this is for a relocatable object, there is no entry point, and no + // program headers. Str.writeAddrOrOffset<IsELF64>(0); // e_entry Str.writeAddrOrOffset<IsELF64>(0); // e_phoff Str.writeAddrOrOffset<IsELF64>(SectionHeaderOffset); // e_shoff @@ -505,8 +503,8 @@ SecStrBuf.str(), SHT_PROGBITS, ShFlags, Align, WriteAmt); RODataSections.push_back(Section); SizeT OffsetInSection = 0; - // The symbol table entry doesn't need to know the defined symbol's - // size since this is in a section with a fixed Entry Size. + // The symbol table entry doesn't need to know the defined symbol's size + // since this is in a section with a fixed Entry Size. const SizeT SymbolSize = 0; Section->setFileOffset(alignFileOffset(Align)); @@ -541,11 +539,11 @@ Section->setSize(OffsetInSection); } -// Instantiate known needed versions of the template, since we are -// defining the function in the .cpp file instead of the .h file. -// We may need to instantiate constant pools for integers as well -// if we do constant-pooling of large integers to remove them -// from the instruction stream (fewer bytes controlled by an attacker). +// Instantiate known needed versions of the template, since we are defining the +// function in the .cpp file instead of the .h file. We may need to instantiate +// constant pools for integers as well if we do constant-pooling of large +// integers to remove them from the instruction stream (fewer bytes controlled +// by an attacker). template void ELFObjectWriter::writeConstantPool<ConstantFloat>(Type Ty); template void ELFObjectWriter::writeConstantPool<ConstantDouble>(Type Ty);
diff --git a/src/IceELFObjectWriter.h b/src/IceELFObjectWriter.h index c1bfb74..60ed60c 100644 --- a/src/IceELFObjectWriter.h +++ b/src/IceELFObjectWriter.h
@@ -24,11 +24,11 @@ namespace Ice { -/// Higher level ELF object writer. Manages section information and writes -/// the final ELF object. The object writer will write to file the code -/// and data as it is being defined (rather than keep a copy). -/// After all definitions are written out, it will finalize the bookkeeping -/// sections and write them out. Expected usage: +/// Higher level ELF object writer. Manages section information and writes the +/// final ELF object. The object writer will write to file the code and data as +/// it is being defined (rather than keep a copy). After all definitions are +/// written out, it will finalize the bookkeeping sections and write them out. +/// Expected usage: /// /// (1) writeInitialELFHeader (invoke once) /// (2) writeDataSection (may be invoked multiple times, as long as @@ -38,9 +38,9 @@ /// (5) setUndefinedSyms (invoke once) /// (6) writeNonUserSections (invoke once) /// -/// The requirement for writeDataSection to be invoked only once can -/// be relaxed if using -fdata-sections. The requirement to invoke only once -/// without -fdata-sections is so that variables that belong to each possible +/// The requirement for writeDataSection to be invoked only once can be relaxed +/// if using -fdata-sections. The requirement to invoke only once without +/// -fdata-sections is so that variables that belong to each possible /// SectionType are contiguous in the file. With -fdata-sections, each global /// variable is in a separate section and therefore the sections will be /// trivially contiguous. @@ -53,27 +53,27 @@ ELFObjectWriter(GlobalContext &Ctx, ELFStreamer &Out); /// Write the initial ELF header. This is just to reserve space in the ELF - /// file. Reserving space allows the other functions to write text - /// and data directly to the file and get the right file offsets. + /// file. Reserving space allows the other functions to write text and data + /// directly to the file and get the right file offsets. 
void writeInitialELFHeader(); - /// Copy initializer data for globals to file and note the offset and size - /// of each global's definition in the symbol table. - /// Use the given target's RelocationKind for any relocations. + /// Copy initializer data for globals to file and note the offset and size of + /// each global's definition in the symbol table. Use the given target's + /// RelocationKind for any relocations. void writeDataSection(const VariableDeclarationList &Vars, FixupKind RelocationKind, const IceString &SectionSuffix); /// Copy data of a function's text section to file and note the offset of the - /// symbol's definition in the symbol table. - /// Copy the text fixups for use after all functions are written. - /// The text buffer and fixups are extracted from the Assembler object. + /// symbol's definition in the symbol table. Copy the text fixups for use + /// after all functions are written. The text buffer and fixups are extracted + /// from the Assembler object. void writeFunctionCode(const IceString &FuncName, bool IsInternal, const Assembler *Asm); - /// Queries the GlobalContext for constant pools of the given type - /// and writes out read-only data sections for those constants. This also - /// fills the symbol table with labels for each constant pool entry. + /// Queries the GlobalContext for constant pools of the given type and writes + /// out read-only data sections for those constants. This also fills the + /// symbol table with labels for each constant pool entry. template <typename ConstType> void writeConstantPool(Type Ty); /// Write a jump table and register fixups for the target addresses. @@ -82,12 +82,12 @@ /// Populate the symbol table with a list of external/undefined symbols. void setUndefinedSyms(const ConstantList &UndefSyms); - /// Do final layout and write out the rest of the object file. - /// Finally, patch up the initial ELF header with the final info. + /// Do final layout and write out the rest of the object file. 
Finally, patch + /// up the initial ELF header with the final info. void writeNonUserSections(); - /// Which type of ELF section a global variable initializer belongs to. - /// This is used as an array index so should start at 0 and be contiguous. + /// Which type of ELF section a global variable initializer belongs to. This + /// is used as an array index so should start at 0 and be contiguous. enum SectionType { ROData = 0, Data, BSS, NumSectionTypes }; private: @@ -120,25 +120,25 @@ Elf64_Xword ShFlags, Elf64_Xword ShAddralign, Elf64_Xword ShEntsize); - /// Create a relocation section, given the related section - /// (e.g., .text, .data., .rodata). + /// Create a relocation section, given the related section (e.g., .text, + /// .data., .rodata). ELFRelocationSection * createRelocationSection(const ELFSection *RelatedSection); - /// Align the file position before writing out a section's data, - /// and return the position of the file. + /// Align the file position before writing out a section's data, and return + /// the position of the file. Elf64_Off alignFileOffset(Elf64_Xword Align); - /// Assign an ordering / section numbers to each section. - /// Fill in other information that is only known near the end - /// (such as the size, if it wasn't already incrementally updated). - /// This then collects all sections in the decided order, into one vector, - /// for conveniently writing out all of the section headers. + /// Assign an ordering / section numbers to each section. Fill in other + /// information that is only known near the end (such as the size, if it + /// wasn't already incrementally updated). This then collects all sections in + /// the decided order, into one vector, for conveniently writing out all of + /// the section headers. void assignSectionNumbersInfo(SectionList &AllSections); - /// This function assigns .foo and .rel.foo consecutive section numbers. 
- /// It also sets the relocation section's sh_info field to the related - /// section's number. + /// This function assigns .foo and .rel.foo consecutive section numbers. It + /// also sets the relocation section's sh_info field to the related section's + /// number. template <typename UserSectionList> void assignRelSectionNumInPairs(SizeT &CurSectionNumber, UserSectionList &UserSections, @@ -156,9 +156,9 @@ FixupKind RelocationKind, const IceString &SectionSuffix); - /// Write the final relocation sections given the final symbol table. - /// May also be able to seek around the file and resolve function calls - /// that are for functions within the same section. + /// Write the final relocation sections given the final symbol table. May also + /// be able to seek around the file and resolve function calls that are for + /// functions within the same section. void writeAllRelocationSections(); void writeRelocationSections(RelSectionList &RelSections);
diff --git a/src/IceELFSection.cpp b/src/IceELFSection.cpp index 7893354..3e33c99 100644 --- a/src/IceELFSection.cpp +++ b/src/IceELFSection.cpp
@@ -82,8 +82,8 @@ // Symbol tables. void ELFSymbolTableSection::createNullSymbol(ELFSection *NullSection) { - // The first entry in the symbol table should be a NULL entry, - // so make sure the map is still empty. + // The first entry in the symbol table should be a NULL entry, so make sure + // the map is still empty. assert(LocalSymbols.empty()); const IceString NullSymName(""); createDefinedSym(NullSymName, STT_NOTYPE, STB_LOCAL, NullSection, 0, 0); @@ -208,8 +208,8 @@ assert(StringIndex.second == UnknownIndex); llvm::StringRef Cur = llvm::StringRef(StringIndex.first); if (Prev.endswith(Cur)) { - // Prev is already in the StringData, and Cur is shorter than Prev - // based on the sort. + // Prev is already in the StringData, and Cur is shorter than Prev based + // on the sort. StringIndex.second = StringData.size() - Cur.size() - 1; continue; }
diff --git a/src/IceELFSection.h b/src/IceELFSection.h index 92dc02b..636f3b4 100644 --- a/src/IceELFSection.h +++ b/src/IceELFSection.h
@@ -36,15 +36,15 @@ public: virtual ~ELFSection() = default; - /// Sentinel value for a section number/index for before the final - /// section index is actually known. The dummy NULL section will be assigned - /// number 0, and it is referenced by the dummy 0-th symbol in the symbol - /// table, so use max() instead of 0. + /// Sentinel value for a section number/index for before the final section + /// index is actually known. The dummy NULL section will be assigned number 0, + /// and it is referenced by the dummy 0-th symbol in the symbol table, so use + /// max() instead of 0. enum { NoSectionNumber = std::numeric_limits<SizeT>::max() }; - /// Constructs an ELF section, filling in fields that will be known - /// once the *type* of section is decided. Other fields may be updated - /// incrementally or only after the program is completely defined. + /// Constructs an ELF section, filling in fields that will be known once the + /// *type* of section is decided. Other fields may be updated incrementally or + /// only after the program is completely defined. ELFSection(const IceString &Name, Elf64_Word ShType, Elf64_Xword ShFlags, Elf64_Xword ShAddralign, Elf64_Xword ShEntsize) : Name(Name), Header() { @@ -84,8 +84,8 @@ template <bool IsELF64> void writeHeader(ELFStreamer &Str); protected: - /// Name of the section in convenient string form (instead of a index - /// into the Section Header String Table, which is not known till later). + /// Name of the section in convenient string form (instead of an index into + /// the Section Header String Table, which is not known till later). const IceString Name; // The fields of the header. May only be partially initialized, but should @@ -96,8 +96,8 @@ SizeT Number = NoSectionNumber; }; -/// Models text/code sections. Code is written out incrementally and the -/// size of the section is then updated incrementally. +/// Models text/code sections. 
Code is written out incrementally and the size of +/// the section is then updated incrementally. class ELFTextSection : public ELFSection { ELFTextSection() = delete; ELFTextSection(const ELFTextSection &) = delete; @@ -109,9 +109,9 @@ void appendData(ELFStreamer &Str, const llvm::StringRef MoreData); }; -/// Models data/rodata sections. Data is written out incrementally and the -/// size of the section is then updated incrementally. -/// Some rodata sections may have fixed entsize and duplicates may be mergeable. +/// Models data/rodata sections. Data is written out incrementally and the size +/// of the section is then updated incrementally. Some rodata sections may have +/// fixed entsize and duplicates may be mergeable. class ELFDataSection : public ELFSection { ELFDataSection() = delete; ELFDataSection(const ELFDataSection &) = delete; @@ -128,8 +128,8 @@ RelocOffsetT RelocOffset); /// Pad the next section offset for writing data elements to the requested - /// alignment. If the section is NOBITS then do not actually write out - /// the padding and only update the section size. + /// alignment. If the section is NOBITS then do not actually write out the + /// padding and only update the section size. void padToAlignment(ELFStreamer &Str, Elf64_Xword Align); }; @@ -141,8 +141,8 @@ ELFSection *Section; SizeT Number; - /// Sentinel value for symbols that haven't been assigned a number yet. - /// The dummy 0-th symbol will be assigned number 0, so don't use that. + /// Sentinel value for symbols that haven't been assigned a number yet. The + /// dummy 0-th symbol will be assigned number 0, so don't use that. enum { UnknownNumber = std::numeric_limits<SizeT>::max() }; void setNumber(SizeT N) { @@ -170,16 +170,15 @@ : ELFSection(Name, ShType, ShFlags, ShAddralign, ShEntsize), NullSymbol(nullptr) {} - /// Create initial entry for a symbol when it is defined. - /// Each entry should only be defined once. 
- /// We might want to allow Name to be a dummy name initially, then - /// get updated to the real thing, since Data initializers are read - /// before the bitcode's symbol table is read. + /// Create initial entry for a symbol when it is defined. Each entry should + /// only be defined once. We might want to allow Name to be a dummy name + /// initially, then get updated to the real thing, since Data initializers are + /// read before the bitcode's symbol table is read. void createDefinedSym(const IceString &Name, uint8_t Type, uint8_t Binding, ELFSection *Section, RelocOffsetT Offset, SizeT Size); - /// Note that a symbol table entry needs to be created for the given - /// symbol because it is undefined. + /// Note that a symbol table entry needs to be created for the given symbol + /// because it is undefined. void noteUndefinedSym(const IceString &Name, ELFSection *NullSection); const ELFSym *findSymbol(const IceString &Name) const; @@ -198,8 +197,8 @@ void writeData(ELFStreamer &Str, bool IsELF64); private: - // Map from symbol name to its symbol information. - // This assumes symbols are unique across all sections. + // Map from symbol name to its symbol information. This assumes symbols are + // unique across all sections. using SymtabKey = IceString; using SymMap = std::map<SymtabKey, ELFSym>; @@ -207,8 +206,8 @@ void writeSymbolMap(ELFStreamer &Str, const SymMap &Map); const ELFSym *NullSymbol; - // Keep Local and Global symbols separate, since the sh_info needs to - // know the index of the last LOCAL. + // Keep Local and Global symbols separate, since the sh_info needs to know + // the index of the last LOCAL. SymMap LocalSymbols; SymMap GlobalSymbols; }; @@ -231,8 +230,8 @@ RelatedSection = Section; } - /// Track additional relocations which start out relative to offset 0, - /// but should be adjusted to be relative to BaseOff. 
+ /// Track additional relocations which start out relative to offset 0, but + /// should be adjusted to be relative to BaseOff. void addRelocations(RelocOffsetT BaseOff, const FixupRefList &FixupRefs); /// Track a single additional relocation. @@ -251,12 +250,11 @@ FixupList Fixups; }; -/// Models a string table. The user will build the string table by -/// adding strings incrementally. At some point, all strings should be -/// known and doLayout() should be called. After that, no other -/// strings may be added. However, the final offsets of the strings -/// can be discovered and used to fill out section headers and symbol -/// table entries. +/// Models a string table. The user will build the string table by adding +/// strings incrementally. At some point, all strings should be known and +/// doLayout() should be called. After that, no other strings may be added. +/// However, the final offsets of the strings can be discovered and used to fill +/// out section headers and symbol table entries. class ELFStringTableSection : public ELFSection { ELFStringTableSection() = delete; ELFStringTableSection(const ELFStringTableSection &) = delete; @@ -271,12 +269,12 @@ /// Finalizes the layout of the string table and fills in the section Data. void doLayout(); - /// The first byte of the string table should be \0, so it is an - /// invalid index. Indices start out as unknown until layout is complete. + /// The first byte of the string table should be \0, so it is an invalid + /// index. Indices start out as unknown until layout is complete. enum { UnknownIndex = 0 }; - /// Grabs the final index of a string after layout. Returns UnknownIndex - /// if the string's index is not found. + /// Grabs the final index of a string after layout. Returns UnknownIndex if + /// the string's index is not found. 
size_t getIndex(const IceString &Str) const; llvm::StringRef getSectionData() const { @@ -290,19 +288,19 @@ private: bool isLaidOut() const { return !StringData.empty(); } - /// Strings can share a string table entry if they share the same - /// suffix. E.g., "pop" and "lollipop" can both use the characters - /// in "lollipop", but "pops" cannot, and "unpop" cannot either. - /// Though, "pop", "lollipop", and "unpop" share "pop" as the suffix, - /// "pop" can only share the characters with one of them. + /// Strings can share a string table entry if they share the same suffix. + /// E.g., "pop" and "lollipop" can both use the characters in "lollipop", but + /// "pops" cannot, and "unpop" cannot either. Though, "pop", "lollipop", and + /// "unpop" share "pop" as the suffix, "pop" can only share the characters + /// with one of them. struct SuffixComparator { bool operator()(const IceString &StrA, const IceString &StrB) const; }; using StringToIndexType = std::map<IceString, size_t, SuffixComparator>; - /// Track strings to their index. Index will be UnknownIndex if not - /// yet laid out. + /// Track strings to their index. Index will be UnknownIndex if not yet laid + /// out. StringToIndexType StringToIndexMap; using RawDataType = std::vector<uint8_t>;
diff --git a/src/IceELFStreamer.h b/src/IceELFStreamer.h index 93051b9..ab99891 100644 --- a/src/IceELFStreamer.h +++ b/src/IceELFStreamer.h
@@ -20,8 +20,8 @@ namespace Ice { -/// Low level writer that can that can handle ELFCLASS32/64. -/// Little endian only for now. +/// Low level writer that can handle ELFCLASS32/64. Little endian only +/// for now. class ELFStreamer { ELFStreamer() = delete; ELFStreamer(const ELFStreamer &) = delete;
diff --git a/src/IceFixups.cpp b/src/IceFixups.cpp index ff7916c..a86d985 100644 --- a/src/IceFixups.cpp +++ b/src/IceFixups.cpp
@@ -40,8 +40,8 @@ else Str << Ctx->mangleName(CR->getName()); } else { - // NOTE: currently only float/doubles are put into constant pools. - // In the future we may put integers as well. + // NOTE: currently only float/doubles are put into constant pools. In the + // future we may put integers as well. assert(llvm::isa<ConstantFloat>(C) || llvm::isa<ConstantDouble>(C)); C->emitPoolLabel(Str); }
diff --git a/src/IceGlobalContext.cpp b/src/IceGlobalContext.cpp index b4da1b6..7b7183d 100644 --- a/src/IceGlobalContext.cpp +++ b/src/IceGlobalContext.cpp
@@ -8,8 +8,8 @@ //===----------------------------------------------------------------------===// /// /// \file -/// This file defines aspects of the compilation that persist across -/// multiple functions. +/// This file defines aspects of the compilation that persist across multiple +/// functions. /// //===----------------------------------------------------------------------===// @@ -48,9 +48,9 @@ namespace { -// Define the key comparison function for the constant pool's -// unordered_map, but only for key types of interest: integer types, -// floating point types, and the special RelocatableTuple. +// Define the key comparison function for the constant pool's unordered_map, +// but only for key types of interest: integer types, floating point types, and +// the special RelocatableTuple. template <typename KeyType, class Enable = void> struct KeyCompare {}; template <typename KeyType> @@ -70,9 +70,9 @@ } }; -// Define a key comparison function for sorting the constant pool's -// values after they are dumped to a vector. This covers integer -// types, floating point types, and ConstantRelocatable values. +// Define a key comparison function for sorting the constant pool's values +// after they are dumped to a vector. This covers integer types, floating point +// types, and ConstantRelocatable values. template <typename ValueType, class Enable = void> struct KeyCompareLess {}; template <typename ValueType> @@ -601,8 +601,8 @@ assert(OldName[OldPos - 1] == 'S'); assert(OldName[OldPos + Length] == '_'); if (AllZs) { - // Replace N 'Z' characters with a '0' (if N=0) or '1' (if - // N>0) followed by N '0' characters. + // Replace N 'Z' characters with a '0' (if N=0) or '1' (if N>0) + // followed by N '0' characters. NewName[NewPos++] = (Length ? '1' : '0'); for (size_t i = 0; i < Length; ++i) { NewName[NewPos++] = '0'; @@ -642,16 +642,15 @@ OldName = NewName; } -// In this context, name mangling means to rewrite a symbol using a -// given prefix. 
For a C++ symbol, nest the original symbol inside -// the "prefix" namespace. For other symbols, just prepend the -// prefix. +// In this context, name mangling means to rewrite a symbol using a given +// prefix. For a C++ symbol, nest the original symbol inside the "prefix" +// namespace. For other symbols, just prepend the prefix. IceString GlobalContext::mangleName(const IceString &Name) const { - // An already-nested name like foo::bar() gets pushed down one - // level, making it equivalent to Prefix::foo::bar(). + // An already-nested name like foo::bar() gets pushed down one level, making + // it equivalent to Prefix::foo::bar(). // _ZN3foo3barExyz ==> _ZN6Prefix3foo3barExyz - // A non-nested but mangled name like bar() gets nested, making it - // equivalent to Prefix::bar(). + // A non-nested but mangled name like bar() gets nested, making it equivalent + // to Prefix::bar(). // _Z3barxyz ==> ZN6Prefix3barExyz // An unmangled, extern "C" style name, gets a simple prefix: // bar ==> Prefixbar @@ -671,28 +670,27 @@ // (splice in "6Prefix") ^^^^^^^ snprintf(NewName.data(), BufLen, "_ZN%u%s%s", PrefixLength, TestPrefix.c_str(), NameBase.data()); - // We ignore the snprintf return value (here and below). If we - // somehow miscalculated the output buffer length, the output will - // be truncated, but it will be truncated consistently for all - // mangleName() calls on the same input string. + // We ignore the snprintf return value (here and below). If we somehow + // miscalculated the output buffer length, the output will be truncated, + // but it will be truncated consistently for all mangleName() calls on the + // same input string. incrementSubstitutions(NewName); return NewName.data(); } - // Artificially limit BaseLength to 9 digits (less than 1 billion) - // because sscanf behavior is undefined on integer overflow. 
If - // there are more than 9 digits (which we test by looking at the - // beginning of NameBase), then we consider this a failure to parse - // a namespace mangling, and fall back to the simple prefixing. + // Artificially limit BaseLength to 9 digits (less than 1 billion) because + // sscanf behavior is undefined on integer overflow. If there are more than 9 + // digits (which we test by looking at the beginning of NameBase), then we + // consider this a failure to parse a namespace mangling, and fall back to + // the simple prefixing. ItemsParsed = sscanf(Name.c_str(), "_Z%9u%s", &BaseLength, NameBase.data()); if (ItemsParsed == 2 && BaseLength <= strlen(NameBase.data()) && !isdigit(NameBase[0])) { // Transform _Z3barxyz ==> _ZN6Prefix3barExyz // ^^^^^^^^ ^ - // (splice in "N6Prefix", and insert "E" after "3bar") - // But an "I" after the identifier indicates a template argument - // list terminated with "E"; insert the new "E" before/after the - // old "E". E.g.: + // (splice in "N6Prefix", and insert "E" after "3bar") But an "I" after the + // identifier indicates a template argument list terminated with "E"; + // insert the new "E" before/after the old "E". E.g.: // Transform _Z3barIabcExyz ==> _ZN6Prefix3barIabcEExyz // ^^^^^^^^ ^ // (splice in "N6Prefix", and insert "E" after "3barIabcE") @@ -730,8 +728,8 @@ } } -// TODO(stichnot): Consider adding thread-local caches of constant -// pool entries to reduce contention. +// TODO(stichnot): Consider adding thread-local caches of constant pool entries +// to reduce contention. // All locking is done by the getConstantInt[0-9]+() target function. Constant *GlobalContext::getConstantInt(Type Ty, int64_t Value) { @@ -875,8 +873,8 @@ JumpTableDataList GlobalContext::getJumpTables() { JumpTableDataList JumpTables(*getJumpTableList()); - // Make order deterministic by sorting into functions and then ID of the - // jump table within that function. 
+ // Make order deterministic by sorting into functions and then ID of the jump + // table within that function. std::sort(JumpTables.begin(), JumpTables.end(), [](const JumpTableData &A, const JumpTableData &B) { if (A.getFunctionName() != B.getFunctionName()) @@ -946,11 +944,10 @@ Timers->at(StackID).setName(NewName); } -// Note: optQueueBlockingPush and optQueueBlockingPop use unique_ptr -// at the interface to take and transfer ownership, but they -// internally store the raw Cfg pointer in the work queue. This -// allows e.g. future queue optimizations such as the use of atomics -// to modify queue elements. +// Note: optQueueBlockingPush and optQueueBlockingPop use unique_ptr at the +// interface to take and transfer ownership, but they internally store the raw +// Cfg pointer in the work queue. This allows e.g. future queue optimizations +// such as the use of atomics to modify queue elements. void GlobalContext::optQueueBlockingPush(std::unique_ptr<Cfg> Func) { assert(Func); OptQ.blockingPush(Func.release());
diff --git a/src/IceGlobalContext.h b/src/IceGlobalContext.h index 8a747f8..baab9ca 100644 --- a/src/IceGlobalContext.h +++ b/src/IceGlobalContext.h
@@ -8,8 +8,8 @@ //===----------------------------------------------------------------------===// /// /// \file -/// This file declares aspects of the compilation that persist across -/// multiple functions. +/// This file declares aspects of the compilation that persist across multiple +/// functions. /// //===----------------------------------------------------------------------===// @@ -186,9 +186,10 @@ /// translators using the same bitcode as input. IceString mangleName(const IceString &Name) const; - // Manage Constants. - // getConstant*() functions are not const because they might add - // something to the constant pool. + /// \name Manage Constants. + /// @{ + // getConstant*() functions are not const because they might add something to + // the constant pool. Constant *getConstantInt(Type Ty, int64_t Value); Constant *getConstantInt1(int8_t ConstantInt1); Constant *getConstantInt8(int8_t ConstantInt8); @@ -205,11 +206,12 @@ Constant *getConstantUndef(Type Ty); /// Returns a zero value. Constant *getConstantZero(Type Ty); - /// getConstantPool() returns a copy of the constant pool for - /// constants of a given type. + /// getConstantPool() returns a copy of the constant pool for constants of a + /// given type. ConstantList getConstantPool(Type Ty); /// Returns a copy of the list of external symbols. ConstantList getConstantExternSyms(); + /// @} /// Return a locked pointer to the registered jump tables. JumpTableDataList getJumpTables(); @@ -299,36 +301,35 @@ /// These are predefined TimerStackIdT values. enum TimerStackKind { TSK_Default = 0, TSK_Funcs, TSK_Num }; - /// newTimerStackID() creates a new TimerStack in the global space. - /// It does not affect any TimerStack objects in TLS. + /// newTimerStackID() creates a new TimerStack in the global space. It does + /// not affect any TimerStack objects in TLS. TimerStackIdT newTimerStackID(const IceString &Name); - /// dumpTimers() dumps the global timer data. 
As such, one probably - /// wants to call mergeTimerStacks() as a prerequisite. + /// dumpTimers() dumps the global timer data. As such, one probably wants to + /// call mergeTimerStacks() as a prerequisite. void dumpTimers(TimerStackIdT StackID = TSK_Default, bool DumpCumulative = true); - /// The following methods affect only the calling thread's TLS timer - /// data. + /// The following methods affect only the calling thread's TLS timer data. TimerIdT getTimerID(TimerStackIdT StackID, const IceString &Name); void pushTimer(TimerIdT ID, TimerStackIdT StackID); void popTimer(TimerIdT ID, TimerStackIdT StackID); void resetTimer(TimerStackIdT StackID); void setTimerName(TimerStackIdT StackID, const IceString &NewName); - /// This is the first work item sequence number that the parser - /// produces, and correspondingly the first sequence number that the - /// emitter thread will wait for. Start numbering at 1 to leave room - /// for a sentinel, in case e.g. we wish to inject items with a - /// special sequence number that may be executed out of order. + /// This is the first work item sequence number that the parser produces, and + /// correspondingly the first sequence number that the emitter thread will + /// wait for. Start numbering at 1 to leave room for a sentinel, in case e.g. + /// we wish to inject items with a special sequence number that may be + /// executed out of order. static uint32_t getFirstSequenceNumber() { return 1; } - /// Adds a newly parsed and constructed function to the Cfg work - /// queue. Notifies any idle workers that a new function is - /// available for translating. May block if the work queue is too - /// large, in order to control memory footprint. + /// Adds a newly parsed and constructed function to the Cfg work queue. + /// Notifies any idle workers that a new function is available for + /// translating. May block if the work queue is too large, in order to control + /// memory footprint. 
void optQueueBlockingPush(std::unique_ptr<Cfg> Func); - /// Takes a Cfg from the work queue for translating. May block if - /// the work queue is currently empty. Returns nullptr if there is - /// no more work - the queue is empty and either end() has been - /// called or the Sequential flag was set. + /// Takes a Cfg from the work queue for translating. May block if the work + /// queue is currently empty. Returns nullptr if there is no more work - the + /// queue is empty and either end() has been called or the Sequential flag was + /// set. std::unique_ptr<Cfg> optQueueBlockingPop(); /// Notifies that no more work will be added to the work queue. void optQueueNotifyEnd() { OptQ.notifyEnd(); } @@ -378,8 +379,8 @@ } TranslationThreads.clear(); - // Only notify the emit queue to end after all the translation - // threads have ended. + // Only notify the emit queue to end after all the translation threads have + // ended. emitQueueNotifyEnd(); for (std::thread &Worker : EmitterThreads) { Worker.join(); @@ -392,8 +393,8 @@ Timers->mergeFrom(TLS->Timers); } if (BuildDefs::dump()) { - // Do a separate loop over AllThreadContexts to avoid holding - // two locks at once. + // Do a separate loop over AllThreadContexts to avoid holding two locks + // at once. auto Stats = getStatsCumulative(); for (ThreadContext *TLS : AllThreadContexts) Stats->add(TLS->StatsCumulative); @@ -413,8 +414,8 @@ ICE_TLS_SET_FIELD(TLS, MyTLS); emitItems(); } - /// Emit functions and global initializers from the emitter queue - /// until the queue is empty. + /// Emit functions and global initializers from the emitter queue until the + /// queue is empty. void emitItems(); /// Uses DataLowering to lower Globals. Side effects: @@ -425,12 +426,11 @@ /// Lowers the profile information. void lowerProfileData(); - /// Utility function to match a symbol name against a match string. 
- /// This is used in a few cases where we want to take some action on - /// a particular function or symbol based on a command-line argument, - /// such as changing the verbose level for a particular function. An - /// empty Match argument means match everything. Returns true if - /// there is a match. + /// Utility function to match a symbol name against a match string. This is + /// used in a few cases where we want to take some action on a particular + /// function or symbol based on a command-line argument, such as changing the + /// verbose level for a particular function. An empty Match argument means + /// match everything. Returns true if there is a match. static bool matchSymbolName(const IceString &SymbolName, const IceString &Match) { return Match.empty() || Match == SymbolName; @@ -552,9 +552,9 @@ static void TlsInit() { ICE_TLS_INIT_FIELD(TLS); } }; -/// Helper class to push and pop a timer marker. The constructor -/// pushes a marker, and the destructor pops it. This is for -/// convenient timing of regions of code. +/// Helper class to push and pop a timer marker. The constructor pushes a +/// marker, and the destructor pops it. This is for convenient timing of regions +/// of code. class TimerMarker { TimerMarker() = delete; TimerMarker(const TimerMarker &) = delete; @@ -589,8 +589,7 @@ bool Active = false; }; -/// Helper class for locking the streams and then automatically -/// unlocking them. +/// Helper class for locking the streams and then automatically unlocking them. class OstreamLocker { private: OstreamLocker() = delete;
diff --git a/src/IceGlobalInits.cpp b/src/IceGlobalInits.cpp index c95456c..2c1460a 100644 --- a/src/IceGlobalInits.cpp +++ b/src/IceGlobalInits.cpp
@@ -8,9 +8,8 @@ //===----------------------------------------------------------------------===// /// /// \file -/// This file implements the notion of function declarations, global -/// variable declarations, and the corresponding variable initializers -/// in Subzero. +/// This file implements the notion of function declarations, global variable +/// declarations, and the corresponding variable initializers in Subzero. /// //===----------------------------------------------------------------------===// @@ -152,9 +151,8 @@ return; dumpType(Stream); Stream << " c\""; - // Code taken from PrintEscapedString() in AsmWriter.cpp. Keep - // the strings in the same format as the .ll file for practical - // diffing. + // Code taken from PrintEscapedString() in AsmWriter.cpp. Keep the strings in + // the same format as the .ll file for practical diffing. for (uint8_t C : Contents) { if (isprint(C) && C != '\\' && C != '"') Stream << C;
diff --git a/src/IceGlobalInits.h b/src/IceGlobalInits.h index c15aed0..8f51db2 100644 --- a/src/IceGlobalInits.h +++ b/src/IceGlobalInits.h
@@ -8,10 +8,10 @@ //===----------------------------------------------------------------------===// /// /// \file -/// This file declares the representation of function declarations, -/// global variable declarations, and the corresponding variable -/// initializers in Subzero. Global variable initializers are -/// represented as a sequence of simple initializers. +/// This file declares the representation of function declarations, global +/// variable declarations, and the corresponding variable initializers in +/// Subzero. Global variable initializers are represented as a sequence of +/// simple initializers. /// //===----------------------------------------------------------------------===// @@ -81,8 +81,8 @@ /// Returns true if when emitting names, we should suppress mangling. virtual bool getSuppressMangling() const = 0; - /// Mangles name for cross tests, unless external and not defined locally - /// (so that relocations accross pnacl-sz and pnacl-llc will work). + /// Mangles name for cross tests, unless external and not defined locally (so + /// that relocations across pnacl-sz and pnacl-llc will work). virtual IceString mangleName(GlobalContext *Ctx) const { return getSuppressMangling() ? Name : Ctx->mangleName(Name); } @@ -97,8 +97,8 @@ llvm::GlobalValue::LinkageTypes Linkage; }; -/// Models a function declaration. This includes the type signature of -/// the function, its calling conventions, and its linkage. +/// Models a function declaration. This includes the type signature of the +/// function, its calling conventions, and its linkage. class FunctionDeclaration : public GlobalDeclaration { FunctionDeclaration() = delete; FunctionDeclaration(const FunctionDeclaration &) = delete; @@ -286,8 +286,7 @@ llvm::isa<ZeroInitializer>((*Initializers)[0].get())); } - /// Returns the number of bytes for the initializer of the global - /// address. + /// Returns the number of bytes for the initializer of the global address. 
SizeT getNumBytes() const { SizeT Count = 0; for (const std::unique_ptr<Initializer> &Init : *Initializers) { @@ -296,19 +295,18 @@ return Count; } - /// Adds Initializer to the list of initializers. Takes ownership of - /// the initializer. + /// Adds Initializer to the list of initializers. Takes ownership of the + /// initializer. void addInitializer(std::unique_ptr<Initializer> Initializer) { Initializers->emplace_back(std::move(Initializer)); HasInitializer = true; } - /// Prints out type for initializer associated with the declaration - /// to Stream. + /// Prints out type for initializer associated with the declaration to Stream. void dumpType(Ostream &Stream) const final; - /// Prints out the definition of the global variable declaration - /// (including initialization). + /// Prints out the definition of the global variable declaration (including + /// initialization). void dump(GlobalContext *Ctx, Ostream &Stream) const final; static bool classof(const GlobalDeclaration *Addr) {
diff --git a/src/IceInst.cpp b/src/IceInst.cpp index b92e954..d9b91f0 100644 --- a/src/IceInst.cpp +++ b/src/IceInst.cpp
@@ -8,8 +8,8 @@ //===----------------------------------------------------------------------===// /// /// \file -/// This file implements the Inst class, primarily the various -/// subclass constructors and dump routines. +/// This file implements the Inst class, primarily the various subclass +/// constructors and dump routines. /// //===----------------------------------------------------------------------===// @@ -82,15 +82,15 @@ Number = isDeleted() ? NumberDeleted : Func->newInstNumber(); } -// Delete the instruction if its tentative Dead flag is still set -// after liveness analysis. +// Delete the instruction if its tentative Dead flag is still set after +// liveness analysis. void Inst::deleteIfDead() { if (Dead) setDeleted(); } -// If Src is a Variable, it returns true if this instruction ends -// Src's live range. Otherwise, returns false. +// If Src is a Variable, it returns true if this instruction ends Src's live +// range. Otherwise, returns false. bool Inst::isLastUse(const Operand *TestSrc) const { if (LiveRangesEnded == 0) return false; // early-exit optimization @@ -116,17 +116,16 @@ // with SpliceAssn spliced in: // d = [x,y] // -// Reconstruct the LiveRangesEnded bitmask in this instruction by -// combining the LiveRangesEnded values of OrigInst and SpliceAssn. -// If operands d and [x,y] contain a different number of variables, -// then the bitmask position for e may be different in OrigInst and -// the current instruction, requiring extra shifts and masks in the -// computation. In the example above, OrigInst has variable e in bit -// position 3, whereas the current instruction has e in bit position 4 +// Reconstruct the LiveRangesEnded bitmask in this instruction by combining the +// LiveRangesEnded values of OrigInst and SpliceAssn. 
If operands d and [x,y] +// contain a different number of variables, then the bitmask position for e may +// be different in OrigInst and the current instruction, requiring extra shifts +// and masks in the computation. In the example above, OrigInst has variable e +// in bit position 3, whereas the current instruction has e in bit position 4 // because [x,y] consumes 2 bitmask slots while d only consumed 1. // -// Additionally, set HasSideEffects if either OrigInst or SpliceAssn -// have HasSideEffects set. +// Additionally, set HasSideEffects if either OrigInst or SpliceAssn have +// HasSideEffects set. void Inst::spliceLivenessInfo(Inst *OrigInst, Inst *SpliceAssn) { HasSideEffects |= OrigInst->HasSideEffects; HasSideEffects |= SpliceAssn->HasSideEffects; @@ -184,8 +183,8 @@ } if (Dead) return false; - // Phi arguments only get added to Live in the predecessor node, but - // we still need to update LiveRangesEnded. + // Phi arguments only get added to Live in the predecessor node, but we still + // need to update LiveRangesEnded. bool IsPhi = llvm::isa<InstPhi>(this); resetLastUses(); FOREACH_VAR_IN_INST(Var, *this) { @@ -195,20 +194,21 @@ if (!IsPhi) { Live[VarNum] = true; // For a variable in SSA form, its live range can end at most once in a - // basic block. However, after lowering to two-address instructions, we - // end up with sequences like "t=b;t+=c;a=t" where t's live range begins - // and ends twice. ICE only allows a variable to have a single liveness - // interval in a basic block (except for blocks where a variable is - // live-in and live-out but there is a gap in the middle). Therefore, - // this lowered sequence needs to represent a single conservative live - // range for t. Since the instructions are being traversed backwards, - // we make sure LiveEnd is only set once by setting it only when - // LiveEnd[VarNum]==0 (sentinel value). Note that it's OK to set - // LiveBegin multiple times because of the backwards traversal. + // basic block. 
However, after lowering to two-address instructions, we + // end up with sequences like "t=b;t+=c;a=t" where t's live range + // begins and ends twice. ICE only allows a variable to have a single + // liveness interval in a basic block (except for blocks where a + // variable is live-in and live-out but there is a gap in the middle). + // Therefore, this lowered sequence needs to represent a single + // conservative live range for t. Since the instructions are being + // traversed backwards, we make sure LiveEnd is only set once by + // setting it only when LiveEnd[VarNum]==0 (sentinel value). Note that + // it's OK to set LiveBegin multiple times because of the backwards + // traversal. if (LiveEnd && Liveness->getRangeMask(Var->getIndex())) { // Ideally, we would verify that VarNum wasn't already added in this // block, but this can't be done very efficiently with LiveEnd as a - // vector. Instead, livenessPostprocess() verifies this after the + // vector. Instead, livenessPostprocess() verifies this after the // vector has been sorted. LiveEnd->push_back(std::make_pair(VarNum, InstNumber)); } @@ -249,9 +249,9 @@ addSource(Source); } -// If TargetTrue==TargetFalse, we turn it into an unconditional -// branch. This ensures that, along with the 'switch' instruction -// semantics, there is at most one edge from one node to another. +// If TargetTrue==TargetFalse, we turn it into an unconditional branch. This +// ensures that, along with the 'switch' instruction semantics, there is at +// most one edge from one node to another. InstBr::InstBr(Cfg *Func, Operand *Source, CfgNode *TargetTrue_, CfgNode *TargetFalse_) : InstHighLevel(Func, Inst::Br, 1, nullptr), TargetFalse(TargetFalse_), @@ -334,18 +334,18 @@ Labels = Func->allocateArrayOf<CfgNode *>(MaxSrcs); } -// TODO: A Switch instruction (and maybe others) can add duplicate -// edges. 
We may want to de-dup Phis and validate consistency (i.e., -// the source operands are the same for duplicate edges), though it -// seems the current lowering code is OK with this situation. +// TODO: A Switch instruction (and maybe others) can add duplicate edges. We +// may want to de-dup Phis and validate consistency (i.e., the source operands +// are the same for duplicate edges), though it seems the current lowering code +// is OK with this situation. void InstPhi::addArgument(Operand *Source, CfgNode *Label) { Labels[getSrcSize()] = Label; addSource(Source); } -// Find the source operand corresponding to the incoming edge for the -// given node. TODO: This uses a linear-time search, which could be -// improved if it becomes a problem. +// Find the source operand corresponding to the incoming edge for the given +// node. TODO: This uses a linear-time search, which could be improved if it +// becomes a problem. Operand *InstPhi::getOperandForTarget(CfgNode *Target) const { for (SizeT I = 0; I < getSrcSize(); ++I) { if (Labels[I] == Target) @@ -355,9 +355,9 @@ return nullptr; } -// Updates liveness for a particular operand based on the given -// predecessor edge. Doesn't mark the operand as live if the Phi -// instruction is dead or deleted. +// Updates liveness for a particular operand based on the given predecessor +// edge. Doesn't mark the operand as live if the Phi instruction is dead or +// deleted. void InstPhi::livenessPhiOperand(LivenessBV &Live, CfgNode *Target, Liveness *Liveness) { if (isDeleted() || Dead) @@ -377,8 +377,8 @@ llvm_unreachable("Phi operand not found for specified target node"); } -// Change "a=phi(...)" to "a_phi=phi(...)" and return a new -// instruction "a=a_phi". +// Change "a=phi(...)" to "a_phi=phi(...)" and return a new instruction +// "a=a_phi". 
Inst *InstPhi::lower(Cfg *Func) { Variable *Dest = getDest(); assert(Dest); @@ -562,8 +562,8 @@ return; Ostream &Str = Func->getContext()->getStrDump(); bool First = true; - // Print "LIVEEND={a,b,c}" for all source operands whose live ranges - // are known to end at this instruction. + // Print "LIVEEND={a,b,c}" for all source operands whose live ranges are + // known to end at this instruction. if (Func->isVerbose(IceV_Liveness)) { FOREACH_VAR_IN_INST(Var, *this) { if (isLastUse(Var)) { @@ -886,8 +886,7 @@ void InstFakeDef::emit(const Cfg *Func) const { if (!BuildDefs::dump()) return; - // Go ahead and "emit" these for now, since they are relatively - // rare. + // Go ahead and "emit" these for now, since they are relatively rare. Ostream &Str = Func->getContext()->getStrEmit(); Str << "\t# "; getDest()->emit(Func); @@ -948,9 +947,8 @@ if (!SrcVar) return false; if (Dest->hasReg() && Dest->getRegNum() == SrcVar->getRegNum()) { - // TODO: On x86-64, instructions like "mov eax, eax" are used to - // clear the upper 32 bits of rax. We need to recognize and - // preserve these. + // TODO: On x86-64, instructions like "mov eax, eax" are used to clear the + // upper 32 bits of rax. We need to recognize and preserve these. return true; } if (!Dest->hasReg() && !SrcVar->hasReg() &&
diff --git a/src/IceInst.def b/src/IceInst.def index a9cadb2..d265213 100644 --- a/src/IceInst.def +++ b/src/IceInst.def
@@ -7,8 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This file defines properties of ICE instructions in the form of -// x-macros. +// This file defines properties of ICE instructions in the form of x-macros. // //===----------------------------------------------------------------------===//
diff --git a/src/IceInst.h b/src/IceInst.h index 80b2bd2..a727683 100644 --- a/src/IceInst.h +++ b/src/IceInst.h
@@ -25,9 +25,9 @@ // TODO: The Cfg structure, and instructions in particular, need to be // validated for things like valid operand types, valid branch targets, proper -// ordering of Phi and non-Phi instructions, etc. Most of the validity -// checking will be done in the bitcode reader. We need a list of everything -// that should be validated, and tests for each. +// ordering of Phi and non-Phi instructions, etc. Most of the validity checking +// will be done in the bitcode reader. We need a list of everything that should +// be validated, and tests for each. namespace Ice { @@ -118,9 +118,9 @@ return NodeList(); } virtual bool isUnconditionalBranch() const { return false; } - /// If the instruction is a branch-type instruction with OldNode as a - /// target, repoint it to NewNode and return true, otherwise return - /// false. Repoint all instances of OldNode as a target. + /// If the instruction is a branch-type instruction with OldNode as a target, + /// repoint it to NewNode and return true, otherwise return false. Repoint all + /// instances of OldNode as a target. virtual bool repointEdges(CfgNode *OldNode, CfgNode *NewNode) { (void)OldNode; (void)NewNode; @@ -130,11 +130,11 @@ virtual bool isSimpleAssign() const { return false; } void livenessLightweight(Cfg *Func, LivenessBV &Live); - // Calculates liveness for this instruction. Returns true if this - /// instruction is (tentatively) still live and should be retained, and false - /// if this instruction is (tentatively) dead and should be deleted. The - /// decision is tentative until the liveness dataflow algorithm has converged, - /// and then a separate pass permanently deletes dead instructions. + /// Calculates liveness for this instruction. Returns true if this instruction + /// is (tentatively) still live and should be retained, and false if this + /// instruction is (tentatively) dead and should be deleted. 
The decision is + /// tentative until the liveness dataflow algorithm has converged, and then a + /// separate pass permanently deletes dead instructions. bool liveness(InstNumberT InstNumber, LivenessBV &Live, Liveness *Liveness, LiveBeginEndMap *LiveBegin, LiveBeginEndMap *LiveEnd); @@ -143,13 +143,12 @@ /// instructions, and a target-specific instruction results in a single native /// instruction. virtual uint32_t getEmitInstCount() const { return 0; } - // TODO(stichnot): Change Inst back to abstract once the g++ build - // issue is fixed. llvm::ilist<Ice::Inst> doesn't work under g++ - // because the resize(size_t, Ice::Inst) method is incorrectly - // declared and thus doesn't allow the abstract class Ice::Inst. - // The method should be declared resize(size_t, const Ice::Inst &). - // virtual void emit(const Cfg *Func) const = 0; - // virtual void emitIAS(const Cfg *Func) const = 0; + // TODO(stichnot): Change Inst back to abstract once the g++ build issue is + // fixed. llvm::ilist<Ice::Inst> doesn't work under g++ because the + // resize(size_t, Ice::Inst) method is incorrectly declared and thus doesn't + // allow the abstract class Ice::Inst. The method should be declared + // resize(size_t, const Ice::Inst &). virtual void emit(const Cfg *Func) + // const = 0; virtual void emitIAS(const Cfg *Func) const = 0; virtual void emit(const Cfg *) const { llvm_unreachable("emit on abstract class"); } @@ -179,8 +178,8 @@ LiveRangesEnded |= (((LREndedBits)1u) << VarIndex); } void resetLastUses() { LiveRangesEnded = 0; } - /// The destroy() method lets the instruction cleanly release any - /// memory that was allocated via the Cfg's allocator. + /// The destroy() method lets the instruction cleanly release any memory that + /// was allocated via the Cfg's allocator. virtual void destroy(Cfg *Func) { Func->deallocateArrayOf<Operand *>(Srcs); } const InstKind Kind; @@ -188,17 +187,17 @@ InstNumberT Number; /// Deleted means irrevocably deleted. 
bool Deleted = false; - /// Dead means one of two things depending on context: (1) pending - /// deletion after liveness analysis converges, or (2) marked for - /// deletion during lowering due to a folded bool operation. + /// Dead means one of two things depending on context: (1) pending deletion + /// after liveness analysis converges, or (2) marked for deletion during + /// lowering due to a folded bool operation. bool Dead = false; - /// HasSideEffects means the instruction is something like a function - /// call or a volatile load that can't be removed even if its Dest - /// variable is not live. + /// HasSideEffects means the instruction is something like a function call or + /// a volatile load that can't be removed even if its Dest variable is not + /// live. bool HasSideEffects = false; - /// IsDestNonKillable means that liveness analysis shouldn't consider - /// this instruction to kill the Dest variable. This is used when - /// lowering produces two assignments to the same variable. + /// IsDestNonKillable means that liveness analysis shouldn't consider this + /// instruction to kill the Dest variable. This is used when lowering produces + /// two assignments to the same variable. bool IsDestNonKillable = false; Variable *Dest; @@ -207,13 +206,12 @@ Operand **Srcs; /// LiveRangesEnded marks which Variables' live ranges end in this - /// instruction. An instruction can have an arbitrary number of - /// source operands (e.g. a call instruction), and each source - /// operand can contain 0 or 1 Variable (and target-specific operands - /// could contain more than 1 Variable). All the variables in an - /// instruction are conceptually flattened and each variable is - /// mapped to one bit position of the LiveRangesEnded bit vector. - /// Only the first CHAR_BIT * sizeof(LREndedBits) variables are + /// instruction. An instruction can have an arbitrary number of source + /// operands (e.g. 
a call instruction), and each source operand can contain 0 + /// or 1 Variable (and target-specific operands could contain more than 1 + /// Variable). All the variables in an instruction are conceptually flattened + /// and each variable is mapped to one bit position of the LiveRangesEnded bit + /// vector. Only the first CHAR_BIT * sizeof(LREndedBits) variables are /// tracked this way. using LREndedBits = uint32_t; // only first 32 src operands tracked, sorry LREndedBits LiveRangesEnded; @@ -235,9 +233,9 @@ } }; -/// Alloca instruction. This captures the size in bytes as getSrc(0), -/// and the required alignment in bytes. The alignment must be either -/// 0 (no alignment required) or a power of 2. +/// Alloca instruction. This captures the size in bytes as getSrc(0), and the +/// required alignment in bytes. The alignment must be either 0 (no alignment +/// required) or a power of 2. class InstAlloca : public InstHighLevel { InstAlloca() = delete; InstAlloca(const InstAlloca &) = delete; @@ -261,8 +259,8 @@ const uint32_t AlignInBytes; }; -/// Binary arithmetic instruction. The source operands are captured in -/// getSrc(0) and getSrc(1). +/// Binary arithmetic instruction. The source operands are captured in getSrc(0) +/// and getSrc(1). class InstArithmetic : public InstHighLevel { InstArithmetic() = delete; InstArithmetic(const InstArithmetic &) = delete; @@ -296,12 +294,11 @@ const OpKind Op; }; -/// Assignment instruction. The source operand is captured in -/// getSrc(0). This is not part of the LLVM bitcode, but is a useful -/// abstraction for some of the lowering. E.g., if Phi instruction -/// lowering happens before target lowering, or for representing an -/// Inttoptr instruction, or as an intermediate step for lowering a -/// Load instruction. +/// Assignment instruction. The source operand is captured in getSrc(0). This is +/// not part of the LLVM bitcode, but is a useful abstraction for some of the +/// lowering. 
E.g., if Phi instruction lowering happens before target lowering, +/// or for representing an Inttoptr instruction, or as an intermediate step for +/// lowering a Load instruction. class InstAssign : public InstHighLevel { InstAssign() = delete; InstAssign(const InstAssign &) = delete; @@ -319,16 +316,16 @@ InstAssign(Cfg *Func, Variable *Dest, Operand *Source); }; -/// Branch instruction. This represents both conditional and -/// unconditional branches. +/// Branch instruction. This represents both conditional and unconditional +/// branches. class InstBr : public InstHighLevel { InstBr() = delete; InstBr(const InstBr &) = delete; InstBr &operator=(const InstBr &) = delete; public: - /// Create a conditional branch. If TargetTrue==TargetFalse, it is - /// optimized to an unconditional branch. + /// Create a conditional branch. If TargetTrue==TargetFalse, it is optimized + /// to an unconditional branch. static InstBr *create(Cfg *Func, Operand *Source, CfgNode *TargetTrue, CfgNode *TargetFalse) { return new (Func->allocate<InstBr>()) @@ -365,8 +362,8 @@ CfgNode *TargetTrue; /// nullptr if unconditional branch }; -/// Call instruction. The call target is captured as getSrc(0), and -/// arg I is captured as getSrc(I+1). +/// Call instruction. The call target is captured as getSrc(0), and arg I is +/// captured as getSrc(I+1). class InstCall : public InstHighLevel { InstCall() = delete; InstCall(const InstCall &) = delete; @@ -376,8 +373,8 @@ static InstCall *create(Cfg *Func, SizeT NumArgs, Variable *Dest, Operand *CallTarget, bool HasTailCall) { /// Set HasSideEffects to true so that the call instruction can't be - /// dead-code eliminated. IntrinsicCalls can override this if the - /// particular intrinsic is deletable and has no side-effects. + /// dead-code eliminated. IntrinsicCalls can override this if the particular + /// intrinsic is deletable and has no side-effects. 
const bool HasSideEffects = true; const InstKind Kind = Inst::Call; return new (Func->allocate<InstCall>()) InstCall( @@ -458,8 +455,8 @@ Operand *Source2); }; -/// Floating-point comparison instruction. The source operands are -/// captured in getSrc(0) and getSrc(1). +/// Floating-point comparison instruction. The source operands are captured in +/// getSrc(0) and getSrc(1). class InstFcmp : public InstHighLevel { InstFcmp() = delete; InstFcmp(const InstFcmp &) = delete; @@ -489,8 +486,8 @@ const FCond Condition; }; -/// Integer comparison instruction. The source operands are captured -/// in getSrc(0) and getSrc(1). +/// Integer comparison instruction. The source operands are captured in +/// getSrc(0) and getSrc(1). class InstIcmp : public InstHighLevel { InstIcmp() = delete; InstIcmp(const InstIcmp &) = delete; @@ -543,8 +540,8 @@ Operand *Source2, Operand *Source3); }; -/// Call to an intrinsic function. The call target is captured as getSrc(0), -/// and arg I is captured as getSrc(I+1). +/// Call to an intrinsic function. The call target is captured as getSrc(0), and +/// arg I is captured as getSrc(I+1). class InstIntrinsicCall : public InstCall { InstIntrinsicCall() = delete; InstIntrinsicCall(const InstIntrinsicCall &) = delete; @@ -573,7 +570,7 @@ const Intrinsics::IntrinsicInfo Info; }; -/// Load instruction. The source address is captured in getSrc(0). +/// Load instruction. The source address is captured in getSrc(0). class InstLoad : public InstHighLevel { InstLoad() = delete; InstLoad(const InstLoad &) = delete; @@ -594,8 +591,8 @@ InstLoad(Cfg *Func, Variable *Dest, Operand *SourceAddr); }; -/// Phi instruction. For incoming edge I, the node is Labels[I] and -/// the Phi source operand is getSrc(I). +/// Phi instruction. For incoming edge I, the node is Labels[I] and the Phi +/// source operand is getSrc(I). 
class InstPhi : public InstHighLevel { InstPhi() = delete; InstPhi(const InstPhi &) = delete; @@ -621,15 +618,15 @@ Inst::destroy(Func); } - /// Labels[] duplicates the InEdges[] information in the enclosing - /// CfgNode, but the Phi instruction is created before InEdges[] - /// is available, so it's more complicated to share the list. + /// Labels[] duplicates the InEdges[] information in the enclosing CfgNode, + /// but the Phi instruction is created before InEdges[] is available, so it's + /// more complicated to share the list. CfgNode **Labels; }; -/// Ret instruction. The return value is captured in getSrc(0), but if -/// there is no return value (void-type function), then -/// getSrcSize()==0 and hasRetValue()==false. +/// Ret instruction. The return value is captured in getSrc(0), but if there is +/// no return value (void-type function), then getSrcSize()==0 and +/// hasRetValue()==false. class InstRet : public InstHighLevel { InstRet() = delete; InstRet(const InstRet &) = delete; @@ -675,8 +672,8 @@ Operand *Source2); }; -/// Store instruction. The address operand is captured, along with the -/// data operand to be stored into the address. +/// Store instruction. The address operand is captured, along with the data +/// operand to be stored into the address. class InstStore : public InstHighLevel { InstStore() = delete; InstStore(const InstStore &) = delete; @@ -700,8 +697,7 @@ InstStore(Cfg *Func, Operand *Data, Operand *Addr); }; -/// Switch instruction. The single source operand is captured as -/// getSrc(0). +/// Switch instruction. The single source operand is captured as getSrc(0). class InstSwitch : public InstHighLevel { InstSwitch() = delete; InstSwitch(const InstSwitch &) = delete; @@ -744,8 +740,7 @@ CfgNode **Labels; /// size is NumCases }; -/// Unreachable instruction. This is a terminator instruction with no -/// operands. +/// Unreachable instruction. This is a terminator instruction with no operands. 
class InstUnreachable : public InstHighLevel { InstUnreachable() = delete; InstUnreachable(const InstUnreachable &) = delete; @@ -765,7 +760,7 @@ explicit InstUnreachable(Cfg *Func); }; -/// BundleLock instruction. There are no operands. Contains an option +/// BundleLock instruction. There are no operands. Contains an option /// indicating whether align_to_end is specified. class InstBundleLock : public InstHighLevel { InstBundleLock() = delete; @@ -791,7 +786,7 @@ InstBundleLock(Cfg *Func, Option BundleOption); }; -/// BundleUnlock instruction. There are no operands. +/// BundleUnlock instruction. There are no operands. class InstBundleUnlock : public InstHighLevel { InstBundleUnlock() = delete; InstBundleUnlock(const InstBundleUnlock &) = delete; @@ -812,18 +807,17 @@ explicit InstBundleUnlock(Cfg *Func); }; -/// FakeDef instruction. This creates a fake definition of a variable, -/// which is how we represent the case when an instruction produces -/// multiple results. This doesn't happen with high-level ICE -/// instructions, but might with lowered instructions. For example, -/// this would be a way to represent condition flags being modified by -/// an instruction. +/// FakeDef instruction. This creates a fake definition of a variable, which is +/// how we represent the case when an instruction produces multiple results. +/// This doesn't happen with high-level ICE instructions, but might with lowered +/// instructions. For example, this would be a way to represent condition flags +/// being modified by an instruction. /// -/// It's generally useful to set the optional source operand to be the -/// dest variable of the instruction that actually produces the FakeDef -/// dest. Otherwise, the original instruction could be dead-code -/// eliminated if its dest operand is unused, and therefore the FakeDef -/// dest wouldn't be properly initialized. 
+/// It's generally useful to set the optional source operand to be the dest +/// variable of the instruction that actually produces the FakeDef dest. +/// Otherwise, the original instruction could be dead-code eliminated if its +/// dest operand is unused, and therefore the FakeDef dest wouldn't be properly +/// initialized. class InstFakeDef : public InstHighLevel { InstFakeDef() = delete; InstFakeDef(const InstFakeDef &) = delete; @@ -843,11 +837,10 @@ InstFakeDef(Cfg *Func, Variable *Dest, Variable *Src); }; -/// FakeUse instruction. This creates a fake use of a variable, to -/// keep the instruction that produces that variable from being -/// dead-code eliminated. This is useful in a variety of lowering -/// situations. The FakeUse instruction has no dest, so it can itself -/// never be dead-code eliminated. +/// FakeUse instruction. This creates a fake use of a variable, to keep the +/// instruction that produces that variable from being dead-code eliminated. +/// This is useful in a variety of lowering situations. The FakeUse instruction +/// has no dest, so it can itself never be dead-code eliminated. class InstFakeUse : public InstHighLevel { InstFakeUse() = delete; InstFakeUse(const InstFakeUse &) = delete; @@ -866,16 +859,15 @@ InstFakeUse(Cfg *Func, Variable *Src); }; -/// FakeKill instruction. This "kills" a set of variables by modeling -/// a trivial live range at this instruction for each (implicit) -/// variable. The primary use is to indicate that scratch registers -/// are killed after a call, so that the register allocator won't -/// assign a scratch register to a variable whose live range spans a -/// call. +/// FakeKill instruction. This "kills" a set of variables by modeling a trivial +/// live range at this instruction for each (implicit) variable. 
The primary use +/// is to indicate that scratch registers are killed after a call, so that the +/// register allocator won't assign a scratch register to a variable whose live +/// range spans a call. /// -/// The FakeKill instruction also holds a pointer to the instruction -/// that kills the set of variables, so that if that linked instruction -/// gets dead-code eliminated, the FakeKill instruction will as well. +/// The FakeKill instruction also holds a pointer to the instruction that kills +/// the set of variables, so that if that linked instruction gets dead-code +/// eliminated, the FakeKill instruction will as well. class InstFakeKill : public InstHighLevel { InstFakeKill() = delete; InstFakeKill(const InstFakeKill &) = delete; @@ -898,10 +890,9 @@ const Inst *Linked; }; -/// JumpTable instruction. This represents a jump table that will be -/// stored in the .rodata section. This is used to track and repoint -/// the target CfgNodes which may change, for example due to -/// splitting for phi lowering. +/// JumpTable instruction. This represents a jump table that will be stored in +/// the .rodata section. This is used to track and repoint the target CfgNodes +/// which may change, for example due to splitting for phi lowering. class InstJumpTable : public InstHighLevel { InstJumpTable() = delete; InstJumpTable(const InstJumpTable &) = delete; @@ -968,8 +959,8 @@ namespace llvm { -/// Override the default ilist traits so that Inst's private ctor and -/// deleted dtor aren't invoked. +/// Override the default ilist traits so that Inst's private ctor and deleted +/// dtor aren't invoked. template <> struct ilist_traits<Ice::Inst> : public ilist_default_traits<Ice::Inst> { Ice::Inst *createSentinel() const {
diff --git a/src/IceInstARM32.cpp b/src/IceInstARM32.cpp index 9a68115..d449641 100644 --- a/src/IceInstARM32.cpp +++ b/src/IceInstARM32.cpp
@@ -8,8 +8,8 @@ //===----------------------------------------------------------------------===// /// /// \file -/// This file implements the InstARM32 and OperandARM32 classes, -/// primarily the constructors and the dump()/emit() methods. +/// This file implements the InstARM32 and OperandARM32 classes, primarily the +/// constructors and the dump()/emit() methods. /// //===----------------------------------------------------------------------===// @@ -271,16 +271,14 @@ TargetTrue(TargetTrue), TargetFalse(TargetFalse), Label(Label) {} bool InstARM32Br::optimizeBranch(const CfgNode *NextNode) { - // If there is no next block, then there can be no fallthrough to - // optimize. + // If there is no next block, then there can be no fallthrough to optimize. if (NextNode == nullptr) return false; // Intra-block conditional branches can't be optimized. if (Label) return false; - // If there is no fallthrough node, such as a non-default case label - // for a switch instruction, then there is no opportunity to - // optimize. + // If there is no fallthrough node, such as a non-default case label for a + // switch instruction, then there is no opportunity to optimize. if (getTargetFalse() == nullptr) return false; @@ -290,15 +288,15 @@ setDeleted(); return true; } - // If the fallthrough is to the next node, set fallthrough to nullptr - // to indicate. + // If the fallthrough is to the next node, set fallthrough to nullptr to + // indicate. if (getTargetFalse() == NextNode) { TargetFalse = nullptr; return true; } - // If TargetTrue is the next node, and TargetFalse is not nullptr - // (which was already tested above), then invert the branch - // condition, swap the targets, and set new fallthrough to nullptr. + // If TargetTrue is the next node, and TargetFalse is not nullptr (which was + // already tested above), then invert the branch condition, swap the targets, + // and set new fallthrough to nullptr. 
if (getTargetTrue() == NextNode) { assert(Predicate != CondARM32::AL); setPredicate(getOppositeCondition(getPredicate())); @@ -338,10 +336,10 @@ InstARM32Pop::InstARM32Pop(Cfg *Func, const VarList &Dests) : InstARM32(Func, InstARM32::Pop, 0, nullptr), Dests(Dests) { - // Track modifications to Dests separately via FakeDefs. - // Also, a pop instruction affects the stack pointer and so it should not - // be allowed to be automatically dead-code eliminated. This is automatic - // since we leave the Dest as nullptr. + // Track modifications to Dests separately via FakeDefs. Also, a pop + // instruction affects the stack pointer and so it should not be allowed to + // be automatically dead-code eliminated. This is automatic since we leave + // the Dest as nullptr. } InstARM32Push::InstARM32Push(Cfg *Func, const VarList &Srcs) @@ -450,8 +448,8 @@ Operand *Src0 = getSrc(0); if (const auto *Src0V = llvm::dyn_cast<Variable>(Src0)) { if (!Src0V->hasReg()) { - // Always use the whole stack slot. A 32-bit load has a larger range - // of offsets than 16-bit, etc. + // Always use the whole stack slot. A 32-bit load has a larger range of + // offsets than 16-bit, etc. ActualOpcode = IceString("ldr"); } } else { @@ -662,13 +660,13 @@ Ostream &Str = Func->getContext()->getStrEmit(); assert(getSrcSize() == 1); if (llvm::isa<ConstantInteger32>(getCallTarget())) { - // This shouldn't happen (typically have to copy the full 32-bits - // to a register and do an indirect jump). + // This shouldn't happen (typically have to copy the full 32-bits to a + // register and do an indirect jump). llvm::report_fatal_error("ARM32Call to ConstantInteger32"); } else if (const auto CallTarget = llvm::dyn_cast<ConstantRelocatable>(getCallTarget())) { - // Calls only have 24-bits, but the linker should insert veneers to - // extend the range if needed. + // Calls only have 24-bits, but the linker should insert veneers to extend + // the range if needed. Str << "\t" << "bl" << "\t";
diff --git a/src/IceInstARM32.def b/src/IceInstARM32.def index 1836667..4e34cbf 100644 --- a/src/IceInstARM32.def +++ b/src/IceInstARM32.def
@@ -17,20 +17,20 @@ // NOTE: PC and SP are not considered isInt, to avoid register allocating. // // For the NaCl sandbox we also need to r9 for TLS, so just reserve always. -// TODO(jvoung): Allow r9 to be isInt when sandboxing is turned off -// (native mode). +// TODO(jvoung): Allow r9 to be isInt when sandboxing is turned off (native +// mode). // // IP is not considered isInt to reserve it as a scratch register. A scratch // register is useful for expanding instructions post-register allocation. // -// LR is not considered isInt to avoid being allocated as a register. -// It is technically preserved, but save/restore is handled separately, -// based on whether or not the function MaybeLeafFunc. +// LR is not considered isInt to avoid being allocated as a register. It is +// technically preserved, but save/restore is handled separately, based on +// whether or not the function MaybeLeafFunc. // ALIASESn is a family of macros that we use to define register aliasing in // ARM32. n indicates how many aliases are being provided to the macro. It -// assumes the parameters are register names declared in a namespace/class named -// RegARM32. +// assumes the parameters are register names declared in a namespace/class +// named RegARM32. #define ALIASES1(r0) \ {RegARM32::r0} #define ALIASES2(r0, r1) \ @@ -152,12 +152,12 @@ //#define X(val, encode, name, scratch, preserved, stackptr, frameptr, // isInt, isFP32,isFP64, isVec128, aliases_init) -// D registers 0-7 are scratch, 8-15 are preserved, and 16-31 -// are also scratch (if supported by the D32 feature vs D16). -// D registers are defined in reverse order so that, during register allocation, -// Subzero will prefer higher D registers. In processors supporting the D32 -// feature this will effectively cause double allocation to bias towards -// allocating "high" D registers, which do not alias any S registers. 
+// D registers 0-7 are scratch, 8-15 are preserved, and 16-31 are also scratch +// (if supported by the D32 feature vs D16). D registers are defined in reverse +// order so that, during register allocation, Subzero will prefer higher D +// registers. In processors supporting the D32 feature this will effectively +// cause double allocation to bias towards allocating "high" D registers, which +// do not alias any S registers. // // Regenerate this with the following python script: // def print_dregs(): @@ -251,9 +251,9 @@ //#define X(val, encode, name, scratch, preserved, stackptr, frameptr, // isInt, isFP32, isFP64, isVec128, aliases_init) -// Q registers 0-3 are scratch, 4-7 are preserved, and 8-15 -// are also scratch (if supported by the D32 feature). -// Q registers are defined in reverse order for the same reason as D registers. +// Q registers 0-3 are scratch, 4-7 are preserved, and 8-15 are also scratch +// (if supported by the D32 feature). Q registers are defined in reverse order +// for the same reason as D registers. // // Regenerate this with the following python script: // def print_qregs(): @@ -320,10 +320,10 @@ // isInt, isFP32, isFP64, isVec128, alias_init) #undef ALIASES -// We also provide a combined table, so that there is a namespace where -// all of the registers are considered and have distinct numberings. -// This is in contrast to the above, where the "encode" is based on how -// the register numbers will be encoded in binaries and values can overlap. +// We also provide a combined table, so that there is a namespace where all of +// the registers are considered and have distinct numberings. This is in +// contrast to the above, where the "encode" is based on how the register +// numbers will be encoded in binaries and values can overlap. 
#define REGARM32_TABLE \ /* val, encode, name, scratch, preserved, stackptr, frameptr, isInt, \ isFP32, isFP64, isVec128, alias_init */ \ @@ -347,8 +347,8 @@ // define X(val, init) // Load/Store instruction width suffixes and FP/Vector element size suffixes -// the # of offset bits allowed as part of an addressing mode (for sign or -// zero extending load/stores). +// the # of offset bits allowed as part of an addressing mode (for sign or zero +// extending load/stores). #define ICETYPEARM32_TABLE \ /* tag, element type, int_width, vec_width, addr bits sext, zext */ \ X(IceType_void, IceType_void, "" , "" , 0 , 0) \ @@ -378,9 +378,9 @@ X(RRX, "rrx") //#define X(tag, emit) -// Attributes for the condition code 4-bit encoding (that is independent -// of the APSR's NZCV fields). For example, EQ is 0, but corresponds to -// Z = 1, and NE is 1, but corresponds to Z = 0. +// Attributes for the condition code 4-bit encoding (that is independent of the +// APSR's NZCV fields). For example, EQ is 0, but corresponds to Z = 1, and NE +// is 1, but corresponds to Z = 0. #define ICEINSTARM32COND_TABLE \ /* enum value, encoding, opposite, emit */ \ X(EQ, 0 , NE, "eq") /* equal */ \
diff --git a/src/IceInstARM32.h b/src/IceInstARM32.h index 8c2ea6f..89e5655 100644 --- a/src/IceInstARM32.h +++ b/src/IceInstARM32.h
@@ -8,9 +8,9 @@ //===----------------------------------------------------------------------===// /// /// \file -/// This file declares the InstARM32 and OperandARM32 classes and -/// their subclasses. This represents the machine instructions and -/// operands used for ARM32 code selection. +/// This file declares the InstARM32 and OperandARM32 classes and their +/// subclasses. This represents the machine instructions and operands used for +/// ARM32 code selection. /// //===----------------------------------------------------------------------===// @@ -27,7 +27,7 @@ class TargetARM32; -/// OperandARM32 extends the Operand hierarchy. Its subclasses are +/// OperandARM32 extends the Operand hierarchy. Its subclasses are /// OperandARM32Mem and OperandARM32Flex. class OperandARM32 : public Operand { OperandARM32() = delete; @@ -87,17 +87,17 @@ /// NOTE: The Variable-typed operands have to be registers. /// /// (1) Reg + Imm. The Immediate actually has a limited number of bits - /// for encoding, so check canHoldOffset first. It cannot handle - /// general Constant operands like ConstantRelocatable, since a relocatable - /// can potentially take up too many bits. + /// for encoding, so check canHoldOffset first. It cannot handle general + /// Constant operands like ConstantRelocatable, since a relocatable can + /// potentially take up too many bits. static OperandARM32Mem *create(Cfg *Func, Type Ty, Variable *Base, ConstantInteger32 *ImmOffset, AddrMode Mode = Offset) { return new (Func->allocate<OperandARM32Mem>()) OperandARM32Mem(Func, Ty, Base, ImmOffset, Mode); } - /// (2) Reg +/- Reg with an optional shift of some kind and amount. - /// Note that this mode is disallowed in the NaCl sandbox. + /// (2) Reg +/- Reg with an optional shift of some kind and amount. Note that + /// this mode is disallowed in the NaCl sandbox. 
static OperandARM32Mem *create(Cfg *Func, Type Ty, Variable *Base, Variable *Index, ShiftKind ShiftOp = kNoShift, uint16_t ShiftAmt = 0, @@ -130,10 +130,10 @@ return Operand->getKind() == static_cast<OperandKind>(kMem); } - /// Return true if a load/store instruction for an element of type Ty - /// can encode the Offset directly in the immediate field of the 32-bit - /// ARM instruction. For some types, if the load is Sign extending, then - /// the range is reduced. + /// Return true if a load/store instruction for an element of type Ty can + /// encode the Offset directly in the immediate field of the 32-bit ARM + /// instruction. For some types, if the load is Sign extending, then the range + /// is reduced. static bool canHoldOffset(Type Ty, bool SignExt, int32_t Offset); private: @@ -150,10 +150,9 @@ AddrMode Mode; }; -/// OperandARM32Flex represent the "flexible second operand" for -/// data-processing instructions. It can be a rotatable 8-bit constant, or -/// a register with an optional shift operand. The shift amount can even be -/// a third register. +/// OperandARM32Flex represent the "flexible second operand" for data-processing +/// instructions. It can be a rotatable 8-bit constant, or a register with an +/// optional shift operand. The shift amount can even be a third register. class OperandARM32Flex : public OperandARM32 { OperandARM32Flex() = delete; OperandARM32Flex(const OperandARM32Flex &) = delete; @@ -191,8 +190,8 @@ return Operand->getKind() == static_cast<OperandKind>(kFlexImm); } - /// Return true if the Immediate can fit in the ARM flexible operand. - /// Fills in the out-params RotateAmt and Immed_8 if Immediate fits. + /// Return true if the Immediate can fit in the ARM flexible operand. Fills in + /// the out-params RotateAmt and Immed_8 if Immediate fits. static bool canHoldImm(uint32_t Immediate, uint32_t *RotateAmt, uint32_t *Immed_8); @@ -244,9 +243,9 @@ /// StackVariable represents a Var that isn't assigned a register (stack-only). 
/// It is assigned a stack slot, but the slot's offset may be too large to -/// represent in the native addressing mode, and so it has a separate -/// base register from SP/FP, where the offset from that base register is -/// then in range. +/// represent in the native addressing mode, and so it has a separate base +/// register from SP/FP, where the offset from that base register is then in +/// range. class StackVariable final : public Variable { StackVariable() = delete; StackVariable(const StackVariable &) = delete; @@ -272,8 +271,8 @@ }; /// Base class for ARM instructions. While most ARM instructions can be -/// conditionally executed, a few of them are not predicable (halt, -/// memory barriers, etc.). +/// conditionally executed, a few of them are not predicable (halt, memory +/// barriers, etc.). class InstARM32 : public InstTarget { InstARM32() = delete; InstARM32(const InstARM32 &) = delete; @@ -525,8 +524,8 @@ static const char *Opcode; }; -/// Base class for assignment instructions. -/// These can be tested for redundancy (and elided if redundant). +/// Base class for assignment instructions. These can be tested for redundancy +/// (and elided if redundant). template <InstARM32::InstKindARM32 K> class InstARM32Movlike : public InstARM32Pred { InstARM32Movlike() = delete; @@ -576,8 +575,8 @@ InstARM32ThreeAddrGPR &operator=(const InstARM32ThreeAddrGPR &) = delete; public: - /// Create an ordinary binary-op instruction like add, and sub. - /// Dest and Src1 must be registers. + /// Create an ordinary binary-op instruction like add, and sub. Dest and Src1 + /// must be registers. static InstARM32ThreeAddrGPR *create(Cfg *Func, Variable *Dest, Variable *Src0, Operand *Src1, CondARM32::Cond Predicate, @@ -618,10 +617,10 @@ bool SetFlags; }; -/// Instructions of the form x := y op z, for vector/FP. We leave these as +/// Instructions of the form x := y op z, for vector/FP. 
We leave these as /// unconditional: "ARM deprecates the conditional execution of any instruction /// encoding provided by the Advanced SIMD Extension that is not also provided -/// by the Floating-point (VFP) extension". They do not set flags. +/// by the Floating-point (VFP) extension". They do not set flags. template <InstARM32::InstKindARM32 K> class InstARM32ThreeAddrFP : public InstARM32 { InstARM32ThreeAddrFP() = delete; @@ -629,8 +628,8 @@ InstARM32ThreeAddrFP &operator=(const InstARM32ThreeAddrFP &) = delete; public: - /// Create a vector/FP binary-op instruction like vadd, and vsub. - /// Everything must be a register. + /// Create a vector/FP binary-op instruction like vadd, and vsub. Everything + /// must be a register. static InstARM32ThreeAddrFP *create(Cfg *Func, Variable *Dest, Variable *Src0, Variable *Src1) { return new (Func->allocate<InstARM32ThreeAddrFP>()) @@ -779,24 +778,24 @@ using InstARM32Vmul = InstARM32ThreeAddrFP<InstARM32::Vmul>; using InstARM32Vsub = InstARM32ThreeAddrFP<InstARM32::Vsub>; using InstARM32Ldr = InstARM32Movlike<InstARM32::Ldr>; -/// Move instruction (variable <- flex). This is more of a pseudo-inst. -/// If var is a register, then we use "mov". If var is stack, then we use -/// "str" to store to the stack. +/// Move instruction (variable <- flex). This is more of a pseudo-inst. If var +/// is a register, then we use "mov". If var is stack, then we use "str" to +/// store to the stack. using InstARM32Mov = InstARM32Movlike<InstARM32::Mov>; /// Represents various vector mov instruction forms (simple single source, /// single dest forms only, not the 2 GPR <-> 1 D reg forms, etc.). using InstARM32Vldr = InstARM32Movlike<InstARM32::Vldr>; -/// MovT leaves the bottom bits alone so dest is also a source. -/// This helps indicate that a previous MovW setting dest is not dead code. +/// MovT leaves the bottom bits alone so dest is also a source. This helps +/// indicate that a previous MovW setting dest is not dead code. 
using InstARM32Movt = InstARM32TwoAddrGPR<InstARM32::Movt>; using InstARM32Movw = InstARM32UnaryopGPR<InstARM32::Movw, false>; using InstARM32Clz = InstARM32UnaryopGPR<InstARM32::Clz, false>; using InstARM32Mvn = InstARM32UnaryopGPR<InstARM32::Mvn, false>; using InstARM32Rbit = InstARM32UnaryopGPR<InstARM32::Rbit, false>; using InstARM32Rev = InstARM32UnaryopGPR<InstARM32::Rev, false>; -// Technically, the uxt{b,h} and sxt{b,h} instructions have a rotation -// operand as well (rotate source by 8, 16, 24 bits prior to extending), -// but we aren't using that for now, so just model as a Unaryop. +// Technically, the uxt{b,h} and sxt{b,h} instructions have a rotation operand +// as well (rotate source by 8, 16, 24 bits prior to extending), but we aren't +// using that for now, so just model as a Unaryop. using InstARM32Sxt = InstARM32UnaryopGPR<InstARM32::Sxt, true>; using InstARM32Uxt = InstARM32UnaryopGPR<InstARM32::Uxt, true>; using InstARM32Vsqrt = InstARM32UnaryopFP<InstARM32::Vsqrt>; @@ -805,9 +804,9 @@ using InstARM32Cmp = InstARM32CmpLike<InstARM32::Cmp>; using InstARM32Tst = InstARM32CmpLike<InstARM32::Tst>; -// InstARM32Label represents an intra-block label that is the target -// of an intra-block branch. The offset between the label and the -// branch must be fit in the instruction immediate (considered "near"). +// InstARM32Label represents an intra-block label that is the target of an +// intra-block branch. The offset between the label and the branch must be fit +// in the instruction immediate (considered "near"). class InstARM32Label : public InstARM32 { InstARM32Label() = delete; InstARM32Label(const InstARM32Label &) = delete; @@ -852,9 +851,9 @@ return new (Func->allocate<InstARM32Br>()) InstARM32Br(Func, NoCondTarget, Target, NoLabel, CondARM32::AL); } - /// Create a non-terminator conditional branch to a node, with a - /// fallthrough to the next instruction in the current node. This is - /// used for switch lowering. 
+ /// Create a non-terminator conditional branch to a node, with a fallthrough + /// to the next instruction in the current node. This is used for switch + /// lowering. static InstARM32Br *create(Cfg *Func, CfgNode *Target, CondARM32::Cond Predicate) { assert(Predicate != CondARM32::AL); @@ -903,18 +902,18 @@ const InstARM32Label *Label; // Intra-block branch target }; -/// AdjustStack instruction - subtracts SP by the given amount and -/// updates the stack offset during code emission. +/// AdjustStack instruction - subtracts SP by the given amount and updates the +/// stack offset during code emission. class InstARM32AdjustStack : public InstARM32 { InstARM32AdjustStack() = delete; InstARM32AdjustStack(const InstARM32AdjustStack &) = delete; InstARM32AdjustStack &operator=(const InstARM32AdjustStack &) = delete; public: - /// Note: We need both Amount and SrcAmount. If Amount is too large then - /// it needs to be copied to a register (so SrcAmount could be a register). - /// However, we also need the numeric Amount for bookkeeping, and it's - /// hard to pull that from the generic SrcAmount operand. + /// Note: We need both Amount and SrcAmount. If Amount is too large then it + /// needs to be copied to a register (so SrcAmount could be a register). + /// However, we also need the numeric Amount for bookkeeping, and it's hard to + /// pull that from the generic SrcAmount operand. static InstARM32AdjustStack *create(Cfg *Func, Variable *SP, SizeT Amount, Operand *SrcAmount) { return new (Func->allocate<InstARM32AdjustStack>()) @@ -932,7 +931,7 @@ const SizeT Amount; }; -/// Call instruction (bl/blx). Arguments should have already been pushed. +/// Call instruction (bl/blx). Arguments should have already been pushed. /// Technically bl and the register form of blx can be predicated, but we'll /// leave that out until needed. class InstARM32Call : public InstARM32 { @@ -977,8 +976,8 @@ VarList Dests; }; -/// Push a list of GPRs. 
Technically this can be predicated, but we don't -/// need that functionality. +/// Push a list of GPRs. Technically this can be predicated, but we don't need +/// that functionality. class InstARM32Push : public InstARM32 { InstARM32Push() = delete; InstARM32Push(const InstARM32Push &) = delete; @@ -997,11 +996,11 @@ InstARM32Push(Cfg *Func, const VarList &Srcs); }; -/// Ret pseudo-instruction. This is actually a "bx" instruction with -/// an "lr" register operand, but epilogue lowering will search for a Ret -/// instead of a generic "bx". This instruction also takes a Source -/// operand (for non-void returning functions) for liveness analysis, though -/// a FakeUse before the ret would do just as well. +/// Ret pseudo-instruction. This is actually a "bx" instruction with an "lr" +/// register operand, but epilogue lowering will search for a Ret instead of a +/// generic "bx". This instruction also takes a Source operand (for non-void +/// returning functions) for liveness analysis, though a FakeUse before the ret +/// would do just as well. /// /// NOTE: Even though "bx" can be predicated, for now leave out the predication /// since it's not yet known to be useful for Ret. That may complicate finding @@ -1025,8 +1024,8 @@ InstARM32Ret(Cfg *Func, Variable *LR, Variable *Source); }; -/// Store instruction. It's important for liveness that there is no Dest -/// operand (OperandARM32Mem instead of Dest Variable). +/// Store instruction. It's important for liveness that there is no Dest operand +/// (OperandARM32Mem instead of Dest Variable). class InstARM32Str : public InstARM32Pred { InstARM32Str() = delete; InstARM32Str(const InstARM32Str &) = delete; @@ -1205,9 +1204,9 @@ Variable *Dest1 = nullptr; }; -// Declare partial template specializations of emit() methods that -// already have default implementations. Without this, there is the -// possibility of ODR violations and link errors. 
+// Declare partial template specializations of emit() methods that already have +// default implementations. Without this, there is the possibility of ODR +// violations and link errors. template <> void InstARM32Ldr::emit(const Cfg *Func) const; template <> void InstARM32Mov::emit(const Cfg *Func) const;
diff --git a/src/IceInstMIPS32.cpp b/src/IceInstMIPS32.cpp index e386806..7773272 100644 --- a/src/IceInstMIPS32.cpp +++ b/src/IceInstMIPS32.cpp
@@ -8,8 +8,8 @@ //===----------------------------------------------------------------------===// // /// \file -/// This file implements the InstMips32 and OperandMips32 classes, -/// primarily the constructors and the dump()/emit() methods. +/// This file implements the InstMips32 and OperandMips32 classes, primarily the +/// constructors and the dump()/emit() methods. /// //===----------------------------------------------------------------------===//
diff --git a/src/IceInstMIPS32.h b/src/IceInstMIPS32.h index e426598..1c4863f 100644 --- a/src/IceInstMIPS32.h +++ b/src/IceInstMIPS32.h
@@ -8,9 +8,9 @@ //===----------------------------------------------------------------------===// /// /// \file -/// This file declares the InstMIPS32 and OperandMIPS32 classes and -/// their subclasses. This represents the machine instructions and -/// operands used for MIPS32 code selection. +/// This file declares the InstMIPS32 and OperandMIPS32 classes and their +/// subclasses. This represents the machine instructions and operands used for +/// MIPS32 code selection. /// //===----------------------------------------------------------------------===// @@ -47,13 +47,13 @@ } }; -/// Ret pseudo-instruction. This is actually a "jr" instruction with -/// an "ra" register operand, but epilogue lowering will search for a Ret -/// instead of a generic "jr". This instruction also takes a Source -/// operand (for non-void returning functions) for liveness analysis, though -/// a FakeUse before the ret would do just as well. -/// TODO(reed kotler): This needs was take from the ARM port and needs to be -/// scrubbed in the future. +/// Ret pseudo-instruction. This is actually a "jr" instruction with an "ra" +/// register operand, but epilogue lowering will search for a Ret instead of a +/// generic "jr". This instruction also takes a Source operand (for non-void +/// returning functions) for liveness analysis, though a FakeUse before the ret +/// would do just as well. +// TODO(reed kotler): This was taken from the ARM port and needs to be +// scrubbed in the future. class InstMIPS32Ret : public InstMIPS32 { InstMIPS32Ret() = delete;
diff --git a/src/IceInstX8632.cpp b/src/IceInstX8632.cpp index cfd7fb3..3a8c57c 100644 --- a/src/IceInstX8632.cpp +++ b/src/IceInstX8632.cpp
@@ -106,9 +106,8 @@ assert(SegmentReg >= 0 && SegmentReg < SegReg_NUM); Str << "%" << X8632::Traits::InstSegmentRegNames[SegmentReg] << ":"; } - // Emit as Offset(Base,Index,1<<Shift). - // Offset is emitted without the leading '$'. - // Omit the (Base,Index,1<<Shift) part if Base==nullptr. + // Emit as Offset(Base,Index,1<<Shift). Offset is emitted without the leading + // '$'. Omit the (Base,Index,1<<Shift) part if Base==nullptr. if (!Offset) { // No offset, emit nothing. } else if (const auto CI = llvm::dyn_cast<ConstantInteger32>(Offset)) {
diff --git a/src/IceInstX8664.cpp b/src/IceInstX8664.cpp index 6d9ccd6..cb765ae 100644 --- a/src/IceInstX8664.cpp +++ b/src/IceInstX8664.cpp
@@ -92,9 +92,8 @@ if (!BuildDefs::dump()) return; Ostream &Str = Func->getContext()->getStrEmit(); - // Emit as Offset(Base,Index,1<<Shift). - // Offset is emitted without the leading '$'. - // Omit the (Base,Index,1<<Shift) part if Base==nullptr. + // Emit as Offset(Base,Index,1<<Shift). Offset is emitted without the leading + // '$'. Omit the (Base,Index,1<<Shift) part if Base==nullptr. if (!Offset) { // No offset, emit nothing. } else if (const auto CI = llvm::dyn_cast<ConstantInteger32>(Offset)) {
diff --git a/src/IceInstX86Base.h b/src/IceInstX86Base.h index 8883902..5ca9422 100644 --- a/src/IceInstX86Base.h +++ b/src/IceInstX86Base.h
@@ -146,9 +146,8 @@ getOppositeCondition(typename Traits::Cond::BrCond Cond); void dump(const Cfg *Func) const override; - // Shared emit routines for common forms of instructions. - // See the definition of emitTwoAddress() for a description of - // ShiftHack. + // Shared emit routines for common forms of instructions. See the definition + // of emitTwoAddress() for a description of ShiftHack. static void emitTwoAddress(const char *Opcode, const Inst *Inst, const Cfg *Func, bool ShiftHack = false); @@ -165,16 +164,15 @@ static bool isClassof(const Inst *Inst, InstKindX86 MyKind) { return Inst->getKind() == static_cast<InstKind>(MyKind); } - // Most instructions that operate on vector arguments require vector - // memory operands to be fully aligned (16-byte alignment for PNaCl - // vector types). The stack frame layout and call ABI ensure proper - // alignment for stack operands, but memory operands (originating - // from load/store bitcode instructions) only have element-size - // alignment guarantees. This function validates that none of the - // operands is a memory operand of vector type, calling - // report_fatal_error() if one is found. This function should be - // called during emission, and maybe also in the ctor (as long as - // that fits the lowering style). + // Most instructions that operate on vector arguments require vector memory + // operands to be fully aligned (16-byte alignment for PNaCl vector types). + // The stack frame layout and call ABI ensure proper alignment for stack + // operands, but memory operands (originating from load/store bitcode + // instructions) only have element-size alignment guarantees. This function + // validates that none of the operands is a memory operand of vector type, + // calling report_fatal_error() if one is found. This function should be + // called during emission, and maybe also in the ctor (as long as that fits + // the lowering style). 
void validateVectorAddrMode() const { if (this->getDest()) this->validateVectorAddrModeOpnd(this->getDest()); @@ -193,8 +191,8 @@ }; /// InstX86FakeRMW represents a non-atomic read-modify-write operation on a -/// memory location. An InstX86FakeRMW is a "fake" instruction in that it -/// still needs to be lowered to some actual RMW instruction. +/// memory location. An InstX86FakeRMW is a "fake" instruction in that it still +/// needs to be lowered to some actual RMW instruction. /// /// If A is some memory address, D is some data value to apply, and OP is an /// arithmetic operator, the instruction operates as: (*A) = (*A) OP D @@ -228,17 +226,16 @@ InstArithmetic::OpKind Op, Variable *Beacon); }; -/// InstX86Label represents an intra-block label that is the target -/// of an intra-block branch. The offset between the label and the -/// branch must be fit into one byte (considered "near"). These are -/// used for lowering i1 calculations, Select instructions, and 64-bit -/// compares on a 32-bit architecture, without basic block splitting. -/// Basic block splitting is not so desirable for several reasons, one -/// of which is the impact on decisions based on whether a variable's -/// live range spans multiple basic blocks. +/// InstX86Label represents an intra-block label that is the target of an +/// intra-block branch. The offset between the label and the branch must be fit +/// into one byte (considered "near"). These are used for lowering i1 +/// calculations, Select instructions, and 64-bit compares on a 32-bit +/// architecture, without basic block splitting. Basic block splitting is not so +/// desirable for several reasons, one of which is the impact on decisions based +/// on whether a variable's live range spans multiple basic blocks. /// -/// Intra-block control flow must be used with caution. Consider the -/// sequence for "c = (a >= b ? x : y)". +/// Intra-block control flow must be used with caution. Consider the sequence +/// for "c = (a >= b ? 
x : y)". /// cmp a, b /// br lt, L1 /// mov c, x @@ -247,11 +244,10 @@ /// mov c, y /// L2: /// -/// Labels L1 and L2 are intra-block labels. Without knowledge of the -/// intra-block control flow, liveness analysis will determine the "mov -/// c, x" instruction to be dead. One way to prevent this is to insert -/// a "FakeUse(c)" instruction anywhere between the two "mov c, ..." -/// instructions, e.g.: +/// Labels L1 and L2 are intra-block labels. Without knowledge of the +/// intra-block control flow, liveness analysis will determine the "mov c, x" +/// instruction to be dead. One way to prevent this is to insert a "FakeUse(c)" +/// instruction anywhere between the two "mov c, ..." instructions, e.g.: /// /// cmp a, b /// br lt, L1 @@ -262,10 +258,9 @@ /// mov c, y /// L2: /// -/// The down-side is that "mov c, x" can never be dead-code eliminated -/// even if there are no uses of c. As unlikely as this situation is, -/// it may be prevented by running dead code elimination before -/// lowering. +/// The down-side is that "mov c, x" can never be dead-code eliminated even if +/// there are no uses of c. As unlikely as this situation is, it may be +/// prevented by running dead code elimination before lowering. template <class Machine> class InstX86Label final : public InstX86Base<Machine> { InstX86Label() = delete; @@ -319,9 +314,9 @@ InstX86Br(Func, NoCondTarget, Target, NoLabel, InstX86Base<Machine>::Traits::Cond::Br_None, Kind); } - /// Create a non-terminator conditional branch to a node, with a - /// fallthrough to the next instruction in the current node. This is - /// used for switch lowering. + /// Create a non-terminator conditional branch to a node, with a fallthrough + /// to the next instruction in the current node. This is used for switch + /// lowering. 
static InstX86Br * create(Cfg *Func, CfgNode *Target, typename InstX86Base<Machine>::Traits::Cond::BrCond Condition, @@ -381,9 +376,9 @@ const Mode Kind; }; -/// Jump to a target outside this function, such as tailcall, nacljump, -/// naclret, unreachable. This is different from a Branch instruction -/// in that there is no intra-function control flow to represent. +/// Jump to a target outside this function, such as tailcall, nacljump, naclret, +/// unreachable. This is different from a Branch instruction in that there is no +/// intra-function control flow to represent. template <class Machine> class InstX86Jmp final : public InstX86Base<Machine> { InstX86Jmp() = delete; InstX86Jmp(const InstX86Jmp &) = delete; @@ -405,8 +400,8 @@ InstX86Jmp(Cfg *Func, Operand *Target); }; -/// AdjustStack instruction - subtracts esp by the given amount and -/// updates the stack offset during code emission. +/// AdjustStack instruction - subtracts esp by the given amount and updates the +/// stack offset during code emission. template <class Machine> class InstX86AdjustStack final : public InstX86Base<Machine> { InstX86AdjustStack() = delete; @@ -431,7 +426,7 @@ SizeT Amount; }; -/// Call instruction. Arguments should have already been pushed. +/// Call instruction. Arguments should have already been pushed. template <class Machine> class InstX86Call final : public InstX86Base<Machine> { InstX86Call() = delete; InstX86Call(const InstX86Call &) = delete; @@ -514,8 +509,8 @@ Emitter; }; -/// Emit a two-operand (GPR) instruction, where the dest operand is a -/// Variable that's guaranteed to be a register. +/// Emit a two-operand (GPR) instruction, where the dest operand is a Variable +/// that's guaranteed to be a register. 
template <class Machine, bool VarCanBeByte = true, bool SrcCanBeByte = true> void emitIASRegOpTyGPR( const Cfg *Func, Type Ty, const Variable *Dst, const Operand *Src, @@ -540,9 +535,9 @@ Type SrcTy = this->getSrc(0)->getType(); Type DestTy = this->getDest()->getType(); Str << "\t" << Opcode << this->getWidthString(SrcTy); - // Movsx and movzx need both the source and dest type width letter - // to define the operation. The other unary operations have the - // same source and dest type and as a result need only one letter. + // Movsx and movzx need both the source and dest type width letter to + // define the operation. The other unary operations have the same source + // and dest type and as a result need only one letter. if (SrcTy != DestTy) Str << this->getWidthString(DestTy); Str << "\t"; @@ -1181,8 +1176,8 @@ Source) {} }; -/// Move packed - copy 128 bit values between XMM registers, or mem128 -/// and XMM registers. +/// Move packed - copy 128 bit values between XMM registers, or mem128 and XMM +/// registers. template <class Machine> class InstX86Movp : public InstX86BaseMovlike<Machine, InstX86Base<Machine>::Movp> { @@ -1865,13 +1860,12 @@ Func, Dest, Source) {} }; -/// movss is only a binary operation when the source and dest -/// operands are both registers (the high bits of dest are left untouched). -/// In other cases, it behaves like a copy (mov-like) operation (and the -/// high bits of dest are cleared). -/// InstX86Movss will assert that both its source and dest operands are -/// registers, so the lowering code should use _mov instead of _movss -/// in cases where a copy operation is intended. +/// movss is only a binary operation when the source and dest operands are both +/// registers (the high bits of dest are left untouched). In other cases, it +/// behaves like a copy (mov-like) operation (and the high bits of dest are +/// cleared). 
InstX86Movss will assert that both its source and dest operands +/// are registers, so the lowering code should use _mov instead of _movss in +/// cases where a copy operation is intended. template <class Machine> class InstX86MovssRegs : public InstX86BaseBinopXmm<Machine, InstX86Base<Machine>::MovssRegs, @@ -2072,8 +2066,8 @@ typename InstX86Base<Machine>::InstKindX86 Kind, SizeT Maxsrcs, Variable *Dest, bool Locked) : InstX86Base<Machine>(Func, Kind, Maxsrcs, Dest), Locked(Locked) { - // Assume that such instructions are used for Atomics and be careful - // with optimizations. + // Assume that such instructions are used for Atomics and be careful with + // optimizations. this->HasSideEffects = Locked; } }; @@ -2174,8 +2168,7 @@ typename InstX86Base<Machine>::Traits::Cond::BrCond Condition; }; -/// Cmpps instruction - compare packed singled-precision floating point -/// values +/// Cmpps instruction - compare packed single-precision floating point values template <class Machine> class InstX86Cmpps final : public InstX86Base<Machine> { InstX86Cmpps() = delete; @@ -2204,10 +2197,10 @@ }; /// Cmpxchg instruction - cmpxchg <dest>, <desired> will compare if <dest> -/// equals eax. If so, the ZF is set and <desired> is stored in <dest>. -/// If not, ZF is cleared and <dest> is copied to eax (or subregister). -/// <dest> can be a register or memory, while <desired> must be a register. -/// It is the user's responsiblity to mark eax with a FakeDef. +/// equals eax. If so, the ZF is set and <desired> is stored in <dest>. If not, +/// ZF is cleared and <dest> is copied to eax (or subregister). <dest> can be a +/// register or memory, while <desired> must be a register. It is the user's +/// responsibility to mark eax with a FakeDef.
template <class Machine> class InstX86Cmpxchg final : public InstX86BaseLockable<Machine> { InstX86Cmpxchg() = delete; @@ -2232,12 +2225,11 @@ Variable *Desired, bool Locked); }; -/// Cmpxchg8b instruction - cmpxchg8b <m64> will compare if <m64> -/// equals edx:eax. If so, the ZF is set and ecx:ebx is stored in <m64>. -/// If not, ZF is cleared and <m64> is copied to edx:eax. -/// The caller is responsible for inserting FakeDefs to mark edx -/// and eax as modified. -/// <m64> must be a memory operand. +/// Cmpxchg8b instruction - cmpxchg8b <m64> will compare if <m64> equals +/// edx:eax. If so, the ZF is set and ecx:ebx is stored in <m64>. If not, ZF is +/// cleared and <m64> is copied to edx:eax. The caller is responsible for +/// inserting FakeDefs to mark edx and eax as modified. <m64> must be a memory +/// operand. template <class Machine> class InstX86Cmpxchg8b final : public InstX86BaseLockable<Machine> { InstX86Cmpxchg8b() = delete; @@ -2267,10 +2259,10 @@ bool Locked); }; -/// Cvt instruction - wrapper for cvtsX2sY where X and Y are in {s,d,i} -/// as appropriate. s=float, d=double, i=int. X and Y are determined -/// from dest/src types. Sign and zero extension on the integer -/// operand needs to be done separately. +/// Cvt instruction - wrapper for cvtsX2sY where X and Y are in {s,d,i} as +/// appropriate. s=float, d=double, i=int. X and Y are determined from dest/src +/// types. Sign and zero extension on the integer operand needs to be done +/// separately. template <class Machine> class InstX86Cvt final : public InstX86Base<Machine> { InstX86Cvt() = delete; InstX86Cvt(const InstX86Cvt &) = delete; @@ -2406,9 +2398,8 @@ }; /// This is essentially a "mov" instruction with an -/// InstX86Base<Machine>::Traits::X86OperandMem -/// operand instead of Variable as the destination. It's important -/// for liveness that there is no Dest operand. +/// InstX86Base<Machine>::Traits::X86OperandMem operand instead of Variable as +/// the destination. 
It's important for liveness that there is no Dest operand. template <class Machine> class InstX86Store final : public InstX86Base<Machine> { InstX86Store() = delete; @@ -2434,10 +2425,9 @@ }; /// This is essentially a vector "mov" instruction with an typename -/// InstX86Base<Machine>::Traits::X86OperandMem -/// operand instead of Variable as the destination. It's important -/// for liveness that there is no Dest operand. The source must be an -/// Xmm register, since Dest is mem. +/// InstX86Base<Machine>::Traits::X86OperandMem operand instead of Variable as +/// the destination. It's important for liveness that there is no Dest operand. +/// The source must be an Xmm register, since Dest is mem. template <class Machine> class InstX86StoreP final : public InstX86Base<Machine> { InstX86StoreP() = delete; @@ -2596,10 +2586,10 @@ InstX86Push(Cfg *Func, Variable *Source); }; -/// Ret instruction. Currently only supports the "ret" version that -/// does not pop arguments. This instruction takes a Source operand -/// (for non-void returning functions) for liveness analysis, though -/// a FakeUse before the ret would do just as well. +/// Ret instruction. Currently only supports the "ret" version that does not pop +/// arguments. This instruction takes a Source operand (for non-void returning +/// functions) for liveness analysis, though a FakeUse before the ret would do +/// just as well. template <class Machine> class InstX86Ret final : public InstX86Base<Machine> { InstX86Ret() = delete; InstX86Ret(const InstX86Ret &) = delete; @@ -2647,10 +2637,10 @@ const typename InstX86Base<Machine>::Traits::Cond::BrCond Condition; }; -/// Exchanging Add instruction. Exchanges the first operand (destination -/// operand) with the second operand (source operand), then loads the sum -/// of the two values into the destination operand. The destination may be -/// a register or memory, while the source must be a register. +/// Exchanging Add instruction. 
Exchanges the first operand (destination +/// operand) with the second operand (source operand), then loads the sum of the +/// two values into the destination operand. The destination may be a register +/// or memory, while the source must be a register. /// /// Both the dest and source are updated. The caller should then insert a /// FakeDef to reflect the second update. @@ -2677,12 +2667,11 @@ InstX86Xadd(Cfg *Func, Operand *Dest, Variable *Source, bool Locked); }; -/// Exchange instruction. Exchanges the first operand (destination -/// operand) with the second operand (source operand). At least one of -/// the operands must be a register (and the other can be reg or mem). -/// Both the Dest and Source are updated. If there is a memory operand, -/// then the instruction is automatically "locked" without the need for -/// a lock prefix. +/// Exchange instruction. Exchanges the first operand (destination operand) with +/// the second operand (source operand). At least one of the operands must be a +/// register (and the other can be reg or mem). Both the Dest and Source are +/// updated. If there is a memory operand, then the instruction is automatically +/// "locked" without the need for a lock prefix. template <class Machine> class InstX86Xchg final : public InstX86Base<Machine> { InstX86Xchg() = delete; InstX86Xchg(const InstX86Xchg &) = delete;
diff --git a/src/IceInstX86BaseImpl.h b/src/IceInstX86BaseImpl.h index 336e268..677a1d3 100644 --- a/src/IceInstX86BaseImpl.h +++ b/src/IceInstX86BaseImpl.h
@@ -112,16 +112,14 @@ template <class Machine> bool InstX86Br<Machine>::optimizeBranch(const CfgNode *NextNode) { - // If there is no next block, then there can be no fallthrough to - // optimize. + // If there is no next block, then there can be no fallthrough to optimize. if (NextNode == nullptr) return false; // Intra-block conditional branches can't be optimized. if (Label) return false; - // If there is no fallthrough node, such as a non-default case label - // for a switch instruction, then there is no opportunity to - // optimize. + // If there is no fallthrough node, such as a non-default case label for a + // switch instruction, then there is no opportunity to optimize. if (getTargetFalse() == nullptr) return false; @@ -132,15 +130,15 @@ this->setDeleted(); return true; } - // If the fallthrough is to the next node, set fallthrough to nullptr - // to indicate. + // If the fallthrough is to the next node, set fallthrough to nullptr to + // indicate. if (getTargetFalse() == NextNode) { TargetFalse = nullptr; return true; } - // If TargetTrue is the next node, and TargetFalse is not nullptr - // (which was already tested above), then invert the branch - // condition, swap the targets, and set new fallthrough to nullptr. + // If TargetTrue is the next node, and TargetFalse is not nullptr (which was + // already tested above), then invert the branch condition, swap the targets, + // and set new fallthrough to nullptr. if (getTargetTrue() == NextNode) { assert(Condition != InstX86Base<Machine>::Traits::Cond::Br_None); Condition = this->getOppositeCondition(Condition); @@ -185,8 +183,8 @@ typename InstX86Base<Machine>::Traits::Cond::BrCond Condition) : InstX86Base<Machine>(Func, InstX86Base<Machine>::Cmov, 2, Dest), Condition(Condition) { - // The final result is either the original Dest, or Source, so mark - // both as sources. + // The final result is either the original Dest, or Source, so mark both as + // sources. 
this->addSource(Dest); this->addSource(Source); } @@ -320,12 +318,11 @@ template <class Machine> InstX86Pop<Machine>::InstX86Pop(Cfg *Func, Variable *Dest) : InstX86Base<Machine>(Func, InstX86Base<Machine>::Pop, 0, Dest) { - // A pop instruction affects the stack pointer and so it should not - // be allowed to be automatically dead-code eliminated. (The - // corresponding push instruction doesn't need this treatment - // because it has no dest variable and therefore won't be dead-code - // eliminated.) This is needed for late-stage liveness analysis - // (e.g. asm-verbose mode). + // A pop instruction affects the stack pointer and so it should not be + // allowed to be automatically dead-code eliminated. (The corresponding push + // instruction doesn't need this treatment because it has no dest variable + // and therefore won't be dead-code eliminated.) This is needed for + // late-stage liveness analysis (e.g. asm-verbose mode). this->HasSideEffects = true; } @@ -529,11 +526,10 @@ Asm->jmp(InstX86Base<Machine>::Traits::RegisterSet::getEncodedGPR( Var->getRegNum())); } else { - // The jmp instruction with a memory operand should be possible - // to encode, but it isn't a valid sandboxed instruction, and - // there shouldn't be a register allocation issue to jump - // through a scratch register, so we don't really need to bother - // implementing it. + // The jmp instruction with a memory operand should be possible to + // encode, but it isn't a valid sandboxed instruction, and there + // shouldn't be a register allocation issue to jump through a scratch + // register, so we don't really need to bother implementing it. llvm::report_fatal_error("Assembler can't jmp to memory operand"); } } else if (const auto Mem = llvm::dyn_cast< @@ -548,11 +544,10 @@ Asm->jmp(CR); } else if (const auto Imm = llvm::dyn_cast<ConstantInteger32>(Target)) { // NaCl trampoline calls refer to an address within the sandbox directly. 
- // This is usually only needed for non-IRT builds and otherwise not - // very portable or stable. Usually this is only done for "calls" - // and not jumps. - // TODO(jvoung): Support this when there is a lowering that - // actually triggers this case. + // This is usually only needed for non-IRT builds and otherwise not very + // portable or stable. Usually this is only done for "calls" and not jumps. + // TODO(jvoung): Support this when there is a lowering that actually + // triggers this case. (void)Imm; llvm::report_fatal_error("Unexpected jmp to absolute address"); } else { @@ -633,10 +628,9 @@ getCallTarget()->dump(Func); } -// The ShiftHack parameter is used to emit "cl" instead of "ecx" for -// shift instructions, in order to be syntactically valid. The -// this->Opcode parameter needs to be char* and not IceString because of -// template issues. +// The ShiftHack parameter is used to emit "cl" instead of "ecx" for shift +// instructions, in order to be syntactically valid. The this->Opcode parameter +// needs to be char* and not IceString because of template issues. template <class Machine> void InstX86Base<Machine>::emitTwoAddress(const char *Opcode, const Inst *Inst, const Cfg *Func, bool ShiftHack) { @@ -802,15 +796,14 @@ &Emitter) { typename InstX86Base<Machine>::Traits::Assembler *Asm = Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>(); - // Technically, the Dest Var can be mem as well, but we only use Reg. - // We can extend this to check Dest if we decide to use that form. + // Technically, the Dest Var can be mem as well, but we only use Reg. We can + // extend this to check Dest if we decide to use that form. assert(Var->hasReg()); // We cheat a little and use GPRRegister even for byte operations. typename InstX86Base<Machine>::Traits::RegisterSet::GPRRegister VarReg = InstX86Base<Machine>::Traits::RegisterSet::getEncodedByteRegOrGPR( Ty, Var->getRegNum()); - // Src must be reg == ECX or an Imm8. 
- // This is asserted by the assembler. + // Src must be reg == ECX or an Imm8. This is asserted by the assembler. if (const auto SrcVar = llvm::dyn_cast<Variable>(Src)) { assert(SrcVar->hasReg()); typename InstX86Base<Machine>::Traits::RegisterSet::GPRRegister SrcReg = @@ -1337,8 +1330,8 @@ &InstX86Base<Machine>::Traits::Assembler::imul}; emitIASOpTyGPR<Machine>(Func, Ty, this->getSrc(1), Emitter); } else { - // We only use imul as a two-address instruction even though - // there is a 3 operand version when one of the operands is a constant. + // We only use imul as a two-address instruction even though there is a 3 + // operand version when one of the operands is a constant. assert(Var == this->getSrc(0)); static const typename InstX86Base< Machine>::Traits::Assembler::GPREmitterRegOp Emitter = { @@ -1678,8 +1671,8 @@ Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>(); assert(this->getSrcSize() == 2); assert(Condition < InstX86Base<Machine>::Traits::Cond::Cmpps_Invalid); - // Assuming there isn't any load folding for cmpps, and vector constants - // are not allowed in PNaCl. + // Assuming there isn't any load folding for cmpps, and vector constants are + // not allowed in PNaCl. assert(llvm::isa<Variable>(this->getSrc(1))); const auto SrcVar = llvm::cast<Variable>(this->getSrc(1)); if (SrcVar->hasReg()) { @@ -1988,8 +1981,8 @@ template <class Machine> void InstX86Ucomiss<Machine>::emitIAS(const Cfg *Func) const { assert(this->getSrcSize() == 2); - // Currently src0 is always a variable by convention, to avoid having - // two memory operands. + // Currently src0 is always a variable by convention, to avoid having two + // memory operands. assert(llvm::isa<Variable>(this->getSrc(0))); const auto Src0Var = llvm::cast<Variable>(this->getSrc(0)); Type Ty = Src0Var->getType(); @@ -2291,16 +2284,16 @@ : InstX86Base<Machine>::Traits::TypeAttributes[DestTy] .SdSsString) << "\t"; } - // For an integer truncation operation, src is wider than dest. 
- // Ideally, we use a mov instruction whose data width matches the - // narrower dest. This is a problem if e.g. src is a register like - // esi or si where there is no 8-bit version of the register. To be - // safe, we instead widen the dest to match src. This works even - // for stack-allocated dest variables because typeWidthOnStack() - // pads to a 4-byte boundary even if only a lower portion is used. - // TODO: This assert disallows usages such as copying a floating point - // value between a vector and a scalar (which movss is used for). - // Clean this up. + // For an integer truncation operation, src is wider than dest. Ideally, we + // use a mov instruction whose data width matches the narrower dest. This is + // a problem if e.g. src is a register like esi or si where there is no 8-bit + // version of the register. To be safe, we instead widen the dest to match + // src. This works even for stack-allocated dest variables because + // typeWidthOnStack() pads to a 4-byte boundary even if only a lower portion + // is used. + // TODO: This assert disallows usages such as copying a floating + // point value between a vector and a scalar (which movss is used for). Clean + // this up. assert(Func->getTarget()->typeWidthInBytesOnStack(DestTy) == Func->getTarget()->typeWidthInBytesOnStack(SrcTy)); Src->emit(Func); @@ -2316,12 +2309,11 @@ Type DestTy = Dest->getType(); Type SrcTy = Src->getType(); // Mov can be used for GPRs or XMM registers. Also, the type does not - // necessarily match (Mov can be used for bitcasts). However, when - // the type does not match, one of the operands must be a register. - // Thus, the strategy is to find out if Src or Dest are a register, - // then use that register's type to decide on which emitter set to use. - // The emitter set will include reg-reg movs, but that case should - // be unused when the types don't match. + // necessarily match (Mov can be used for bitcasts). 
However, when the type + // does not match, one of the operands must be a register. Thus, the strategy + // is to find out if Src or Dest are a register, then use that register's + // type to decide on which emitter set to use. The emitter set will include + // reg-reg movs, but that case should be unused when the types don't match. static const typename InstX86Base<Machine>::Traits::Assembler::XmmEmitterRegOp XmmRegEmitter = {&InstX86Base<Machine>::Traits::Assembler::movss, &InstX86Base<Machine>::Traits::Assembler::movss}; @@ -2333,16 +2325,16 @@ Machine>::Traits::Assembler::GPREmitterAddrOp GPRAddrEmitter = { &InstX86Base<Machine>::Traits::Assembler::mov, &InstX86Base<Machine>::Traits::Assembler::mov}; - // For an integer truncation operation, src is wider than dest. - // Ideally, we use a mov instruction whose data width matches the - // narrower dest. This is a problem if e.g. src is a register like - // esi or si where there is no 8-bit version of the register. To be - // safe, we instead widen the dest to match src. This works even - // for stack-allocated dest variables because typeWidthOnStack() - // pads to a 4-byte boundary even if only a lower portion is used. - // TODO: This assert disallows usages such as copying a floating point - // value between a vector and a scalar (which movss is used for). - // Clean this up. + // For an integer truncation operation, src is wider than dest. Ideally, we + // use a mov instruction whose data width matches the narrower dest. This is + // a problem if e.g. src is a register like esi or si where there is no 8-bit + // version of the register. To be safe, we instead widen the dest to match + // src. This works even for stack-allocated dest variables because + // typeWidthOnStack() pads to a 4-byte boundary even if only a lower portion + // is used. + // TODO: This assert disallows usages such as copying a floating + // point value between a vector and a scalar (which movss is used for). Clean + // this up. 
assert( Func->getTarget()->typeWidthInBytesOnStack(this->getDest()->getType()) == Func->getTarget()->typeWidthInBytesOnStack(Src->getType())); @@ -2375,8 +2367,8 @@ return; } } else { - // Dest must be Stack and Src *could* be a register. Use Src's type - // to decide on the emitters. + // Dest must be Stack and Src *could* be a register. Use Src's type to + // decide on the emitters. typename InstX86Base<Machine>::Traits::Address StackAddr( static_cast<typename InstX86Base<Machine>::Traits::TargetLowering *>( Func->getTarget()) @@ -2409,8 +2401,8 @@ assert(this->getSrcSize() == 1); const Variable *Dest = this->getDest(); const auto SrcVar = llvm::cast<Variable>(this->getSrc(0)); - // For insert/extract element (one of Src/Dest is an Xmm vector and - // the other is an int type). + // For insert/extract element (one of Src/Dest is an Xmm vector and the other + // is an int type). if (SrcVar->getType() == IceType_i32 || (InstX86Base<Machine>::Traits::Is64Bit && SrcVar->getType() == IceType_i64)) { @@ -2464,10 +2456,9 @@ void InstX86Movp<Machine>::emit(const Cfg *Func) const { if (!BuildDefs::dump()) return; - // TODO(wala,stichnot): movups works with all vector operands, but - // there exist other instructions (movaps, movdqa, movdqu) that may - // perform better, depending on the data type and alignment of the - // operands. + // TODO(wala,stichnot): movups works with all vector operands, but there + // exist other instructions (movaps, movdqa, movdqu) that may perform better, + // depending on the data type and alignment of the operands. Ostream &Str = Func->getContext()->getStrEmit(); assert(this->getSrcSize() == 1); Str << "\tmovups\t"; @@ -2521,8 +2512,8 @@ template <class Machine> void InstX86MovssRegs<Machine>::emitIAS(const Cfg *Func) const { - // This is Binop variant is only intended to be used for reg-reg moves - // where part of the Dest register is untouched. 
+ // This Binop variant is only intended to be used for reg-reg moves where + // part of the Dest register is untouched. assert(this->getSrcSize() == 2); const Variable *Dest = this->getDest(); assert(Dest == this->getSrc(0)); @@ -2542,9 +2533,9 @@ assert(this->getSrcSize() == 1); const Variable *Dest = this->getDest(); const Operand *Src = this->getSrc(0); - // Dest must be a > 8-bit register, but Src can be 8-bit. In practice - // we just use the full register for Dest to avoid having an - // OperandSizeOverride prefix. It also allows us to only dispatch on SrcTy. + // Dest must be a > 8-bit register, but Src can be 8-bit. In practice we just + // use the full register for Dest to avoid having an OperandSizeOverride + // prefix. It also allows us to only dispatch on SrcTy. Type SrcTy = Src->getType(); assert(typeWidthInBytes(Dest->getType()) > 1); assert(typeWidthInBytes(Dest->getType()) > typeWidthInBytes(SrcTy)); @@ -2596,8 +2587,8 @@ SizeT Width = typeWidthInBytes(Ty); const auto Var = llvm::dyn_cast<Variable>(this->getSrc(0)); if (Var && Var->hasReg()) { - // This is a physical xmm register, so we need to spill it to a - // temporary stack slot. + // This is a physical xmm register, so we need to spill it to a temporary + // stack slot. Str << "\tsubl\t$" << Width << ", %esp" << "\n"; Str << "\tmov" @@ -2622,8 +2613,8 @@ Type Ty = Src->getType(); if (const auto Var = llvm::dyn_cast<Variable>(Src)) { if (Var->hasReg()) { - // This is a physical xmm register, so we need to spill it to a - // temporary stack slot. + // This is a physical xmm register, so we need to spill it to a temporary + // stack slot. Immediate Width(typeWidthInBytes(Ty)); Asm->sub(IceType_i32, InstX86Base<Machine>::Traits::RegisterSet::Encoded_Reg_esp, @@ -2672,9 +2663,8 @@ Ostream &Str = Func->getContext()->getStrEmit(); assert(this->getSrcSize() == 0); // TODO(jvoung,stichnot): Utilize this by setting Dest to nullptr to - // "partially" delete the fstp if the Dest is unused.
- // Even if Dest is unused, the fstp should be kept for the SideEffects - // of popping the stack. + // "partially" delete the fstp if the Dest is unused. Even if Dest is unused, + // the fstp should be kept for the SideEffects of popping the stack. if (!this->getDest()) { Str << "\tfstp\tst(0)"; return; @@ -2686,10 +2676,9 @@ this->getDest()->emit(Func); return; } - // Dest is a physical (xmm) register, so st(0) needs to go through - // memory. Hack this by creating a temporary stack slot, spilling - // st(0) there, loading it into the xmm register, and deallocating - // the stack slot. + // Dest is a physical (xmm) register, so st(0) needs to go through memory. + // Hack this by creating a temporary stack slot, spilling st(0) there, + // loading it into the xmm register, and deallocating the stack slot. Str << "\tsubl\t$" << Width << ", %esp\n"; Str << "\tfstp" << this->getFldString(Ty) << "\t" << "(%esp)\n"; @@ -2708,9 +2697,8 @@ assert(this->getSrcSize() == 0); const Variable *Dest = this->getDest(); // TODO(jvoung,stichnot): Utilize this by setting Dest to nullptr to - // "partially" delete the fstp if the Dest is unused. - // Even if Dest is unused, the fstp should be kept for the SideEffects - // of popping the stack. + // "partially" delete the fstp if the Dest is unused. Even if Dest is unused, + // the fstp should be kept for the SideEffects of popping the stack. if (!Dest) { Asm->fstp(InstX86Base<Machine>::Traits::RegisterSet::getEncodedSTReg(0)); return; @@ -2723,10 +2711,9 @@ ->stackVarToAsmOperand(Dest)); Asm->fstp(Ty, StackAddr); } else { - // Dest is a physical (xmm) register, so st(0) needs to go through - // memory. Hack this by creating a temporary stack slot, spilling - // st(0) there, loading it into the xmm register, and deallocating - // the stack slot. + // Dest is a physical (xmm) register, so st(0) needs to go through memory. 
+ // Hack this by creating a temporary stack slot, spilling st(0) there, + // loading it into the xmm register, and deallocating the stack slot. Immediate Width(typeWidthInBytes(Ty)); Asm->sub(IceType_i32, InstX86Base<Machine>::Traits::RegisterSet::Encoded_Reg_esp, Width); @@ -2796,9 +2783,9 @@ this->getSrc(0)->emit(Func); Str << ", "; Variable *Dest = this->getDest(); - // pextrw must take a register dest. There is an SSE4.1 version that takes - // a memory dest, but we aren't using it. For uniformity, just restrict - // them all to have a register dest for now. + // pextrw must take a register dest. There is an SSE4.1 version that takes a + // memory dest, but we aren't using it. For uniformity, just restrict them + // all to have a register dest for now. assert(Dest->hasReg()); Dest->asType(IceType_i32)->emit(Func); } @@ -2813,9 +2800,9 @@ static_cast<typename InstX86Base<Machine>::Traits::TargetLowering *>( Func->getTarget()) ->getInstructionSet() >= InstX86Base<Machine>::Traits::SSE4_1); - // pextrw must take a register dest. There is an SSE4.1 version that takes - // a memory dest, but we aren't using it. For uniformity, just restrict - // them all to have a register dest for now. + // pextrw must take a register dest. There is an SSE4.1 version that takes a + // memory dest, but we aren't using it. For uniformity, just restrict them + // all to have a register dest for now. assert(Dest->hasReg()); // pextrw's Src(0) must be a register (both SSE4.1 and SSE2). assert(llvm::cast<Variable>(this->getSrc(0))->hasReg()); @@ -2876,10 +2863,9 @@ static_cast<typename InstX86Base<Machine>::Traits::TargetLowering *>( Func->getTarget()) ->getInstructionSet() >= InstX86Base<Machine>::Traits::SSE4_1); - // If src1 is a register, it should always be r32 (this should fall out - // from the encodings for ByteRegs overlapping the encodings for r32), - // but we have to trust the regalloc to not choose "ah", where it - // doesn't overlap. 
+ // If src1 is a register, it should always be r32 (this should fall out from + // the encodings for ByteRegs overlapping the encodings for r32), but we have + // to trust the regalloc to not choose "ah", where it doesn't overlap. static const typename InstX86Base<Machine>::Traits::Assembler:: template ThreeOpImmEmitter< typename InstX86Base<Machine>::Traits::RegisterSet::XmmRegister,
diff --git a/src/IceIntrinsics.cpp b/src/IceIntrinsics.cpp index 1dc25cc..bbbf086 100644 --- a/src/IceIntrinsics.cpp +++ b/src/IceIntrinsics.cpp
@@ -8,8 +8,8 @@ //===----------------------------------------------------------------------===// /// /// \file -/// This file implements the Intrinsics utilities for matching and -/// then dispatching by name. +/// This file implements the Intrinsics utilities for matching and then +/// dispatching by name. /// //===----------------------------------------------------------------------===// @@ -33,8 +33,8 @@ #define INTRIN(ID, SE, RT) \ { Intrinsics::ID, Intrinsics::SE, Intrinsics::RT } -// Build list of intrinsics with their attributes and expected prototypes. -// List is sorted alphabetically. +// Build list of intrinsics with their attributes and expected prototypes. List +// is sorted alphabetically. const struct IceIntrinsicsEntry_ { Intrinsics::FullIntrinsicInfo Info; const char *IntrinsicName; @@ -279,8 +279,8 @@ case AtomicRMW: return true; case AtomicCmpxchg: - // Reject orderings that are disallowed by C++11 as invalid - // combinations for cmpxchg. + // Reject orderings that are disallowed by C++11 as invalid combinations + // for cmpxchg. switch (OrderOther) { case MemoryOrderRelaxed: case MemoryOrderConsume:
diff --git a/src/IceIntrinsics.h b/src/IceIntrinsics.h index 9270aa4..208c3c1 100644 --- a/src/IceIntrinsics.h +++ b/src/IceIntrinsics.h
@@ -62,11 +62,10 @@ Trap }; - /// Operations that can be represented by the AtomicRMW - /// intrinsic. + /// Operations that can be represented by the AtomicRMW intrinsic. /// - /// Do not reorder these values: their order offers forward - /// compatibility of bitcode targeted to PNaCl. + /// Do not reorder these values: their order offers forward compatibility of + /// bitcode targeted to PNaCl. enum AtomicRMWOperation { AtomicInvalid = 0, // Invalid, keep first. AtomicAdd, @@ -80,8 +79,8 @@ /// Memory orderings supported by PNaCl IR. /// - /// Do not reorder these values: their order offers forward - /// compatibility of bitcode targeted to PNaCl. + /// Do not reorder these values: their order offers forward compatibility of + /// bitcode targeted to PNaCl. enum MemoryOrder { MemoryOrderInvalid = 0, // Invalid, keep first. MemoryOrderRelaxed, @@ -93,11 +92,11 @@ MemoryOrderNum // Invalid, keep last. }; - /// Verify memory ordering rules for atomic intrinsics. For - /// AtomicCmpxchg, Order is the "success" ordering and OrderOther is - /// the "failure" ordering. Returns true if valid, false if invalid. - // TODO(stichnot,kschimpf): Perform memory order validation in the - // bitcode reader/parser, allowing LLVM and Subzero to share. See + /// Verify memory ordering rules for atomic intrinsics. For AtomicCmpxchg, + /// Order is the "success" ordering and OrderOther is the "failure" ordering. + /// Returns true if valid, false if invalid. + // TODO(stichnot,kschimpf): Perform memory order validation in the bitcode + // reader/parser, allowing LLVM and Subzero to share. See // https://code.google.com/p/nativeclient/issues/detail?id=4126 . static bool isMemoryOrderValid(IntrinsicID ID, uint64_t Order, uint64_t OrderOther = MemoryOrderInvalid); @@ -106,10 +105,10 @@ enum ReturnsTwice { ReturnsTwice_F = 0, ReturnsTwice_T = 1 }; - /// Basic attributes related to each intrinsic, that are relevant to - /// code generation. 
Perhaps the attributes representation can be shared - /// with general function calls, but PNaCl currently strips all - /// attributes from functions. + /// Basic attributes related to each intrinsic, that are relevant to code + /// generation. Perhaps the attributes representation can be shared with + /// general function calls, but PNaCl currently strips all attributes from + /// functions. struct IntrinsicInfo { enum IntrinsicID ID : 30; enum SideEffects HasSideEffects : 1; @@ -132,9 +131,9 @@ Type Signature[kMaxIntrinsicParameters]; uint8_t NumTypes; - /// Validates that type signature of call matches intrinsic. - /// If WrongArgumentType is returned, ArgIndex is set to corresponding - /// argument index. + /// Validates that type signature of call matches intrinsic. If + /// WrongArgumentType is returned, ArgIndex is set to corresponding argument + /// index. ValidateCallValue validateCall(const Ice::InstCall *Call, SizeT &ArgIndex) const; @@ -154,11 +153,11 @@ Type getArgType(SizeT Index) const; }; - /// Find the information about a given intrinsic, based on function name. If + /// Find the information about a given intrinsic, based on function name. If /// the function name does not have the common "llvm." prefix, nullptr is - /// returned and Error is set to false. Otherwise, tries to find a reference - /// to a FullIntrinsicInfo entry (valid for the lifetime of the map). If - /// found, sets Error to false and returns the reference. If not found, sets + /// returned and Error is set to false. Otherwise, tries to find a reference + /// to a FullIntrinsicInfo entry (valid for the lifetime of the map). If + /// found, sets Error to false and returns the reference. If not found, sets /// Error to true and returns nullptr (indicating an unknown "llvm.foo" /// intrinsic). const FullIntrinsicInfo *find(const IceString &Name, bool &Error) const;
diff --git a/src/IceLiveness.cpp b/src/IceLiveness.cpp index 15877b2..25cfd09 100644 --- a/src/IceLiveness.cpp +++ b/src/IceLiveness.cpp
@@ -8,15 +8,14 @@ //===----------------------------------------------------------------------===// /// /// \file -/// This file provides some of the support for the Liveness class. In -/// particular, it handles the sparsity representation of the mapping -/// between Variables and CfgNodes. The idea is that since most -/// variables are used only within a single basic block, we can -/// partition the variables into "local" and "global" sets. Instead of -/// sizing and indexing vectors according to Variable::Number, we -/// create a mapping such that global variables are mapped to low -/// indexes that are common across nodes, and local variables are -/// mapped to a higher index space that is shared across nodes. +/// This file provides some of the support for the Liveness class. In +/// particular, it handles the sparsity representation of the mapping between +/// Variables and CfgNodes. The idea is that since most variables are used only +/// within a single basic block, we can partition the variables into "local" and +/// "global" sets. Instead of sizing and indexing vectors according to +/// Variable::Number, we create a mapping such that global variables are mapped +/// to low indexes that are common across nodes, and local variables are mapped +/// to a higher index space that is shared across nodes. /// //===----------------------------------------------------------------------===// @@ -32,7 +31,7 @@ // Initializes the basic liveness-related data structures for full liveness // analysis (IsFullInit=true), or for incremental update after phi lowering -// (IsFullInit=false). In the latter case, FirstNode points to the first node +// (IsFullInit=false). In the latter case, FirstNode points to the first node // added since starting phi lowering, and FirstVar points to the first Variable // added since starting phi lowering. 
void Liveness::initInternal(NodeList::const_iterator FirstNode, @@ -60,7 +59,7 @@ else assert(TmpNumGlobals == 0); - // Resize each LivenessNode::LiveToVarMap, and the global LiveToVarMap. Reset + // Resize each LivenessNode::LiveToVarMap, and the global LiveToVarMap. Reset // the counts to 0. for (auto I = FirstNode, E = Func->getNodes().end(); I != E; ++I) { LivenessNode &N = Nodes[(*I)->getIndex()]; @@ -75,7 +74,7 @@ RangeMask.resize(NumVars); RangeMask.set(0, NumVars); // Track all variables by default. - // Sort each variable into the appropriate LiveToVarMap. Set VarToLiveMap. + // Sort each variable into the appropriate LiveToVarMap. Set VarToLiveMap. // Set RangeMask correctly for each variable. TmpNumGlobals = 0; for (auto I = FirstVar, E = Func->getVariables().end(); I != E; ++I) { @@ -112,8 +111,7 @@ // NumLocals, LiveToVarMap already initialized Node.LiveIn.resize(NumGlobals); Node.LiveOut.resize(NumGlobals); - // LiveBegin and LiveEnd are reinitialized before each pass over - // the block. + // LiveBegin and LiveEnd are reinitialized before each pass over the block. } }
diff --git a/src/IceLiveness.h b/src/IceLiveness.h index 895138d..bd739d3 100644 --- a/src/IceLiveness.h +++ b/src/IceLiveness.h
@@ -8,12 +8,11 @@ //===----------------------------------------------------------------------===// /// /// \file -/// This file declares the Liveness and LivenessNode classes, -/// which are used for liveness analysis. The node-specific -/// information tracked for each Variable includes whether it is -/// live on entry, whether it is live on exit, the instruction number -/// that starts its live range, and the instruction number that ends -/// its live range. At the Cfg level, the actual live intervals are +/// This file declares the Liveness and LivenessNode classes, which are used for +/// liveness analysis. The node-specific information tracked for each Variable +/// includes whether it is live on entry, whether it is live on exit, the +/// instruction number that starts its live range, and the instruction number +/// that ends its live range. At the Cfg level, the actual live intervals are /// recorded. /// //===----------------------------------------------------------------------===// @@ -41,22 +40,20 @@ /// NumLocals is the number of Variables local to this block. SizeT NumLocals = 0; /// NumNonDeadPhis tracks the number of Phi instructions that - /// Inst::liveness() identified as tentatively live. If - /// NumNonDeadPhis changes from the last liveness pass, then liveness - /// has not yet converged. + /// Inst::liveness() identified as tentatively live. If NumNonDeadPhis + /// changes from the last liveness pass, then liveness has not yet + /// converged. SizeT NumNonDeadPhis = 0; - // LiveToVarMap maps a liveness bitvector index to a Variable. This - // is generally just for printing/dumping. The index should be less - // than NumLocals + Liveness::NumGlobals. + // LiveToVarMap maps a liveness bitvector index to a Variable. This is + // generally just for printing/dumping. The index should be less than + // NumLocals + Liveness::NumGlobals. 
std::vector<Variable *> LiveToVarMap; // LiveIn and LiveOut track the in- and out-liveness of the global - // variables. The size of each vector is - // LivenessNode::NumGlobals. + // variables. The size of each vector is LivenessNode::NumGlobals. LivenessBV LiveIn, LiveOut; - // LiveBegin and LiveEnd track the instruction numbers of the start - // and end of each variable's live range within this block. The - // index/key of each element is less than NumLocals + - // Liveness::NumGlobals. + // LiveBegin and LiveEnd track the instruction numbers of the start and end + // of each variable's live range within this block. The index/key of each + // element is less than NumLocals + Liveness::NumGlobals. LiveBeginEndMap LiveBegin, LiveEnd; }; @@ -111,11 +108,11 @@ SizeT NumGlobals = 0; /// Size of Nodes is Cfg::Nodes.size(). std::vector<LivenessNode> Nodes; - /// VarToLiveMap maps a Variable's Variable::Number to its live index - /// within its basic block. + /// VarToLiveMap maps a Variable's Variable::Number to its live index within + /// its basic block. std::vector<SizeT> VarToLiveMap; - /// LiveToVarMap is analogous to LivenessNode::LiveToVarMap, but for - /// non-local variables. + /// LiveToVarMap is analogous to LivenessNode::LiveToVarMap, but for non-local + /// variables. std::vector<Variable *> LiveToVarMap; /// RangeMask[Variable::Number] indicates whether we want to track that /// Variable's live range.
diff --git a/src/IceLoopAnalyzer.cpp b/src/IceLoopAnalyzer.cpp index e2f7487..4e1b549 100644 --- a/src/IceLoopAnalyzer.cpp +++ b/src/IceLoopAnalyzer.cpp
@@ -121,9 +121,9 @@ return nullptr; } - // Reaching here means a loop has been found! It consists of the nodes on - // the top of the stack, down until the current node being processed, Node, - // is found. + // Reaching here means a loop has been found! It consists of the nodes on the + // top of the stack, down until the current node being processed, Node, is + // found. for (auto It = LoopStack.rbegin(); It != LoopStack.rend(); ++It) { (*It)->setOnStack(false); (*It)->incrementLoopNestDepth();
diff --git a/src/IceLoopAnalyzer.h b/src/IceLoopAnalyzer.h index 5991798..19d38d1 100644 --- a/src/IceLoopAnalyzer.h +++ b/src/IceLoopAnalyzer.h
@@ -35,6 +35,10 @@ /// /// This only computes the loop nest depth within the function and does not /// take into account whether the function was called from within a loop. + // TODO(ascull): this currently uses an extension of Tarjan's algorithm which + // is bounded linear. ncbray suggests another algorithm which is linear in + // practice but not bounded linear. I think it also finds dominators. + // http://lenx.100871.net/papers/loop-SAS.pdf void computeLoopNestDepth(); private: @@ -88,11 +92,11 @@ using LoopNodePtrList = std::vector<LoopNode *, CfgLocalAllocator<LoopNode *>>; - /// Process the node as part as part of Tarjan's algorithm and return either - /// a node to recurse into or nullptr when the node has been fully processed. + /// Process the node as part of Tarjan's algorithm and return either a + /// node to recurse into or nullptr when the node has been fully processed. LoopNode *processNode(LoopNode &Node); - /// The fuction to analyze for loops. + /// The function to analyze for loops. Cfg *const Func; /// A list of decorated nodes in the same order as Func->getNodes() which /// means the node's index will also be valid in this list.
diff --git a/src/IceOperand.cpp b/src/IceOperand.cpp index 2013dcf..125c692 100644 --- a/src/IceOperand.cpp +++ b/src/IceOperand.cpp
@@ -48,10 +48,9 @@ Range.push_back(RangeElementType(Start, End)); } -// Returns true if this live range ends before Other's live range -// starts. This means that the highest instruction number in this -// live range is less than or equal to the lowest instruction number -// of the Other live range. +// Returns true if this live range ends before Other's live range starts. This +// means that the highest instruction number in this live range is less than or +// equal to the lowest instruction number of the Other live range. bool LiveRange::endsBefore(const LiveRange &Other) const { // Neither range should be empty, but let's be graceful. if (Range.empty() || Other.Range.empty()) @@ -94,10 +93,10 @@ break; } } - // This is an equivalent but less inefficient implementation. It's - // expensive enough that we wouldn't want to run it under any build, - // but it could be enabled if e.g. the LiveRange implementation - // changes and extra testing is needed. + // This is an equivalent but less inefficient implementation. It's expensive + // enough that we wouldn't want to run it under any build, but it could be + // enabled if e.g. the LiveRange implementation changes and extra testing is + // needed. if (BuildDefs::extraValidation()) { LiveRange Temp; Temp.addSegment(OtherBegin, OtherBegin + 1); @@ -108,11 +107,10 @@ return Result; } -// Returns true if the live range contains the given instruction -// number. This is only used for validating the live range -// calculation. The IsDest argument indicates whether the Variable -// being tested is used in the Dest position (as opposed to a Src -// position). +// Returns true if the live range contains the given instruction number. This +// is only used for validating the live range calculation. The IsDest argument +// indicates whether the Variable being tested is used in the Dest position (as +// opposed to a Src position). 
bool LiveRange::containsValue(InstNumberT Value, bool IsDest) const { for (const RangeElementType &I : Range) { if (I.first <= Value && @@ -134,8 +132,8 @@ } Variable *Variable::asType(Type Ty) { - // Note: This returns a Variable, even if the "this" object is a - // subclass of Variable. + // Note: This returns a Variable, even if the "this" object is a subclass of + // Variable. if (!BuildDefs::dump() || getType() == Ty) return this; Variable *V = new (getCurrentCfgAllocator()->Allocate<Variable>()) @@ -171,20 +169,19 @@ if (MultiBlock == MBS_MultiBlock) return; - // TODO(stichnot): If the use occurs as a source operand in the - // first instruction of the block, and its definition is in this - // block's only predecessor, we might consider not marking this as a - // separate use. This may also apply if it's the first instruction - // of the block that actually uses a Variable. + // TODO(stichnot): If the use occurs as a source operand in the first + // instruction of the block, and its definition is in this block's only + // predecessor, we might consider not marking this as a separate use. This + // may also apply if it's the first instruction of the block that actually + // uses a Variable. assert(Node); bool MakeMulti = false; if (IsImplicit) MakeMulti = true; - // A phi source variable conservatively needs to be marked as - // multi-block, even if its definition is in the same block. This - // is because there can be additional control flow before branching - // back to this node, and the variable is live throughout those - // nodes. + // A phi source variable conservatively needs to be marked as multi-block, + // even if its definition is in the same block. This is because there can be + // additional control flow before branching back to this node, and the + // variable is live throughout those nodes. 
if (Instr && llvm::isa<InstPhi>(Instr)) MakeMulti = true; @@ -211,10 +208,10 @@ void VariableTracking::markDef(MetadataKind TrackingKind, const Inst *Instr, CfgNode *Node) { - // TODO(stichnot): If the definition occurs in the last instruction - // of the block, consider not marking this as a separate use. But - // be careful not to omit all uses of the variable if markDef() and - // markUse() both use this optimization. + // TODO(stichnot): If the definition occurs in the last instruction of the + // block, consider not marking this as a separate use. But be careful not to + // omit all uses of the variable if markDef() and markUse() both use this + // optimization. assert(Node); // Verify that instructions are added in increasing order. #ifndef NDEBUG @@ -517,8 +514,7 @@ // =========== Immediate Randomization and Pooling routines ============== // Specialization of the template member function for ConstantInteger32 -// TODO(stichnot): try to move this specialization into a target-specific -// file. +// TODO(stichnot): try to move this specialization into a target-specific file. template <> bool ConstantInteger32::shouldBeRandomizedOrPooled(const GlobalContext *Ctx) { uint32_t Threshold = Ctx->getFlags().getRandomizeAndPoolImmediatesThreshold();
diff --git a/src/IceOperand.h b/src/IceOperand.h index 8bec48e..b4e06be 100644 --- a/src/IceOperand.h +++ b/src/IceOperand.h
@@ -45,11 +45,11 @@ kVariable, kVariable_Target, // leave space for target-specific variable kinds kVariable_Max = kVariable_Target + MaxTargetKinds, - // Target-specific operand classes use kTarget as the starting - // point for their Kind enum space. Note that the value-spaces are shared - // across targets. To avoid confusion over the definition of shared - // values, an object specific to one target should never be passed - // to a different target. + // Target-specific operand classes use kTarget as the starting point for + // their Kind enum space. Note that the value-spaces are shared across + // targets. To avoid confusion over the definition of shared values, an + // object specific to one target should never be passed to a different + // target. kTarget, kTarget_Max = std::numeric_limits<uint8_t>::max(), }; @@ -70,8 +70,8 @@ /// \name Dumping functions. /// @{ - /// The dump(Func,Str) implementation must be sure to handle the - /// situation where Func==nullptr. + /// The dump(Func,Str) implementation must be sure to handle the situation + /// where Func==nullptr. virtual void dump(const Cfg *Func, Ostream &Str) const = 0; void dump(const Cfg *Func) const { if (!BuildDefs::dump()) @@ -105,8 +105,8 @@ return Str; } -/// Constant is the abstract base class for constants. All -/// constants are allocated from a global arena and are pooled. +/// Constant is the abstract base class for constants. All constants are +/// allocated from a global arena and are pooled. class Constant : public Operand { Constant() = delete; Constant(const Constant &) = delete; @@ -124,9 +124,9 @@ return Kind >= kConst_Base && Kind <= kConst_Max; } - /// Judge if this given immediate should be randomized or pooled - /// By default should return false, only constant integers should - /// truly go through this method. + /// Judge if this given immediate should be randomized or pooled By default + /// should return false, only constant integers should truly go through this + /// method. 
virtual bool shouldBeRandomizedOrPooled(const GlobalContext *Ctx) { (void)Ctx; return false; @@ -142,9 +142,9 @@ Vars = nullptr; NumVars = 0; } - /// PoolEntryID is an integer that uniquely identifies the constant - /// within its constant pool. It is used for building the constant - /// pool in the object code and for referencing its entries. + /// PoolEntryID is an integer that uniquely identifies the constant within its + /// constant pool. It is used for building the constant pool in the object + /// code and for referencing its entries. const uint32_t PoolEntryID; /// Whether we should pool this constant. Usually Float/Double and pooled /// Integers should be flagged true. @@ -219,10 +219,9 @@ Str << static_cast<int64_t>(getValue()); } -/// RelocatableTuple bundles the parameters that are used to -/// construct an ConstantRelocatable. It is done this way so that -/// ConstantRelocatable can fit into the global constant pool -/// template mechanism. +/// RelocatableTuple bundles the parameters that are used to construct an +/// ConstantRelocatable. It is done this way so that ConstantRelocatable can fit +/// into the global constant pool template mechanism. class RelocatableTuple { RelocatableTuple() = delete; RelocatableTuple &operator=(const RelocatableTuple &) = delete; @@ -240,8 +239,8 @@ bool operator==(const RelocatableTuple &A, const RelocatableTuple &B); -/// ConstantRelocatable represents a symbolic constant combined with -/// a fixed offset. +/// ConstantRelocatable represents a symbolic constant combined with a fixed +/// offset. class ConstantRelocatable : public Constant { ConstantRelocatable() = delete; ConstantRelocatable(const ConstantRelocatable &) = delete; @@ -282,9 +281,9 @@ bool SuppressMangling; }; -/// ConstantUndef represents an unspecified bit pattern. Although it is -/// legal to lower ConstantUndef to any value, backends should try to -/// make code generation deterministic by lowering ConstantUndefs to 0. 
+/// ConstantUndef represents an unspecified bit pattern. Although it is legal to +/// lower ConstantUndef to any value, backends should try to make code +/// generation deterministic by lowering ConstantUndefs to 0. class ConstantUndef : public Constant { ConstantUndef() = delete; ConstantUndef(const ConstantUndef &) = delete; @@ -315,9 +314,9 @@ : Constant(kConstUndef, Ty, PoolEntryID) {} }; -/// RegWeight is a wrapper for a uint32_t weight value, with a -/// special value that represents infinite weight, and an addWeight() -/// method that ensures that W+infinity=infinity. +/// RegWeight is a wrapper for a uint32_t weight value, with a special value +/// that represents infinite weight, and an addWeight() method that ensures that +/// W+infinity=infinity. class RegWeight { public: RegWeight() = default; @@ -346,15 +345,15 @@ bool operator<=(const RegWeight &A, const RegWeight &B); bool operator==(const RegWeight &A, const RegWeight &B); -/// LiveRange is a set of instruction number intervals representing -/// a variable's live range. Generally there is one interval per basic -/// block where the variable is live, but adjacent intervals get -/// coalesced into a single interval. +/// LiveRange is a set of instruction number intervals representing a variable's +/// live range. Generally there is one interval per basic block where the +/// variable is live, but adjacent intervals get coalesced into a single +/// interval. class LiveRange { public: LiveRange() = default; - /// Special constructor for building a kill set. The advantage is - /// that we can reserve the right amount of space in advance. + /// Special constructor for building a kill set. The advantage is that we can + /// reserve the right amount of space in advance. 
explicit LiveRange(const std::vector<InstNumberT> &Kills) { Range.reserve(Kills.size()); for (InstNumberT I : Kills) @@ -392,22 +391,21 @@ using RangeType = std::vector<RangeElementType, CfgLocalAllocator<RangeElementType>>; RangeType Range; - /// TrimmedBegin is an optimization for the overlaps() computation. - /// Since the linear-scan algorithm always calls it as overlaps(Cur) - /// and Cur advances monotonically according to live range start, we - /// can optimize overlaps() by ignoring all segments that end before - /// the start of Cur's range. The linear-scan code enables this by - /// calling trim() on the ranges of interest as Cur advances. Note - /// that linear-scan also has to initialize TrimmedBegin at the - /// beginning by calling untrim(). + /// TrimmedBegin is an optimization for the overlaps() computation. Since the + /// linear-scan algorithm always calls it as overlaps(Cur) and Cur advances + /// monotonically according to live range start, we can optimize overlaps() by + /// ignoring all segments that end before the start of Cur's range. The + /// linear-scan code enables this by calling trim() on the ranges of interest + /// as Cur advances. Note that linear-scan also has to initialize TrimmedBegin + /// at the beginning by calling untrim(). RangeType::const_iterator TrimmedBegin; }; Ostream &operator<<(Ostream &Str, const LiveRange &L); /// Variable represents an operand that is register-allocated or -/// stack-allocated. If it is register-allocated, it will ultimately -/// have a non-negative RegNum field. +/// stack-allocated. If it is register-allocated, it will ultimately have a +/// non-negative RegNum field. class Variable : public Operand { Variable() = delete; Variable(const Variable &) = delete; @@ -495,11 +493,11 @@ LoVar = Lo; HiVar = Hi; } - /// Creates a temporary copy of the variable with a different type. - /// Used primarily for syntactic correctness of textual assembly - /// emission. 
Note that only basic information is copied, in - /// particular not IsArgument, IsImplicitArgument, IgnoreLiveness, - /// RegNumTmp, Live, LoVar, HiVar, VarsReal. + /// Creates a temporary copy of the variable with a different type. Used + /// primarily for syntactic correctness of textual assembly emission. Note + /// that only basic information is copied, in particular not IsArgument, + /// IsImplicitArgument, IgnoreLiveness, RegNumTmp, Live, LoVar, HiVar, + /// VarsReal. Variable *asType(Type Ty); void emit(const Cfg *Func) const override; @@ -521,18 +519,18 @@ Vars[0] = this; NumVars = 1; } - /// Number is unique across all variables, and is used as a - /// (bit)vector index for liveness analysis. + /// Number is unique across all variables, and is used as a (bit)vector index + /// for liveness analysis. const SizeT Number; Cfg::IdentifierIndexType NameIndex = Cfg::IdentifierIndexInvalid; bool IsArgument = false; bool IsImplicitArgument = false; - /// IgnoreLiveness means that the variable should be ignored when - /// constructing and validating live ranges. This is usually - /// reserved for the stack pointer. + /// IgnoreLiveness means that the variable should be ignored when constructing + /// and validating live ranges. This is usually reserved for the stack + /// pointer. bool IgnoreLiveness = false; - /// StackOffset is the canonical location on stack (only if - /// RegNum==NoRegister || IsArgument). + /// StackOffset is the canonical location on stack (only if RegNum==NoRegister + /// || IsArgument). int32_t StackOffset = 0; /// RegNum is the allocated register, or NoRegister if it isn't /// register-allocated. @@ -541,17 +539,15 @@ int32_t RegNumTmp = NoRegister; RegRequirement RegRequirement = RR_MayHaveRegister; LiveRange Live; - // LoVar and HiVar are needed for lowering from 64 to 32 bits. When - // lowering from I64 to I32 on a 32-bit architecture, we split the - // variable into two machine-size pieces. 
LoVar is the low-order - // machine-size portion, and HiVar is the remaining high-order - // portion. TODO: It's wasteful to penalize all variables on all - // targets this way; use a sparser representation. It's also - // wasteful for a 64-bit target. + // LoVar and HiVar are needed for lowering from 64 to 32 bits. When lowering + // from I64 to I32 on a 32-bit architecture, we split the variable into two + // machine-size pieces. LoVar is the low-order machine-size portion, and + // HiVar is the remaining high-order portion. + // TODO: It's wasteful to penalize all variables on all targets this way; use + // a sparser representation. It's also wasteful for a 64-bit target. Variable *LoVar = nullptr; Variable *HiVar = nullptr; - /// VarsReal (and Operand::Vars) are set up such that Vars[0] == - /// this. + /// VarsReal (and Operand::Vars) are set up such that Vars[0] == this. Variable *VarsReal[1]; }; @@ -611,13 +607,12 @@ public: explicit VariablesMetadata(const Cfg *Func) : Func(Func) {} - /// Initialize the state by traversing all instructions/variables in - /// the CFG. + /// Initialize the state by traversing all instructions/variables in the CFG. void init(MetadataKind TrackingKind); - /// Add a single node. This is called by init(), and can be called + /// Add a single node. This is called by init(), and can be called /// incrementally from elsewhere, e.g. after edge-splitting. void addNode(CfgNode *Node); - /// Returns whether the given Variable is tracked in this object. It should + /// Returns whether the given Variable is tracked in this object. It should /// only return false if changes were made to the CFG after running init(), in /// which case the state is stale and the results shouldn't be trusted (but it /// may be OK e.g. for dumping). @@ -627,29 +622,27 @@ /// Returns whether the given Variable has multiple definitions. bool isMultiDef(const Variable *Var) const; - /// Returns the first definition instruction of the given Variable. 
This is + /// Returns the first definition instruction of the given Variable. This is /// only valid for variables whose definitions are all within the same block, /// e.g. T after the lowered sequence "T=B; T+=C; A=T", for which - /// getFirstDefinition(T) would return the "T=B" instruction. For variables + /// getFirstDefinition(T) would return the "T=B" instruction. For variables /// with definitions span multiple blocks, nullptr is returned. const Inst *getFirstDefinition(const Variable *Var) const; - /// Returns the definition instruction of the given Variable, when - /// the variable has exactly one definition. Otherwise, nullptr is - /// returned. + /// Returns the definition instruction of the given Variable, when the + /// variable has exactly one definition. Otherwise, nullptr is returned. const Inst *getSingleDefinition(const Variable *Var) const; /// Returns the list of all definition instructions of the given Variable. const InstDefList &getLatterDefinitions(const Variable *Var) const; - /// Returns whether the given Variable is live across multiple - /// blocks. Mainly, this is used to partition Variables into - /// single-block versus multi-block sets for leveraging sparsity in - /// liveness analysis, and for implementing simple stack slot - /// coalescing. As a special case, function arguments are always - /// considered multi-block because they are live coming into the - /// entry block. + /// Returns whether the given Variable is live across multiple blocks. Mainly, + /// this is used to partition Variables into single-block versus multi-block + /// sets for leveraging sparsity in liveness analysis, and for implementing + /// simple stack slot coalescing. As a special case, function arguments are + /// always considered multi-block because they are live coming into the entry + /// block. bool isMultiBlock(const Variable *Var) const; /// Returns the node that the given Variable is used in, assuming - /// isMultiBlock() returns false. 
Otherwise, nullptr is returned. + /// isMultiBlock() returns false. Otherwise, nullptr is returned. CfgNode *getLocalUseNode(const Variable *Var) const; /// Returns the total use weight computed as the sum of uses multiplied by a
diff --git a/src/IcePhiLoweringImpl.h b/src/IcePhiLoweringImpl.h index cf932d6..1957645 100644 --- a/src/IcePhiLoweringImpl.h +++ b/src/IcePhiLoweringImpl.h
@@ -24,11 +24,11 @@ namespace Ice { namespace PhiLowering { -// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to -// preserve integrity of liveness analysis. This is needed for 32-bit -// targets. This assumes the 32-bit target has loOperand, hiOperand, -// and legalizeUndef methods. Undef values are also legalized, since -// loOperand() and hiOperand() don't expect Undef input. +/// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to preserve +/// integrity of liveness analysis. This is needed for 32-bit targets. This +/// assumes the 32-bit target has loOperand, hiOperand, and legalizeUndef +/// methods. Undef values are also legalized, since loOperand() and hiOperand() +/// don't expect Undef input. template <class TargetT> void prelowerPhis32Bit(TargetT *Target, CfgNode *Node, Cfg *Func) { for (Inst &I : Node->getPhis()) {
diff --git a/src/IceRNG.cpp b/src/IceRNG.cpp index 89b1893..987d1a4 100644 --- a/src/IceRNG.cpp +++ b/src/IceRNG.cpp
@@ -25,9 +25,9 @@ // TODO(wala,stichnot): Switch to RNG implementation from LLVM or C++11. // // TODO(wala,stichnot): Make it possible to replay the RNG sequence in a -// subsequent run, for reproducing a bug. Print the seed in a comment -// in the asm output. Embed the seed in the binary via metadata that an -// attacker can't introspect. +// subsequent run, for reproducing a bug. Print the seed in a comment in the +// asm output. Embed the seed in the binary via metadata that an attacker can't +// introspect. RandomNumberGenerator::RandomNumberGenerator(uint64_t Seed, llvm::StringRef) : State(Seed) {}
diff --git a/src/IceRNG.h b/src/IceRNG.h index 4eeefa6..7ee2e39 100644 --- a/src/IceRNG.h +++ b/src/IceRNG.h
@@ -52,9 +52,9 @@ uint64_t State; }; -/// This class adds additional random number generator utilities. The -/// reason for the wrapper class is that we want to keep the -/// RandomNumberGenerator interface identical to LLVM's. +/// This class adds additional random number generator utilities. The reason for +/// the wrapper class is that we want to keep the RandomNumberGenerator +/// interface identical to LLVM's. class RandomNumberGeneratorWrapper { RandomNumberGeneratorWrapper() = delete; RandomNumberGeneratorWrapper(const RandomNumberGeneratorWrapper &) = delete; @@ -71,9 +71,9 @@ RandomNumberGenerator &RNG; }; -/// RandomShuffle is an implementation of std::random_shuffle() that -/// doesn't change across stdlib implementations. Adapted from a -/// sample implementation at cppreference.com. +/// RandomShuffle is an implementation of std::random_shuffle() that doesn't +/// change across stdlib implementations. Adapted from a sample implementation +/// at cppreference.com. template <class RandomIt, class RandomFunc> void RandomShuffle(RandomIt First, RandomIt Last, RandomFunc &&RNG) { for (auto i = Last - First - 1; i > 0; --i)
diff --git a/src/IceRegAlloc.cpp b/src/IceRegAlloc.cpp index ad5c2b6..304ac37 100644 --- a/src/IceRegAlloc.cpp +++ b/src/IceRegAlloc.cpp
@@ -82,14 +82,14 @@ : Func(Func), Ctx(Func->getContext()), Target(Func->getTarget()), Verbose(BuildDefs::dump() && Func->isVerbose(IceV_LinearScan)) {} -// Prepare for full register allocation of all variables. We depend on -// liveness analysis to have calculated live ranges. +// Prepare for full register allocation of all variables. We depend on liveness +// analysis to have calculated live ranges. void LinearScan::initForGlobal() { TimerMarker T(TimerStack::TT_initUnhandled, Func); FindPreference = true; // For full register allocation, normally we want to enable FindOverlap // (meaning we look for opportunities for two overlapping live ranges to - // safely share the same register). However, we disable it for phi-lowering + // safely share the same register). However, we disable it for phi-lowering // register allocation since no overlap opportunities should be available and // it's more expensive to look for opportunities. FindOverlap = (Kind != RAK_Phi); @@ -262,7 +262,7 @@ } // This is called when Cur must be allocated a register but no registers are -// available across Cur's live range. To handle this, we find a register that +// available across Cur's live range. To handle this, we find a register that // is not explicitly used during Cur's live range, spill that register to a // stack location right before Cur's live range begins, and fill (reload) the // register from the stack location right after Cur's live range ends. @@ -297,9 +297,9 @@ if (I->getNumber() == End) FillPoint = I; if (SpillPoint != E) { - // Remove from RegMask any physical registers referenced during Cur's live - // range. Start looking after SpillPoint gets set, i.e. once Cur's live - // range begins. + // Remove from RegMask any physical registers referenced during Cur's + // live range. Start looking after SpillPoint gets set, i.e. once Cur's + // live range begins. 
FOREACH_VAR_IN_INST(Var, *I) { if (!Var->hasRegTmp()) continue; @@ -319,8 +319,9 @@ assert(RegNum != -1); Iter.Cur->setRegNumTmp(RegNum); Variable *Preg = Target->getPhysicalRegister(RegNum, Iter.Cur->getType()); - // TODO(stichnot): Add SpillLoc to VariablesMetadata tracking so that SpillLoc - // is correctly identified as !isMultiBlock(), reducing stack frame size. + // TODO(stichnot): Add SpillLoc to VariablesMetadata tracking so that + // SpillLoc is correctly identified as !isMultiBlock(), reducing stack frame + // size. Variable *SpillLoc = Func->makeVariable(Iter.Cur->getType()); // Add "reg=FakeDef;spill=reg" before SpillPoint Target->lowerInst(Node, SpillPoint, InstFakeDef::create(Func, Preg)); @@ -413,8 +414,8 @@ if (Variable *SrcVar = llvm::dyn_cast<Variable>(DefInst->getSrc(i))) { int32_t SrcReg = SrcVar->getRegNumTmp(); // Only consider source variables that have (so far) been assigned a - // register. That register must be one in the RegMask set, e.g. - // don't try to prefer the stack pointer as a result of the stacksave + // register. That register must be one in the RegMask set, e.g. don't + // try to prefer the stack pointer as a result of the stacksave // intrinsic. if (SrcVar->hasRegTmp() && Iter.RegMask[SrcReg]) { if (FindOverlap && !Iter.Free[SrcReg]) { @@ -469,7 +470,7 @@ // Remove registers from the Free[] list where an Unhandled pre-colored range // overlaps with the current range, and set those registers to infinite weight -// so that they aren't candidates for eviction. Cur->rangeEndsBefore(Item) is +// so that they aren't candidates for eviction. Cur->rangeEndsBefore(Item) is // an early exit check that turns a guaranteed O(N^2) algorithm into expected // linear complexity. 
void LinearScan::filterFreeWithPrecoloredRanges(IterationState &Iter) { @@ -610,9 +611,9 @@ const SizeT Index = I - 1; Variable *Item = Inactive[Index]; // Note: The Item->rangeOverlaps(Cur) clause is not part of the - // description of AssignMemLoc() in the original paper. But there - // doesn't seem to be any need to evict an inactive live range that - // doesn't overlap with the live range currently being considered. It's + // description of AssignMemLoc() in the original paper. But there doesn't + // seem to be any need to evict an inactive live range that doesn't + // overlap with the live range currently being considered. It's // especially bad if we would end up evicting an infinite-weight but // currently-inactive live range. The most common situation for this // would be a scratch register kill set for call instructions. @@ -644,9 +645,9 @@ if (Randomized) { // Create a random number generator for regalloc randomization. Merge // function's sequence and Kind value as the Salt. Because regAlloc() is - // called twice under O2, the second time with RAK_Phi, we check - // Kind == RAK_Phi to determine the lowest-order bit to make sure the Salt - // is different. + // called twice under O2, the second time with RAK_Phi, we check Kind == + // RAK_Phi to determine the lowest-order bit to make sure the Salt is + // different. uint64_t Salt = (Func->getSequenceNumber() << 1) ^ (Kind == RAK_Phi ? 0u : 1u); Target->makeRandomRegisterPermutation(
diff --git a/src/IceRegAlloc.h b/src/IceRegAlloc.h index b3986a3..ec37aa0 100644 --- a/src/IceRegAlloc.h +++ b/src/IceRegAlloc.h
@@ -60,10 +60,10 @@ void initForGlobal(); void initForInfOnly(); - /// Move an item from the From set to the To set. From[Index] is pushed onto + /// Move an item from the From set to the To set. From[Index] is pushed onto /// the end of To[], then the item is efficiently removed from From[] by /// effectively swapping it with the last item in From[] and then popping it - /// from the back. As such, the caller is best off iterating over From[] in + /// from the back. As such, the caller is best off iterating over From[] in /// reverse order to avoid the need for special handling of the iterator. void moveItem(UnorderedRanges &From, SizeT Index, UnorderedRanges &To) { To.push_back(From[Index]); @@ -109,8 +109,8 @@ /// currently assigned to. It can be greater than 1 as a result of /// AllowOverlap inference. llvm::SmallVector<int32_t, REGS_SIZE> RegUses; - // TODO(jpp): for some architectures a SmallBitVector might not be big enough. - // Evaluate what the performance impact on those architectures is. + // TODO(jpp): for some architectures a SmallBitVector might not be big + // enough. Evaluate what the performance impact on those architectures is. llvm::SmallVector<const llvm::SmallBitVector *, REGS_SIZE> RegAliases; bool FindPreference = false; bool FindOverlap = false;
diff --git a/src/IceRegistersARM32.h b/src/IceRegistersARM32.h index a80b9b2..eafed3a 100644 --- a/src/IceRegistersARM32.h +++ b/src/IceRegistersARM32.h
@@ -23,8 +23,8 @@ class RegARM32 { public: - /// An enum of every register. The enum value may not match the encoding - /// used to binary encode register operands in instructions. + /// An enum of every register. The enum value may not match the encoding used + /// to binary encode register operands in instructions. enum AllRegisters { #define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt, \ isFP32, isFP64, isVec128, alias_init) \ @@ -37,8 +37,8 @@ #undef X }; - /// An enum of GPR Registers. The enum value does match the encoding used - /// to binary encode register operands in instructions. + /// An enum of GPR Registers. The enum value does match the encoding used to + /// binary encode register operands in instructions. enum GPRRegister { #define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt, \ isFP32, isFP64, isVec128, alias_init) \
diff --git a/src/IceRegistersX8632.h b/src/IceRegistersX8632.h index b0d22bb..73492ef 100644 --- a/src/IceRegistersX8632.h +++ b/src/IceRegistersX8632.h
@@ -23,8 +23,8 @@ class RegX8632 { public: - /// An enum of every register. The enum value may not match the encoding - /// used to binary encode register operands in instructions. + /// An enum of every register. The enum value may not match the encoding used + /// to binary encode register operands in instructions. enum AllRegisters { #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ frameptr, isI8, isInt, isFP) \ @@ -37,8 +37,8 @@ #undef X }; - /// An enum of GPR Registers. The enum value does match the encoding used - /// to binary encode register operands in instructions. + /// An enum of GPR Registers. The enum value does match the encoding used to + /// binary encode register operands in instructions. enum GPRRegister { #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ frameptr, isI8, isInt, isFP) \ @@ -48,8 +48,8 @@ Encoded_Not_GPR = -1 }; - /// An enum of XMM Registers. The enum value does match the encoding used - /// to binary encode register operands in instructions. + /// An enum of XMM Registers. The enum value does match the encoding used to + /// binary encode register operands in instructions. enum XmmRegister { #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ frameptr, isI8, isInt, isFP) \ @@ -59,8 +59,8 @@ Encoded_Not_Xmm = -1 }; - /// An enum of Byte Registers. The enum value does match the encoding used - /// to binary encode register operands in instructions. + /// An enum of Byte Registers. The enum value does match the encoding used to + /// binary encode register operands in instructions. enum ByteRegister { #define X(val, encode) Encoded_##val encode, REGX8632_BYTEREG_TABLE
diff --git a/src/IceRegistersX8664.h b/src/IceRegistersX8664.h index bc448b2..3a10f00 100644 --- a/src/IceRegistersX8664.h +++ b/src/IceRegistersX8664.h
@@ -23,8 +23,8 @@ class RegX8664 { public: - /// An enum of every register. The enum value may not match the encoding - /// used to binary encode register operands in instructions. + /// An enum of every register. The enum value may not match the encoding used + /// to binary encode register operands in instructions. enum AllRegisters { #define X(val, encode, name64, name, name16, name8, scratch, preserved, \ stackptr, frameptr, isInt, isFP) \ @@ -37,8 +37,8 @@ #undef X }; - /// An enum of GPR Registers. The enum value does match the encoding used - /// to binary encode register operands in instructions. + /// An enum of GPR Registers. The enum value does match the encoding used to + /// binary encode register operands in instructions. enum GPRRegister { #define X(val, encode, name64, name, name16, name8, scratch, preserved, \ stackptr, frameptr, isInt, isFP) \ @@ -48,8 +48,8 @@ Encoded_Not_GPR = -1 }; - /// An enum of XMM Registers. The enum value does match the encoding used - /// to binary encode register operands in instructions. + /// An enum of XMM Registers. The enum value does match the encoding used to + /// binary encode register operands in instructions. enum XmmRegister { #define X(val, encode, name64, name, name16, name8, scratch, preserved, \ stackptr, frameptr, isInt, isFP) \ @@ -59,8 +59,8 @@ Encoded_Not_Xmm = -1 }; - /// An enum of Byte Registers. The enum value does match the encoding used - /// to binary encode register operands in instructions. + /// An enum of Byte Registers. The enum value does match the encoding used to + /// binary encode register operands in instructions. enum ByteRegister { #define X(val, encode) Encoded_##val encode, REGX8664_BYTEREG_TABLE
diff --git a/src/IceSwitchLowering.cpp b/src/IceSwitchLowering.cpp index 6207495..047aa91 100644 --- a/src/IceSwitchLowering.cpp +++ b/src/IceSwitchLowering.cpp
@@ -55,8 +55,8 @@ // Test for a single jump table. This can be done in constant time whereas // finding the best set of jump table would be quadratic, too slow(?). If - // jump tables were included in the search tree we'd first have to traverse to - // them. Ideally we would have an unbalanced tree which is biased towards + // jump tables were included in the search tree we'd first have to traverse + // to them. Ideally we would have an unbalanced tree which is biased towards // frequently executed code but we can't do this well without profiling data. // So, this single jump table is a good starting point where you can get to // the jump table quickly without figuring out how to unbalance the tree.
diff --git a/src/IceSwitchLowering.h b/src/IceSwitchLowering.h index e1cdb8a..df3bef3 100644 --- a/src/IceSwitchLowering.h +++ b/src/IceSwitchLowering.h
@@ -75,8 +75,8 @@ bool tryAppend(const CaseCluster &New); }; -/// Store the jump table data so that it can be emitted later in the correct -/// ELF section once the offsets from the start of the function are known. +/// Store the jump table data so that it can be emitted later in the correct ELF +/// section once the offsets from the start of the function are known. class JumpTableData { JumpTableData() = delete; JumpTableData &operator=(const JumpTableData &) = delete;
diff --git a/src/IceTLS.h b/src/IceTLS.h index 0e7731d..9a20e70 100644 --- a/src/IceTLS.h +++ b/src/IceTLS.h
@@ -9,9 +9,8 @@ /// /// \file /// This file defines macros for working around the lack of support for -/// thread_local in MacOS 10.6. It assumes std::thread is written in -/// terms of pthread. Define ICE_THREAD_LOCAL_HACK to enable the -/// pthread workarounds. +/// thread_local in MacOS 10.6. It assumes std::thread is written in terms of +/// pthread. Define ICE_THREAD_LOCAL_HACK to enable the pthread workarounds. /// //===----------------------------------------------------------------------===// @@ -26,25 +25,25 @@ // Defines 4 macros for unifying thread_local and pthread: // -// ICE_TLS_DECLARE_FIELD(Type, FieldName): Declare a static -// thread_local field inside the current class definition. "Type" -// needs to be a pointer type, such as int* or class Foo*. +// ICE_TLS_DECLARE_FIELD(Type, FieldName): Declare a static thread_local field +// inside the current class definition. "Type" needs to be a pointer type, such +// as int* or class Foo*. // // ICE_TLS_DEFINE_FIELD(Type, ClassName, FieldName): Define a static -// thread_local field outside of its class definition. The field will +// thread_local field outside of its class definition. The field will // ultimately be initialized to nullptr. // -// ICE_TLS_INIT_FIELD(FieldName): Ensure the thread_local field is -// properly initialized. This is intended to be called from within a -// static method of the field's class after main() starts (to ensure -// that the pthread library is fully initialized) but before any uses -// of ICE_TLS_GET_FIELD or ICE_TLS_SET_FIELD. +// ICE_TLS_INIT_FIELD(FieldName): Ensure the thread_local field is properly +// initialized. This is intended to be called from within a static method of +// the field's class after main() starts (to ensure that the pthread library is +// fully initialized) but before any uses of ICE_TLS_GET_FIELD or +// ICE_TLS_SET_FIELD. // // ICE_TLS_GET_FIELD(Type, FieldName): Read the value of the static -// thread_local field. 
Must be done within the context of its class. +// thread_local field. Must be done within the context of its class. // // ICE_TLS_SET_FIELD(FieldName, Value): Write a value into the static -// thread_local field. Must be done within the context of its class. +// thread_local field. Must be done within the context of its class. // TODO(stichnot): Limit this define to only the platforms that // absolutely require it. And ideally, eventually remove this hack @@ -52,17 +51,16 @@ #define ICE_THREAD_LOCAL_HACK #ifdef ICE_THREAD_LOCAL_HACK -// For a static thread_local field F of a class C, instead of -// declaring and defining C::F, we create two static fields: +// For a static thread_local field F of a class C, instead of declaring and +// defining C::F, we create two static fields: // static pthread_key_t F__key; // static int F__initStatus; // // The F__initStatus field is used to hold the result of the -// pthread_key_create() call, where a zero value indicates success, -// and a nonzero value indicates failure or that ICE_TLS_INIT_FIELD() -// was never called. -// The F__key field is used as the argument to -// pthread_getspecific() and pthread_setspecific(). +// pthread_key_create() call, where a zero value indicates success, and a +// nonzero value indicates failure or that ICE_TLS_INIT_FIELD() was never +// called. The F__key field is used as the argument to pthread_getspecific() +// and pthread_setspecific(). #include <pthread.h>
diff --git a/src/IceTargetLowering.cpp b/src/IceTargetLowering.cpp index 5268aa7..6920788 100644 --- a/src/IceTargetLowering.cpp +++ b/src/IceTargetLowering.cpp
@@ -8,11 +8,10 @@ //===----------------------------------------------------------------------===// /// /// \file -/// This file implements the skeleton of the TargetLowering class, -/// specifically invoking the appropriate lowering method for a given -/// instruction kind and driving global register allocation. It also -/// implements the non-deleted instruction iteration in -/// LoweringContext. +/// This file implements the skeleton of the TargetLowering class, specifically +/// invoking the appropriate lowering method for a given instruction kind and +/// driving global register allocation. It also implements the non-deleted +/// instruction iteration in LoweringContext. /// //===----------------------------------------------------------------------===// @@ -117,29 +116,27 @@ } } -// Lowers a single instruction according to the information in -// Context, by checking the Context.Cur instruction kind and calling -// the appropriate lowering method. The lowering method should insert -// target instructions at the Cur.Next insertion point, and should not -// delete the Context.Cur instruction or advance Context.Cur. +// Lowers a single instruction according to the information in Context, by +// checking the Context.Cur instruction kind and calling the appropriate +// lowering method. The lowering method should insert target instructions at +// the Cur.Next insertion point, and should not delete the Context.Cur +// instruction or advance Context.Cur. // -// The lowering method may look ahead in the instruction stream as -// desired, and lower additional instructions in conjunction with the -// current one, for example fusing a compare and branch. If it does, -// it should advance Context.Cur to point to the next non-deleted -// instruction to process, and it should delete any additional -// instructions it consumes. 
+// The lowering method may look ahead in the instruction stream as desired, and +// lower additional instructions in conjunction with the current one, for +// example fusing a compare and branch. If it does, it should advance +// Context.Cur to point to the next non-deleted instruction to process, and it +// should delete any additional instructions it consumes. void TargetLowering::lower() { assert(!Context.atEnd()); Inst *Inst = Context.getCur(); Inst->deleteIfDead(); if (!Inst->isDeleted() && !llvm::isa<InstFakeDef>(Inst) && !llvm::isa<InstFakeUse>(Inst)) { - // Mark the current instruction as deleted before lowering, - // otherwise the Dest variable will likely get marked as non-SSA. - // See Variable::setDefinition(). However, just pass-through - // FakeDef and FakeUse instructions that might have been inserted - // prior to lowering. + // Mark the current instruction as deleted before lowering, otherwise the + // Dest variable will likely get marked as non-SSA. See + // Variable::setDefinition(). However, just pass-through FakeDef and + // FakeUse instructions that might have been inserted prior to lowering. Inst->setDeleted(); switch (Inst->getKind()) { case Inst::Alloca: @@ -231,10 +228,10 @@ Func->setError("Can't lower unsupported instruction type"); } -// Drives register allocation, allowing all physical registers (except -// perhaps for the frame pointer) to be allocated. This set of -// registers could potentially be parameterized if we want to restrict -// registers e.g. for performance testing. +// Drives register allocation, allowing all physical registers (except perhaps +// for the frame pointer) to be allocated. This set of registers could +// potentially be parameterized if we want to restrict registers e.g. for +// performance testing. 
void TargetLowering::regAlloc(RegAllocKind Kind) { TimerMarker T(TimerStack::TT_regAlloc, Func); LinearScan LinearScan(Func); @@ -250,15 +247,14 @@ } void TargetLowering::inferTwoAddress() { - // Find two-address non-SSA instructions where Dest==Src0, and set - // the DestNonKillable flag to keep liveness analysis consistent. + // Find two-address non-SSA instructions where Dest==Src0, and set the + // DestNonKillable flag to keep liveness analysis consistent. for (auto Inst = Context.getCur(), E = Context.getNext(); Inst != E; ++Inst) { if (Inst->isDeleted()) continue; if (Variable *Dest = Inst->getDest()) { - // TODO(stichnot): We may need to consider all source - // operands, not just the first one, if using 3-address - // instructions. + // TODO(stichnot): We may need to consider all source operands, not just + // the first one, if using 3-address instructions. if (Inst->getSrcSize() > 0 && Inst->getSrc(0) == Dest) Inst->setDestNonKillable(); } @@ -268,8 +264,8 @@ void TargetLowering::sortVarsByAlignment(VarList &Dest, const VarList &Source) const { Dest = Source; - // Instead of std::sort, we could do a bucket sort with log2(alignment) - // as the buckets, if performance is an issue. + // Instead of std::sort, we could do a bucket sort with log2(alignment) as + // the buckets, if performance is an issue. std::sort(Dest.begin(), Dest.end(), [this](const Variable *V1, const Variable *V2) { return typeWidthInBytesOnStack(V1->getType()) > @@ -296,17 +292,17 @@ } } - // If SimpleCoalescing is false, each variable without a register - // gets its own unique stack slot, which leads to large stack - // frames. If SimpleCoalescing is true, then each "global" variable - // without a register gets its own slot, but "local" variable slots - // are reused across basic blocks. E.g., if A and B are local to - // block 1 and C is local to block 2, then C may share a slot with A or B. 
+ // If SimpleCoalescing is false, each variable without a register gets its + // own unique stack slot, which leads to large stack frames. If + // SimpleCoalescing is true, then each "global" variable without a register + // gets its own slot, but "local" variable slots are reused across basic + // blocks. E.g., if A and B are local to block 1 and C is local to block 2, + // then C may share a slot with A or B. // // We cannot coalesce stack slots if this function calls a "returns twice" - // function. In that case, basic blocks may be revisited, and variables - // local to those basic blocks are actually live until after the - // called function returns a second time. + // function. In that case, basic blocks may be revisited, and variables local + // to those basic blocks are actually live until after the called function + // returns a second time. const bool SimpleCoalescing = !callsReturnsTwice(); std::vector<size_t> LocalsSize(Func->getNumNodes()); @@ -317,15 +313,15 @@ RegsUsed[Var->getRegNum()] = true; continue; } - // An argument either does not need a stack slot (if passed in a - // register) or already has one (if passed on the stack). + // An argument either does not need a stack slot (if passed in a register) + // or already has one (if passed on the stack). if (Var->getIsArg()) continue; // An unreferenced variable doesn't need a stack slot. if (!IsVarReferenced[Var->getIndex()]) continue; - // Check a target-specific variable (it may end up sharing stack slots) - // and not need accounting here. + // Check a target-specific variable (it may end up sharing stack slots) and + // not need accounting here. if (TargetVarHook(Var)) continue; SpilledVariables.push_back(Var); @@ -336,8 +332,8 @@ for (Variable *Var : SortedSpilledVariables) { size_t Increment = typeWidthInBytesOnStack(Var->getType()); - // We have sorted by alignment, so the first variable we encounter that - // is located in each area determines the max alignment for the area. 
+ // We have sorted by alignment, so the first variable we encounter that is + // located in each area determines the max alignment for the area. if (!*SpillAreaAlignmentBytes) *SpillAreaAlignmentBytes = Increment; if (SimpleCoalescing && VMetadata->isTracked(Var)) { @@ -373,8 +369,8 @@ *SpillAreaPaddingBytes = SpillAreaStart - PaddingStart; } - // If there are separate globals and locals areas, make sure the - // locals area is aligned by padding the end of the globals area. + // If there are separate globals and locals areas, make sure the locals area + // is aligned by padding the end of the globals area. if (LocalsSlotsAlignmentBytes) { uint32_t GlobalsAndSubsequentPaddingSize = GlobalsSize; GlobalsAndSubsequentPaddingSize = @@ -391,11 +387,11 @@ const VariablesMetadata *VMetadata = Func->getVMetadata(); // For testing legalization of large stack offsets on targets with limited // offset bits in instruction encodings, add some padding. This assumes that - // SpillAreaSizeBytes has accounted for the extra test padding. - // When UseFramePointer is true, the offset depends on the padding, - // not just the SpillAreaSizeBytes. On the other hand, when UseFramePointer - // is false, the offsets depend on the gap between SpillAreaSizeBytes - // and SpillAreaPaddingBytes, so we don't increment that. + // SpillAreaSizeBytes has accounted for the extra test padding. When + // UseFramePointer is true, the offset depends on the padding, not just the + // SpillAreaSizeBytes. On the other hand, when UseFramePointer is false, the + // offsets depend on the gap between SpillAreaSizeBytes and + // SpillAreaPaddingBytes, so we don't increment that. size_t TestPadding = Ctx->getFlags().getTestStackExtra(); if (UsesFramePointer) SpillAreaPaddingBytes += TestPadding; @@ -506,8 +502,8 @@ if (!BuildDefs::dump()) return; - // If external and not initialized, this must be a cross test. - // Don't generate a declaration for such cases. 
+ // If external and not initialized, this must be a cross test. Don't generate + // a declaration for such cases. const bool IsExternal = Var.isExternal() || Ctx->getFlags().getDisableInternal(); if (IsExternal && !Var.hasInitializer()) @@ -577,10 +573,10 @@ } } } else { - // NOTE: for non-constant zero initializers, this is BSS (no bits), - // so an ELF writer would not write to the file, and only track - // virtual offsets, but the .s writer still needs this .zero and - // cannot simply use the .size to advance offsets. + // NOTE: for non-constant zero initializers, this is BSS (no bits), so an + // ELF writer would not write to the file, and only track virtual offsets, + // but the .s writer still needs this .zero and cannot simply use the .size + // to advance offsets. Str << "\t.zero\t" << Size << "\n"; }
diff --git a/src/IceTargetLowering.h b/src/IceTargetLowering.h index 7184ff0..71b6ca2 100644 --- a/src/IceTargetLowering.h +++ b/src/IceTargetLowering.h
@@ -9,12 +9,11 @@ /// /// \file /// This file declares the TargetLowering, LoweringContext, and -/// TargetDataLowering classes. TargetLowering is an abstract class -/// used to drive the translation/lowering process. LoweringContext -/// maintains a context for lowering each instruction, offering -/// conveniences such as iterating over non-deleted instructions. -/// TargetDataLowering is an abstract class used to drive the -/// lowering/emission of global initializers, external global +/// TargetDataLowering classes. TargetLowering is an abstract class used to +/// drive the translation/lowering process. LoweringContext maintains a context +/// for lowering each instruction, offering conveniences such as iterating over +/// non-deleted instructions. TargetDataLowering is an abstract class used to +/// drive the lowering/emission of global initializers, external global /// declarations, and internal constant pools. /// //===----------------------------------------------------------------------===// @@ -29,12 +28,11 @@ namespace Ice { -/// LoweringContext makes it easy to iterate through non-deleted -/// instructions in a node, and insert new (lowered) instructions at -/// the current point. Along with the instruction list container and -/// associated iterators, it holds the current node, which is needed -/// when inserting new instructions in order to track whether variables -/// are used as single-block or multi-block. +/// LoweringContext makes it easy to iterate through non-deleted instructions in +/// a node, and insert new (lowered) instructions at the current point. Along +/// with the instruction list container and associated iterators, it holds the +/// current node, which is needed when inserting new instructions in order to +/// track whether variables are used as single-block or multi-block. 
class LoweringContext { LoweringContext(const LoweringContext &) = delete; LoweringContext &operator=(const LoweringContext &) = delete; @@ -72,17 +70,16 @@ /// Node is the argument to Inst::updateVars(). CfgNode *Node = nullptr; Inst *LastInserted = nullptr; - /// Cur points to the current instruction being considered. It is - /// guaranteed to point to a non-deleted instruction, or to be End. + /// Cur points to the current instruction being considered. It is guaranteed + /// to point to a non-deleted instruction, or to be End. InstList::iterator Cur; - /// Next doubles as a pointer to the next valid instruction (if any), - /// and the new-instruction insertion point. It is also updated for - /// the caller in case the lowering consumes more than one high-level - /// instruction. It is guaranteed to point to a non-deleted - /// instruction after Cur, or to be End. TODO: Consider separating - /// the notion of "next valid instruction" and "new instruction - /// insertion point", to avoid confusion when previously-deleted - /// instructions come between the two points. + /// Next doubles as a pointer to the next valid instruction (if any), and the + /// new-instruction insertion point. It is also updated for the caller in case + /// the lowering consumes more than one high-level instruction. It is + /// guaranteed to point to a non-deleted instruction after Cur, or to be End. + // TODO: Consider separating the notion of "next valid instruction" and "new + // instruction insertion point", to avoid confusion when previously-deleted + // instructions come between the two points. InstList::iterator Next; /// Begin is a copy of Insts.begin(), used if iterators are moved backward. InstList::iterator Begin; @@ -159,24 +156,22 @@ /// Inserts and lowers a single high-level instruction at a specific insertion /// point. 
void lowerInst(CfgNode *Node, InstList::iterator Next, InstHighLevel *Instr); - /// Does preliminary lowering of the set of Phi instructions in the - /// current node. The main intention is to do what's needed to keep - /// the unlowered Phi instructions consistent with the lowered - /// non-Phi instructions, e.g. to lower 64-bit operands on a 32-bit - /// target. + /// Does preliminary lowering of the set of Phi instructions in the current + /// node. The main intention is to do what's needed to keep the unlowered Phi + /// instructions consistent with the lowered non-Phi instructions, e.g. to + /// lower 64-bit operands on a 32-bit target. virtual void prelowerPhis() {} - /// Tries to do branch optimization on a single instruction. Returns - /// true if some optimization was done. + /// Tries to do branch optimization on a single instruction. Returns true if + /// some optimization was done. virtual bool doBranchOpt(Inst * /*I*/, const CfgNode * /*NextNode*/) { return false; } virtual SizeT getNumRegisters() const = 0; - /// Returns a variable pre-colored to the specified physical - /// register. This is generally used to get very direct access to - /// the register such as in the prolog or epilog or for marking - /// scratch registers as killed by a call. If a Type is not - /// provided, a target-specific default type is used. + /// Returns a variable pre-colored to the specified physical register. This is + /// generally used to get very direct access to the register such as in the + /// prolog or epilog or for marking scratch registers as killed by a call. If + /// a Type is not provided, a target-specific default type is used. virtual Variable *getPhysicalRegister(SizeT RegNum, Type Ty = IceType_void) = 0; /// Returns a printable name for the register. 
@@ -187,8 +182,8 @@ virtual size_t typeWidthInBytesOnStack(Type Ty) const = 0; bool hasComputedFrame() const { return HasComputedFrame; } - /// Returns true if this function calls a function that has the - /// "returns twice" attribute. + /// Returns true if this function calls a function that has the "returns + /// twice" attribute. bool callsReturnsTwice() const { return CallsReturnsTwice; } void setCallsReturnsTwice(bool RetTwice) { CallsReturnsTwice = RetTwice; } int32_t getStackAdjustment() const { return StackAdjustment; } @@ -220,10 +215,10 @@ const llvm::SmallBitVector &ExcludeRegisters, uint64_t Salt) const = 0; - /// Save/restore any mutable state for the situation where code - /// emission needs multiple passes, such as sandboxing or relaxation. - /// Subclasses may provide their own implementation, but should be - /// sure to also call the parent class's methods. + /// Save/restore any mutable state for the situation where code emission needs + /// multiple passes, such as sandboxing or relaxation. Subclasses may provide + /// their own implementation, but should be sure to also call the parent + /// class's methods. virtual void snapshotEmitState() { SnapshotStackAdjustment = StackAdjustment; } @@ -285,30 +280,30 @@ virtual void doMockBoundsCheck(Operand *) {} virtual void randomlyInsertNop(float Probability, RandomNumberGenerator &RNG) = 0; - /// This gives the target an opportunity to post-process the lowered - /// expansion before returning. + /// This gives the target an opportunity to post-process the lowered expansion + /// before returning. virtual void postLower() {} - /// Find two-address non-SSA instructions and set the DestNonKillable flag - /// to keep liveness analysis consistent. + /// Find two-address non-SSA instructions and set the DestNonKillable flag to + /// keep liveness analysis consistent. 
void inferTwoAddress(); - /// Make a pass over the Cfg to determine which variables need stack slots - /// and place them in a sorted list (SortedSpilledVariables). Among those, - /// vars, classify the spill variables as local to the basic block vs - /// global (multi-block) in order to compute the parameters GlobalsSize - /// and SpillAreaSizeBytes (represents locals or general vars if the - /// coalescing of locals is disallowed) along with alignments required - /// for variables in each area. We rely on accurate VMetadata in order to - /// classify a variable as global vs local (otherwise the variable is - /// conservatively global). The in-args should be initialized to 0. + /// Make a pass over the Cfg to determine which variables need stack slots and + /// place them in a sorted list (SortedSpilledVariables). Among those, vars, + /// classify the spill variables as local to the basic block vs global + /// (multi-block) in order to compute the parameters GlobalsSize and + /// SpillAreaSizeBytes (represents locals or general vars if the coalescing of + /// locals is disallowed) along with alignments required for variables in each + /// area. We rely on accurate VMetadata in order to classify a variable as + /// global vs local (otherwise the variable is conservatively global). The + /// in-args should be initialized to 0. /// - /// This is only a pre-pass and the actual stack slot assignment is - /// handled separately. + /// This is only a pre-pass and the actual stack slot assignment is handled + /// separately. /// - /// There may be target-specific Variable types, which will be handled - /// by TargetVarHook. If the TargetVarHook returns true, then the variable - /// is skipped and not considered with the rest of the spilled variables. + /// There may be target-specific Variable types, which will be handled by + /// TargetVarHook. If the TargetVarHook returns true, then the variable is + /// skipped and not considered with the rest of the spilled variables. 
void getVarStackSlotParams(VarList &SortedSpilledVariables, llvm::SmallBitVector &RegsUsed, size_t *GlobalsSize, size_t *SpillAreaSizeBytes, @@ -316,9 +311,9 @@ uint32_t *LocalsSlotsAlignmentBytes, std::function<bool(Variable *)> TargetVarHook); - /// Calculate the amount of padding needed to align the local and global - /// areas to the required alignment. This assumes the globals/locals layout - /// used by getVarStackSlotParams and assignVarStackSlots. + /// Calculate the amount of padding needed to align the local and global areas + /// to the required alignment. This assumes the globals/locals layout used by + /// getVarStackSlotParams and assignVarStackSlots. void alignStackSpillAreas(uint32_t SpillAreaStartOffset, uint32_t SpillAreaAlignmentBytes, size_t GlobalsSize, @@ -326,21 +321,19 @@ uint32_t *SpillAreaPaddingBytes, uint32_t *LocalsSlotsPaddingBytes); - /// Make a pass through the SortedSpilledVariables and actually assign - /// stack slots. SpillAreaPaddingBytes takes into account stack alignment - /// padding. The SpillArea starts after that amount of padding. - /// This matches the scheme in getVarStackSlotParams, where there may - /// be a separate multi-block global var spill area and a local var - /// spill area. + /// Make a pass through the SortedSpilledVariables and actually assign stack + /// slots. SpillAreaPaddingBytes takes into account stack alignment padding. + /// The SpillArea starts after that amount of padding. This matches the scheme + /// in getVarStackSlotParams, where there may be a separate multi-block global + /// var spill area and a local var spill area. void assignVarStackSlots(VarList &SortedSpilledVariables, size_t SpillAreaPaddingBytes, size_t SpillAreaSizeBytes, size_t GlobalsAndSubsequentPaddingSize, bool UsesFramePointer); - /// Sort the variables in Source based on required alignment. - /// The variables with the largest alignment need are placed in the front - /// of the Dest list. 
+ /// Sort the variables in Source based on required alignment. The variables + /// with the largest alignment need are placed in the front of the Dest list. void sortVarsByAlignment(VarList &Dest, const VarList &Source) const; /// Make a call to an external helper function. @@ -362,8 +355,8 @@ GlobalContext *Ctx; bool HasComputedFrame = false; bool CallsReturnsTwice = false; - /// StackAdjustment keeps track of the current stack offset from its - /// natural location, as arguments are pushed for a function call. + /// StackAdjustment keeps track of the current stack offset from its natural + /// location, as arguments are pushed for a function call. int32_t StackAdjustment = 0; SizeT NextLabelNumber = 0; SizeT NextJumpTableNumber = 0; @@ -411,9 +404,9 @@ int32_t SnapshotStackAdjustment = 0; }; -/// TargetDataLowering is used for "lowering" data including initializers -/// for global variables, and the internal constant pools. It is separated -/// out from TargetLowering because it does not require a Cfg. +/// TargetDataLowering is used for "lowering" data including initializers for +/// global variables, and the internal constant pools. It is separated out from +/// TargetLowering because it does not require a Cfg. class TargetDataLowering { TargetDataLowering() = delete; TargetDataLowering(const TargetDataLowering &) = delete; @@ -432,8 +425,8 @@ void emitGlobal(const VariableDeclaration &Var, const IceString &SectionSuffix); - /// For now, we assume .long is the right directive for emitting 4 byte - /// emit global relocations. However, LLVM MIPS usually uses .4byte instead. + /// For now, we assume .long is the right directive for emitting 4 byte emit + /// global relocations. However, LLVM MIPS usually uses .4byte instead. /// Perhaps there is some difference when the location is unaligned. 
static const char *getEmit32Directive() { return ".long"; } @@ -441,9 +434,9 @@ GlobalContext *Ctx; }; -/// TargetHeaderLowering is used to "lower" the header of an output file. -/// It writes out the target-specific header attributes. E.g., for ARM -/// this writes out the build attributes (float ABI, etc.). +/// TargetHeaderLowering is used to "lower" the header of an output file. It +/// writes out the target-specific header attributes. E.g., for ARM this writes +/// out the build attributes (float ABI, etc.). class TargetHeaderLowering { TargetHeaderLowering() = delete; TargetHeaderLowering(const TargetHeaderLowering &) = delete;
diff --git a/src/IceTargetLoweringARM32.cpp b/src/IceTargetLoweringARM32.cpp index 0634e45..fef145f 100644 --- a/src/IceTargetLoweringARM32.cpp +++ b/src/IceTargetLoweringARM32.cpp
@@ -47,7 +47,7 @@ } while (0) // The following table summarizes the logic for lowering the icmp instruction -// for i32 and narrower types. Each icmp condition has a clear mapping to an +// for i32 and narrower types. Each icmp condition has a clear mapping to an // ARM32 conditional move instruction. const struct TableIcmp32_ { @@ -62,8 +62,8 @@ // The following table summarizes the logic for lowering the icmp instruction // for the i64 type. Two conditional moves are needed for setting to 1 or 0. -// The operands may need to be swapped, and there is a slight difference -// for signed vs unsigned (comparing hi vs lo first, and using cmp vs sbc). +// The operands may need to be swapped, and there is a slight difference for +// signed vs unsigned (comparing hi vs lo first, and using cmp vs sbc). const struct TableIcmp64_ { bool IsSigned; bool Swapped; @@ -82,18 +82,16 @@ return TableIcmp32[Index].Mapping; } -// In some cases, there are x-macros tables for both high-level and -// low-level instructions/operands that use the same enum key value. -// The tables are kept separate to maintain a proper separation -// between abstraction layers. There is a risk that the tables could -// get out of sync if enum values are reordered or if entries are -// added or deleted. The following dummy namespaces use +// In some cases, there are x-macros tables for both high-level and low-level +// instructions/operands that use the same enum key value. The tables are kept +// separate to maintain a proper separation between abstraction layers. There +// is a risk that the tables could get out of sync if enum values are reordered +// or if entries are added or deleted. The following dummy namespaces use // static_asserts to ensure everything is kept in sync. // Validate the enum values in ICMPARM32_TABLE. namespace dummy1 { -// Define a temporary set of enum values based on low-level table -// entries. +// Define a temporary set of enum values based on low-level table entries. 
enum _tmp_enum { #define X(val, signed, swapped64, C_32, C1_64, C2_64) _tmp_##val, ICMPARM32_TABLE @@ -104,8 +102,8 @@ #define X(tag, str) static const int _table1_##tag = InstIcmp::tag; ICEINSTICMP_TABLE #undef X -// Define a set of constants based on low-level table entries, and -// ensure the table entry keys are consistent. +// Define a set of constants based on low-level table entries, and ensure the +// table entry keys are consistent. #define X(val, signed, swapped64, C_32, C1_64, C2_64) \ static const int _table2_##val = _tmp_##val; \ static_assert( \ @@ -113,8 +111,8 @@ "Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE"); ICMPARM32_TABLE #undef X -// Repeat the static asserts with respect to the high-level table -// entries in case the high-level table has extra entries. +// Repeat the static asserts with respect to the high-level table entries in +// case the high-level table has extra entries. #define X(tag, str) \ static_assert( \ _table1_##tag == _table2_##tag, \ @@ -126,17 +124,17 @@ // Stack alignment const uint32_t ARM32_STACK_ALIGNMENT_BYTES = 16; -// Value is in bytes. Return Value adjusted to the next highest multiple -// of the stack alignment. +// Value is in bytes. Return Value adjusted to the next highest multiple of the +// stack alignment. uint32_t applyStackAlignment(uint32_t Value) { return Utils::applyAlignment(Value, ARM32_STACK_ALIGNMENT_BYTES); } -// Value is in bytes. Return Value adjusted to the next highest multiple -// of the stack alignment required for the given type. +// Value is in bytes. Return Value adjusted to the next highest multiple of the +// stack alignment required for the given type. uint32_t applyStackAlignmentTy(uint32_t Value, Type Ty) { - // Use natural alignment, except that normally (non-NaCl) ARM only - // aligns vectors to 8 bytes. + // Use natural alignment, except that normally (non-NaCl) ARM only aligns + // vectors to 8 bytes. // TODO(jvoung): Check this ... 
size_t typeAlignInBytes = typeWidthInBytes(Ty); if (isVectorType(Ty)) @@ -172,9 +170,8 @@ TargetARM32::TargetARM32(Cfg *Func) : TargetLowering(Func), CPUFeatures(Func->getContext()->getFlags()) { - // TODO: Don't initialize IntegerRegisters and friends every time. - // Instead, initialize in some sort of static initializer for the - // class. + // TODO: Don't initialize IntegerRegisters and friends every time. Instead, + // initialize in some sort of static initializer for the class. // Limit this size (or do all bitsets need to be the same width)??? llvm::SmallBitVector IntegerRegisters(RegARM32::Reg_NUM); llvm::SmallBitVector Float32Registers(RegARM32::Reg_NUM); @@ -243,19 +240,18 @@ // Argument lowering Func->doArgLowering(); - // Target lowering. This requires liveness analysis for some parts - // of the lowering decisions, such as compare/branch fusing. If - // non-lightweight liveness analysis is used, the instructions need - // to be renumbered first. TODO: This renumbering should only be - // necessary if we're actually calculating live intervals, which we - // only do for register allocation. + // Target lowering. This requires liveness analysis for some parts of the + // lowering decisions, such as compare/branch fusing. If non-lightweight + // liveness analysis is used, the instructions need to be renumbered first. + // TODO: This renumbering should only be necessary if we're actually + // calculating live intervals, which we only do for register allocation. Func->renumberInstructions(); if (Func->hasError()) return; - // TODO: It should be sufficient to use the fastest liveness - // calculation, i.e. livenessLightweight(). However, for some - // reason that slows down the rest of the translation. Investigate. + // TODO: It should be sufficient to use the fastest liveness calculation, + // i.e. livenessLightweight(). However, for some reason that slows down the + // rest of the translation. Investigate. 
Func->liveness(Liveness_Basic); if (Func->hasError()) return; @@ -266,19 +262,19 @@ return; Func->dump("After ARM32 codegen"); - // Register allocation. This requires instruction renumbering and - // full liveness analysis. + // Register allocation. This requires instruction renumbering and full + // liveness analysis. Func->renumberInstructions(); if (Func->hasError()) return; Func->liveness(Liveness_Intervals); if (Func->hasError()) return; - // Validate the live range computations. The expensive validation - // call is deliberately only made when assertions are enabled. + // Validate the live range computations. The expensive validation call is + // deliberately only made when assertions are enabled. assert(Func->validateLiveness()); - // The post-codegen dump is done here, after liveness analysis and - // associated cleanup, to make the dump cleaner and more useful. + // The post-codegen dump is done here, after liveness analysis and associated + // cleanup, to make the dump cleaner and more useful. Func->dump("After initial ARM32 codegen"); Func->getVMetadata()->init(VMK_All); regAlloc(RAK_Global); @@ -305,11 +301,10 @@ Func->contractEmptyNodes(); Func->reorderNodes(); - // Branch optimization. This needs to be done just before code - // emission. In particular, no transformations that insert or - // reorder CfgNodes should be done after branch optimization. We go - // ahead and do it before nop insertion to reduce the amount of work - // needed for searching for opportunities. + // Branch optimization. This needs to be done just before code emission. In + // particular, no transformations that insert or reorder CfgNodes should be + // done after branch optimization. We go ahead and do it before nop insertion + // to reduce the amount of work needed for searching for opportunities. 
Func->doBranchOpt(); Func->dump("After branch optimization"); @@ -395,8 +390,8 @@ Reg = Func->makeVariable(Ty); Reg->setRegNum(RegNum); PhysicalRegisters[Ty][RegNum] = Reg; - // Specially mark SP and LR as an "argument" so that it is considered - // live upon function entry. + // Specially mark SP and LR as an "argument" so that it is considered live + // upon function entry. if (RegNum == RegARM32::Reg_sp || RegNum == RegARM32::Reg_lr) { Func->addImplicitArg(Reg); Reg->setIgnoreLiveness(); @@ -445,15 +440,15 @@ if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG) return false; int32_t RegLo, RegHi; - // Always start i64 registers at an even register, so this may end - // up padding away a register. + // Always start i64 registers at an even register, so this may end up padding + // away a register. NumGPRRegsUsed = Utils::applyAlignment(NumGPRRegsUsed, 2); RegLo = RegARM32::Reg_r0 + NumGPRRegsUsed; ++NumGPRRegsUsed; RegHi = RegARM32::Reg_r0 + NumGPRRegsUsed; ++NumGPRRegsUsed; - // If this bumps us past the boundary, don't allocate to a register - // and leave any previously speculatively consumed registers as consumed. + // If this bumps us past the boundary, don't allocate to a register and leave + // any previously speculatively consumed registers as consumed. if (NumGPRRegsUsed > ARM32_MAX_GPR_ARG) return false; Regs->first = RegLo; @@ -474,15 +469,15 @@ return false; if (isVectorType(Ty)) { NumFPRegUnits = Utils::applyAlignment(NumFPRegUnits, 4); - // Q registers are declared in reverse order, so - // RegARM32::Reg_q0 > RegARM32::Reg_q1. Therefore, we need to subtract - // NumFPRegUnits from Reg_q0. Same thing goes for D registers. + // Q registers are declared in reverse order, so RegARM32::Reg_q0 > + // RegARM32::Reg_q1. Therefore, we need to subtract NumFPRegUnits from + // Reg_q0. Same thing goes for D registers. 
static_assert(RegARM32::Reg_q0 > RegARM32::Reg_q1, "ARM32 Q registers are possibly declared incorrectly."); *Reg = RegARM32::Reg_q0 - (NumFPRegUnits / 4); NumFPRegUnits += 4; - // If this bumps us past the boundary, don't allocate to a register - // and leave any previously speculatively consumed registers as consumed. + // If this bumps us past the boundary, don't allocate to a register and + // leave any previously speculatively consumed registers as consumed. if (NumFPRegUnits > ARM32_MAX_FP_REG_UNITS) return false; } else if (Ty == IceType_f64) { @@ -491,8 +486,8 @@ NumFPRegUnits = Utils::applyAlignment(NumFPRegUnits, 2); *Reg = RegARM32::Reg_d0 - (NumFPRegUnits / 2); NumFPRegUnits += 2; - // If this bumps us past the boundary, don't allocate to a register - // and leave any previously speculatively consumed registers as consumed. + // If this bumps us past the boundary, don't allocate to a register and + // leave any previously speculatively consumed registers as consumed. if (NumFPRegUnits > ARM32_MAX_FP_REG_UNITS) return false; } else { @@ -509,9 +504,9 @@ VarList &Args = Func->getArgs(); TargetARM32::CallingConv CC; - // For each register argument, replace Arg in the argument list with the - // home register. Then generate an instruction in the prolog to copy the - // home register to the assigned location of Arg. + // For each register argument, replace Arg in the argument list with the home + // register. Then generate an instruction in the prolog to copy the home + // register to the assigned location of Arg. Context.init(Func->getEntryNode()); Context.setInsertPoint(Context.getCur()); @@ -568,13 +563,12 @@ // Helper function for addProlog(). // -// This assumes Arg is an argument passed on the stack. This sets the -// frame offset for Arg and updates InArgsSizeBytes according to Arg's -// width. 
For an I64 arg that has been split into Lo and Hi components, -// it calls itself recursively on the components, taking care to handle -// Lo first because of the little-endian architecture. Lastly, this -// function generates an instruction to copy Arg into its assigned -// register if applicable. +// This assumes Arg is an argument passed on the stack. This sets the frame +// offset for Arg and updates InArgsSizeBytes according to Arg's width. For an +// I64 arg that has been split into Lo and Hi components, it calls itself +// recursively on the components, taking care to handle Lo first because of the +// little-endian architecture. Lastly, this function generates an instruction +// to copy Arg into its assigned register if applicable. void TargetARM32::finishArgumentLowering(Variable *Arg, Variable *FramePtr, size_t BasicFrameOffset, size_t &InArgsSizeBytes) { @@ -591,8 +585,8 @@ InArgsSizeBytes = applyStackAlignmentTy(InArgsSizeBytes, Ty); Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes); InArgsSizeBytes += typeWidthInBytesOnStack(Ty); - // If the argument variable has been assigned a register, we need to load - // the value from the stack slot. + // If the argument variable has been assigned a register, we need to load the + // value from the stack slot. if (Arg->hasReg()) { assert(Ty != IceType_i64); OperandARM32Mem *Mem = OperandARM32Mem::create( @@ -606,10 +600,9 @@ } else { _ldr(Arg, Mem); } - // This argument-copying instruction uses an explicit - // OperandARM32Mem operand instead of a Variable, so its - // fill-from-stack operation has to be tracked separately for - // statistics. + // This argument-copying instruction uses an explicit OperandARM32Mem + // operand instead of a Variable, so its fill-from-stack operation has to + // be tracked separately for statistics. 
Ctx->statsUpdateFills(); } } @@ -642,16 +635,15 @@ // * GlobalsAndSubsequentPaddingSize: areas 3 - 4 // * LocalsSpillAreaSize: area 5 // * SpillAreaSizeBytes: areas 2 - 6 - // Determine stack frame offsets for each Variable without a - // register assignment. This can be done as one variable per stack - // slot. Or, do coalescing by running the register allocator again - // with an infinite set of registers (as a side effect, this gives - // variables a second chance at physical register assignment). + // Determine stack frame offsets for each Variable without a register + // assignment. This can be done as one variable per stack slot. Or, do + // coalescing by running the register allocator again with an infinite set of + // registers (as a side effect, this gives variables a second chance at + // physical register assignment). // - // A middle ground approach is to leverage sparsity and allocate one - // block of space on the frame for globals (variables with - // multi-block lifetime), and one block to share for locals - // (single-block lifetime). + // A middle ground approach is to leverage sparsity and allocate one block of + // space on the frame for globals (variables with multi-block lifetime), and + // one block to share for locals (single-block lifetime). Context.init(Node); Context.setInsertPoint(Context.getCur()); @@ -661,14 +653,13 @@ RegsUsed = llvm::SmallBitVector(CalleeSaves.size()); VarList SortedSpilledVariables; size_t GlobalsSize = 0; - // If there is a separate locals area, this represents that area. - // Otherwise it counts any variable not counted by GlobalsSize. + // If there is a separate locals area, this represents that area. Otherwise + // it counts any variable not counted by GlobalsSize. SpillAreaSizeBytes = 0; - // If there is a separate locals area, this specifies the alignment - // for it. + // If there is a separate locals area, this specifies the alignment for it. 
uint32_t LocalsSlotsAlignmentBytes = 0; - // The entire spill locations area gets aligned to largest natural - // alignment of the variables that have a spill slot. + // The entire spill locations area gets aligned to largest natural alignment + // of the variables that have a spill slot. uint32_t SpillAreaAlignmentBytes = 0; // For now, we don't have target-specific variables that need special // treatment (no stack-slot-linked SpillVariable type). @@ -682,12 +673,11 @@ uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes; SpillAreaSizeBytes += GlobalsSize; - // Add push instructions for preserved registers. - // On ARM, "push" can push a whole list of GPRs via a bitmask (0-15). - // Unlike x86, ARM also has callee-saved float/vector registers. - // The "vpush" instruction can handle a whole list of float/vector - // registers, but it only handles contiguous sequences of registers - // by specifying the start and the length. + // Add push instructions for preserved registers. On ARM, "push" can push a + // whole list of GPRs via a bitmask (0-15). Unlike x86, ARM also has + // callee-saved float/vector registers. The "vpush" instruction can handle a + // whole list of float/vector registers, but it only handles contiguous + // sequences of registers by specifying the start and the length. VarList GPRsToPreserve; GPRsToPreserve.reserve(CalleeSaves.size()); uint32_t NumCallee = 0; @@ -704,8 +694,8 @@ } for (SizeT i = 0; i < CalleeSaves.size(); ++i) { if (CalleeSaves[i] && RegsUsed[i]) { - // TODO(jvoung): do separate vpush for each floating point - // register segment and += 4, or 8 depending on type. + // TODO(jvoung): do separate vpush for each floating point register + // segment and += 4, or 8 depending on type. ++NumCallee; PreservedRegsSizeBytes += 4; GPRsToPreserve.push_back(getPhysicalRegister(i)); @@ -724,10 +714,10 @@ Context.insert(InstFakeUse::create(Func, FP)); } - // Align the variables area. 
SpillAreaPaddingBytes is the size of - // the region after the preserved registers and before the spill areas. - // LocalsSlotsPaddingBytes is the amount of padding between the globals - // and locals area if they are separate. + // Align the variables area. SpillAreaPaddingBytes is the size of the region + // after the preserved registers and before the spill areas. + // LocalsSlotsPaddingBytes is the amount of padding between the globals and + // locals area if they are separate. assert(SpillAreaAlignmentBytes <= ARM32_STACK_ALIGNMENT_BYTES); assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes); uint32_t SpillAreaPaddingBytes = 0; @@ -758,9 +748,9 @@ resetStackAdjustment(); - // Fill in stack offsets for stack args, and copy args into registers - // for those that were register-allocated. Args are pushed right to - // left, so Arg[0] is closest to the stack/frame pointer. + // Fill in stack offsets for stack args, and copy args into registers for + // those that were register-allocated. Args are pushed right to left, so + // Arg[0] is closest to the stack/frame pointer. Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); size_t BasicFrameOffset = PreservedRegsSizeBytes; if (!UsesFramePointer) @@ -830,8 +820,8 @@ if (RI == E) return; - // Convert the reverse_iterator position into its corresponding - // (forward) iterator position. + // Convert the reverse_iterator position into its corresponding (forward) + // iterator position. InstList::iterator InsertPoint = RI.base(); --InsertPoint; Context.init(Node); @@ -840,9 +830,9 @@ Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); if (UsesFramePointer) { Variable *FP = getPhysicalRegister(RegARM32::Reg_fp); - // For late-stage liveness analysis (e.g. asm-verbose mode), - // adding a fake use of SP before the assignment of SP=FP keeps - // previous SP adjustments from being dead-code eliminated. + // For late-stage liveness analysis (e.g. 
asm-verbose mode), adding a fake + // use of SP before the assignment of SP=FP keeps previous SP adjustments + // from being dead-code eliminated. Context.insert(InstFakeUse::create(Func, SP)); _mov(SP, FP); } else { @@ -868,8 +858,8 @@ if (!MaybeLeafFunc) { CalleeSaves[RegARM32::Reg_lr] = true; } - // Pop registers in ascending order just like push - // (instead of in reverse order). + // Pop registers in ascending order just like push (instead of in reverse + // order). for (SizeT i = 0; i < CalleeSaves.size(); ++i) { if (CalleeSaves[i] && RegsUsed[i]) { GPRsToRestore.push_back(getPhysicalRegister(i)); @@ -903,17 +893,16 @@ bool TargetARM32::isLegalVariableStackOffset(int32_t Offset) const { constexpr bool SignExt = false; - // TODO(jvoung): vldr of FP stack slots has a different limit from the - // plain stackSlotType(). + // TODO(jvoung): vldr of FP stack slots has a different limit from the plain + // stackSlotType(). return OperandARM32Mem::canHoldOffset(stackSlotType(), SignExt, Offset); } StackVariable *TargetARM32::legalizeVariableSlot(Variable *Var, Variable *OrigBaseReg) { int32_t Offset = Var->getStackOffset(); - // Legalize will likely need a movw/movt combination, but if the top - // bits are all 0 from negating the offset and subtracting, we could - // use that instead. + // Legalize will likely need a movw/movt combination, but if the top bits are + // all 0 from negating the offset and subtracting, we could use that instead. bool ShouldSub = (-Offset & 0xFFFF0000) == 0; if (ShouldSub) Offset = -Offset; @@ -949,15 +938,15 @@ return; Variable *OrigBaseReg = getPhysicalRegister(getFrameOrStackReg()); int32_t StackAdjust = 0; - // Do a fairly naive greedy clustering for now. Pick the first stack slot + // Do a fairly naive greedy clustering for now. Pick the first stack slot // that's out of bounds and make a new base reg using the architecture's temp - // register. If that works for the next slot, then great. 
Otherwise, create - // a new base register, clobbering the previous base register. Never share a - // base reg across different basic blocks. This isn't ideal if local and + // register. If that works for the next slot, then great. Otherwise, create a + // new base register, clobbering the previous base register. Never share a + // base reg across different basic blocks. This isn't ideal if local and // multi-block variables are far apart and their references are interspersed. - // It may help to be more coordinated about assign stack slot numbers - // and may help to assign smaller offsets to higher-weight variables - // so that they don't depend on this legalization. + // It may help to be more coordinated about assign stack slot numbers and may + // help to assign smaller offsets to higher-weight variables so that they + // don't depend on this legalization. for (CfgNode *Node : Func->getNodes()) { Context.init(Node); StackVariable *NewBaseReg = nullptr; @@ -986,7 +975,7 @@ continue; } } - // For now, only Mov instructions can have stack variables. We need to + // For now, only Mov instructions can have stack variables. We need to // know the type of instruction because we currently create a fresh one // to replace Dest/Source, rather than mutate in place. auto *MovInst = llvm::dyn_cast<InstARM32Mov>(CurInstr); @@ -1117,15 +1106,15 @@ static_cast<uint32_t>(Const->getValue() >> 32)); } if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand)) { - // Conservatively disallow memory operands with side-effects - // in case of duplication. + // Conservatively disallow memory operands with side-effects in case of + // duplication. assert(Mem->getAddrMode() == OperandARM32Mem::Offset || Mem->getAddrMode() == OperandARM32Mem::NegOffset); const Type SplitType = IceType_i32; if (Mem->isRegReg()) { // We have to make a temp variable T, and add 4 to either Base or Index. - // The Index may be shifted, so adding 4 can mean something else. 
- // Thus, prefer T := Base + 4, and use T as the new Base. + // The Index may be shifted, so adding 4 can mean something else. Thus, + // prefer T := Base + 4, and use T as the new Base. Variable *Base = Mem->getBase(); Constant *Four = Ctx->getConstantInt32(4); Variable *NewBase = Func->makeVariable(Base->getType()); @@ -1144,8 +1133,8 @@ // We have to make a temp variable and add 4 to either Base or Offset. // If we add 4 to Offset, this will convert a non-RegReg addressing // mode into a RegReg addressing mode. Since NaCl sandboxing disallows - // RegReg addressing modes, prefer adding to base and replacing instead. - // Thus we leave the old offset alone. + // RegReg addressing modes, prefer adding to base and replacing + // instead. Thus we leave the old offset alone. Constant *Four = Ctx->getConstantInt32(4); Variable *NewBase = Func->makeVariable(Base->getType()); lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add, @@ -1195,11 +1184,11 @@ void TargetARM32::lowerAlloca(const InstAlloca *Inst) { UsesFramePointer = true; - // Conservatively require the stack to be aligned. Some stack - // adjustment operations implemented below assume that the stack is - // aligned before the alloca. All the alloca code ensures that the - // stack alignment is preserved after the alloca. The stack alignment - // restriction can be relaxed in some cases. + // Conservatively require the stack to be aligned. Some stack adjustment + // operations implemented below assume that the stack is aligned before the + // alloca. All the alloca code ensures that the stack alignment is preserved + // after the alloca. The stack alignment restriction can be relaxed in some + // cases. NeedsStackAlignment = true; // TODO(stichnot): minimize the number of adjustments of SP, etc. 
@@ -1226,8 +1215,8 @@ Operand *SubAmount = legalize(Ctx->getConstantInt32(Value)); _sub(SP, SP, SubAmount); } else { - // Non-constant sizes need to be adjusted to the next highest - // multiple of the required alignment at runtime. + // Non-constant sizes need to be adjusted to the next highest multiple of + // the required alignment at runtime. TotalSize = legalize(TotalSize, Legal_Reg | Legal_Flex); Variable *T = makeReg(IceType_i32); _mov(T, TotalSize); @@ -1265,8 +1254,8 @@ case IceType_i64: { Variable *ScratchReg = makeReg(IceType_i32); _orrs(ScratchReg, SrcLoReg, SrcHi); - // ScratchReg isn't going to be used, but we need the - // side-effect of setting flags from this operation. + // ScratchReg isn't going to be used, but we need the side-effect of + // setting flags from this operation. Context.insert(InstFakeUse::create(Func, ScratchReg)); } } @@ -1310,21 +1299,21 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { Variable *Dest = Inst->getDest(); - // TODO(jvoung): Should be able to flip Src0 and Src1 if it is easier - // to legalize Src0 to flex or Src1 to flex and there is a reversible - // instruction. E.g., reverse subtract with immediate, register vs - // register, immediate. - // Or it may be the case that the operands aren't swapped, but the - // bits can be flipped and a different operation applied. - // E.g., use BIC (bit clear) instead of AND for some masks. + // TODO(jvoung): Should be able to flip Src0 and Src1 if it is easier to + // legalize Src0 to flex or Src1 to flex and there is a reversible + // instruction. E.g., reverse subtract with immediate, register vs register, + // immediate. + // Or it may be the case that the operands aren't swapped, but the bits can + // be flipped and a different operation applied. E.g., use BIC (bit clear) + // instead of AND for some masks. 
Operand *Src0 = legalizeUndef(Inst->getSrc(0)); Operand *Src1 = legalizeUndef(Inst->getSrc(1)); if (Dest->getType() == IceType_i64) { - // These helper-call-involved instructions are lowered in this - // separate switch. This is because we would otherwise assume that - // we need to legalize Src0 to Src0RLo and Src0Hi. However, those go unused - // with helper calls, and such unused/redundant instructions will fail - // liveness analysis under -Om1 setting. + // These helper-call-involved instructions are lowered in this separate + // switch. This is because we would otherwise assume that we need to + // legalize Src0 to Src0RLo and Src0Hi. However, those go unused with + // helper calls, and such unused/redundant instructions will fail liveness + // analysis under -Om1 setting. switch (Inst->getOp()) { default: break; @@ -1332,11 +1321,10 @@ case InstArithmetic::Sdiv: case InstArithmetic::Urem: case InstArithmetic::Srem: { - // Check for divide by 0 (ARM normally doesn't trap, but we want it - // to trap for NaCl). Src1Lo and Src1Hi may have already been legalized - // to a register, which will hide a constant source operand. - // Instead, check the not-yet-legalized Src1 to optimize-out a divide - // by 0 check. + // Check for divide by 0 (ARM normally doesn't trap, but we want it to + // trap for NaCl). Src1Lo and Src1Hi may have already been legalized to a + // register, which will hide a constant source operand. Instead, check + // the not-yet-legalized Src1 to optimize-out a divide by 0 check. if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Src1)) { if (C64->getValue() == 0) { _trap(); @@ -1348,8 +1336,8 @@ div0Check(IceType_i64, Src1Lo, Src1Hi); } // Technically, ARM has their own aeabi routines, but we can use the - // non-aeabi routine as well. LLVM uses __aeabi_ldivmod for div, - // but uses the more standard __moddi3 for rem. + // non-aeabi routine as well. LLVM uses __aeabi_ldivmod for div, but uses + // the more standard __moddi3 for rem. 
const char *HelperName = ""; switch (Inst->getOp()) { default: @@ -1472,12 +1460,11 @@ // lsl t_lo, b.lo, c.lo // a.lo = t_lo // a.hi = t_hi - // Can be strength-reduced for constant-shifts, but we don't do - // that for now. - // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative. - // On ARM, shifts only take the lower 8 bits of the shift register, - // and saturate to the range 0-32, so the negative value will - // saturate to 32. + // Can be strength-reduced for constant-shifts, but we don't do that for + // now. + // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative. On + // ARM, shifts only take the lower 8 bits of the shift register, and + // saturate to the range 0-32, so the negative value will saturate to 32. Variable *T_Hi = makeReg(IceType_i32); Variable *Src1RLo = legalizeToReg(Src1Lo); Constant *ThirtyTwo = Ctx->getConstantInt32(32); @@ -1493,8 +1480,8 @@ _mov(DestHi, T_Hi); Variable *T_Lo = makeReg(IceType_i32); // _mov seems to sometimes have better register preferencing than lsl. - // Otherwise mov w/ lsl shifted register is a pseudo-instruction - // that maps to lsl. + // Otherwise mov w/ lsl shifted register is a pseudo-instruction that + // maps to lsl. _mov(T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo, OperandARM32::LSL, Src1RLo)); _mov(DestLo, T_Lo); @@ -1513,9 +1500,9 @@ // a.hi = t_hi case InstArithmetic::Ashr: { // a=b>>c (signed) ==> ... - // Ashr is similar, but the sub t_c2, c.lo, #32 should set flags, - // and the next orr should be conditioned on PLUS. The last two - // right shifts should also be arithmetic. + // Ashr is similar, but the sub t_c2, c.lo, #32 should set flags, and the + // next orr should be conditioned on PLUS. The last two right shifts + // should also be arithmetic. 
bool IsAshr = Inst->getOp() == InstArithmetic::Ashr; Variable *T_Lo = makeReg(IceType_i32); Variable *Src1RLo = legalizeToReg(Src1Lo); @@ -1723,13 +1710,13 @@ Operand *NewSrc; if (Dest->hasReg()) { // If Dest already has a physical register, then legalize the Src operand - // into a Variable with the same register assignment. This especially + // into a Variable with the same register assignment. This especially // helps allow the use of Flex operands. NewSrc = legalize(Src0, Legal_Reg | Legal_Flex, Dest->getRegNum()); } else { - // Dest could be a stack operand. Since we could potentially need - // to do a Store (and store can only have Register operands), - // legalize this to a register. + // Dest could be a stack operand. Since we could potentially need to do a + // Store (and store can only have Register operands), legalize this to a + // register. NewSrc = legalize(Src0, Legal_Reg); } if (isVectorType(Dest->getType())) { @@ -1810,25 +1797,24 @@ } } - // Adjust the parameter area so that the stack is aligned. It is - // assumed that the stack is already aligned at the start of the - // calling sequence. + // Adjust the parameter area so that the stack is aligned. It is assumed that + // the stack is already aligned at the start of the calling sequence. ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes); - // Subtract the appropriate amount for the argument area. This also - // takes care of setting the stack adjustment during emission. + // Subtract the appropriate amount for the argument area. This also takes + // care of setting the stack adjustment during emission. // - // TODO: If for some reason the call instruction gets dead-code - // eliminated after lowering, we would need to ensure that the - // pre-call and the post-call esp adjustment get eliminated as well. 
+ // TODO: If for some reason the call instruction gets dead-code eliminated + // after lowering, we would need to ensure that the pre-call and the + // post-call esp adjustment get eliminated as well. if (ParameterAreaSizeBytes) { Operand *SubAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes), Legal_Reg | Legal_Flex); _adjust_stack(ParameterAreaSizeBytes, SubAmount); } - // Copy arguments that are passed on the stack to the appropriate - // stack locations. + // Copy arguments that are passed on the stack to the appropriate stack + // locations. Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); for (auto &StackArg : StackArgs) { ConstantInteger32 *Loc = @@ -1850,9 +1836,9 @@ // Copy arguments to be passed in registers to the appropriate registers. for (auto &GPRArg : GPRArgs) { Variable *Reg = legalizeToReg(GPRArg.first, GPRArg.second); - // Generate a FakeUse of register arguments so that they do not get - // dead code eliminated as a result of the FakeKill of scratch - // registers after the call. + // Generate a FakeUse of register arguments so that they do not get dead + // code eliminated as a result of the FakeKill of scratch registers after + // the call. Context.insert(InstFakeUse::create(Func, Reg)); } for (auto &FPArg : FPArgs) { @@ -1860,8 +1846,8 @@ Context.insert(InstFakeUse::create(Func, Reg)); } - // Generate the call instruction. Assign its result to a temporary - // with high register allocation weight. + // Generate the call instruction. Assign its result to a temporary with high + // register allocation weight. Variable *Dest = Instr->getDest(); // ReturnReg doubles as ReturnRegLo as necessary. Variable *ReturnReg = nullptr; @@ -1901,12 +1887,12 @@ } } Operand *CallTarget = Instr->getCallTarget(); - // TODO(jvoung): Handle sandboxing. - // const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing(); + // TODO(jvoung): Handle sandboxing. 
+ // const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing(); - // Allow ConstantRelocatable to be left alone as a direct call, - // but force other constants like ConstantInteger32 to be in - // a register and make it an indirect call. + // Allow ConstantRelocatable to be left alone as a direct call, but force + // other constants like ConstantInteger32 to be in a register and make it an + // indirect call. if (!llvm::isa<ConstantRelocatable>(CallTarget)) { CallTarget = legalize(CallTarget, Legal_Reg); } @@ -1915,8 +1901,8 @@ if (ReturnRegHi) Context.insert(InstFakeDef::create(Func, ReturnRegHi)); - // Add the appropriate offset to SP. The call instruction takes care - // of resetting the stack offset during emission. + // Add the appropriate offset to SP. The call instruction takes care of + // resetting the stack offset during emission. if (ParameterAreaSizeBytes) { Operand *AddAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes), Legal_Reg | Legal_Flex); @@ -2024,8 +2010,8 @@ Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); Variable *T_Lo = makeReg(DestLo->getType()); - // i32 and i1 can just take up the whole register. - // i32 doesn't need uxt, while i1 will have an and mask later anyway. + // i32 and i1 can just take up the whole register. i32 doesn't need uxt, + // while i1 will have an and mask later anyway. if (Src0->getType() == IceType_i32 || Src0->getType() == IceType_i1) { Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex); _mov(T_Lo, Src0RF); @@ -2046,9 +2032,9 @@ Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex); Constant *One = Ctx->getConstantInt32(1); Variable *T = makeReg(Dest->getType()); - // Just use _mov instead of _uxt since all registers are 32-bit. - // _uxt requires the source to be a register so could have required - // a _mov from legalize anyway. + // Just use _mov instead of _uxt since all registers are 32-bit. 
_uxt + // requires the source to be a register so could have required a _mov + // from legalize anyway. _mov(T, Src0RF); _and(T, T, One); _mov(Dest, T); @@ -2288,8 +2274,8 @@ // mov.<C2> t, #0 mov.<C2> t, #0 // mov a, t mov a, t // where the "cmp.eq b.lo, c.lo" is used for unsigned and "sbcs t1, hi, hi" - // is used for signed compares. In some cases, b and c need to be swapped - // as well. + // is used for signed compares. In some cases, b and c need to be swapped as + // well. // // LLVM does: // for EQ and NE: @@ -2299,13 +2285,13 @@ // mov.<C> t, #1 // mov a, t // - // that's nice in that it's just as short but has fewer dependencies - // for better ILP at the cost of more registers. + // that's nice in that it's just as short but has fewer dependencies for + // better ILP at the cost of more registers. // - // Otherwise for signed/unsigned <, <=, etc. LLVM uses a sequence with - // two unconditional mov #0, two cmps, two conditional mov #1, - // and one conditonal reg mov. That has few dependencies for good ILP, - // but is a longer sequence. + // Otherwise for signed/unsigned <, <=, etc. LLVM uses a sequence with two + // unconditional mov #0, two cmps, two conditional mov #1, and one + // conditional reg mov. That has few dependencies for good ILP, but is a + // longer sequence. // // So, we are going with the GCC version since it's usually better (except // perhaps for eq/ne). We could revisit special-casing eq/ne later. @@ -2333,8 +2319,8 @@ Variable *ScratchReg = makeReg(IceType_i32); _cmp(Src0Lo, Src1LoRF); _sbcs(ScratchReg, Src0Hi, Src1HiRF); - // ScratchReg isn't going to be used, but we need the - // side-effect of setting flags from this operation. + // ScratchReg isn't going to be used, but we need the side-effect of + // setting flags from this operation. 
Context.insert(InstFakeUse::create(Func, ScratchReg)); } else { _cmp(Src0Hi, Src1HiRF); @@ -2354,8 +2340,8 @@ // mov.C1 t, #0 // mov.C2 t, #1 // mov a, t - // where the unsigned/sign extension is not needed for 32-bit. - // They also have special cases for EQ and NE. E.g., for NE: + // where the unsigned/sign extension is not needed for 32-bit. They also have + // special cases for EQ and NE. E.g., for NE: // <extend to tb, tc> // subs t, tb, tc // movne t, #1 @@ -2368,13 +2354,13 @@ // mov.<C> t, #1 // mov a, t // - // the left shift is by 0, 16, or 24, which allows the comparison to focus - // on the digits that actually matter (for 16-bit or 8-bit signed/unsigned). - // For the unsigned case, for some reason it does similar to GCC and does - // a uxtb first. It's not clear to me why that special-casing is needed. + // the left shift is by 0, 16, or 24, which allows the comparison to focus on + // the digits that actually matter (for 16-bit or 8-bit signed/unsigned). For + // the unsigned case, for some reason it does similar to GCC and does a uxtb + // first. It's not clear to me why that special-casing is needed. // - // We'll go with the LLVM way for now, since it's shorter and has just as - // few dependencies. + // We'll go with the LLVM way for now, since it's shorter and has just as few + // dependencies. int32_t ShiftAmt = 32 - getScalarIntBitWidth(Src0->getType()); assert(ShiftAmt >= 0); Constant *ShiftConst = nullptr; @@ -2417,9 +2403,9 @@ UnimplementedError(Func->getContext()->getFlags()); return; case Intrinsics::AtomicFenceAll: - // NOTE: FenceAll should prevent and load/store from being moved - // across the fence (both atomic and non-atomic). The InstARM32Mfence - // instruction is currently marked coarsely as "HasSideEffects". + // NOTE: FenceAll should prevent any load/store from being moved across the + // fence (both atomic and non-atomic). The InstARM32Mfence instruction is + // currently marked coarsely as "HasSideEffects". 
UnimplementedError(Func->getContext()->getFlags()); return; case Intrinsics::AtomicIsLockFree: { @@ -2477,10 +2463,10 @@ Call->addArg(Val); lowerCall(Call); // The popcount helpers always return 32-bit values, while the intrinsic's - // signature matches some 64-bit platform's native instructions and - // expect to fill a 64-bit reg. Thus, clear the upper bits of the dest - // just in case the user doesn't do that in the IR or doesn't toss the bits - // via truncate. + // signature matches some 64-bit platform's native instructions and expect + // to fill a 64-bit reg. Thus, clear the upper bits of the dest just in + // case the user doesn't do that in the IR or doesn't toss the bits via + // truncate. if (Val->getType() == IceType_i64) { Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); Constant *Zero = Ctx->getConstantZero(IceType_i32); @@ -2491,8 +2477,8 @@ return; } case Intrinsics::Ctlz: { - // The "is zero undef" parameter is ignored and we always return - // a well-defined value. + // The "is zero undef" parameter is ignored and we always return a + // well-defined value. Operand *Val = Instr->getArg(0); Variable *ValLoR; Variable *ValHiR = nullptr; @@ -2639,9 +2625,9 @@ Variable *T2 = makeReg(IceType_i32); _add(T2, T, ThirtyTwo); _clz(T2, ValHiR, CondARM32::NE); - // T2 is actually a source as well when the predicate is not AL - // (since it may leave T2 alone). We use set_dest_nonkillable to - // prolong the liveness of T2 as if it was used as a source. + // T2 is actually a source as well when the predicate is not AL (since it + // may leave T2 alone). We use set_dest_nonkillable to prolong the liveness + // of T2 as if it was used as a source. _set_dest_nonkillable(); _mov(DestLo, T2); Variable *T3 = nullptr; @@ -2654,15 +2640,14 @@ } void TargetARM32::lowerLoad(const InstLoad *Load) { - // A Load instruction can be treated the same as an Assign - // instruction, after the source operand is transformed into an - // OperandARM32Mem operand. 
+ // A Load instruction can be treated the same as an Assign instruction, after + // the source operand is transformed into an OperandARM32Mem operand. Type Ty = Load->getDest()->getType(); Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty); Variable *DestLoad = Load->getDest(); - // TODO(jvoung): handled folding opportunities. Sign and zero extension - // can be folded into a load. + // TODO(jvoung): handle folding opportunities. Sign and zero extension can + // be folded into a load. InstAssign *Assign = InstAssign::create(Func, DestLoad, Src0); lowerAssign(Assign); } @@ -2708,17 +2693,15 @@ _mov(Reg, Src0F, CondARM32::AL, RegARM32::Reg_r0); } } - // Add a ret instruction even if sandboxing is enabled, because - // addEpilog explicitly looks for a ret instruction as a marker for - // where to insert the frame removal instructions. - // addEpilog is responsible for restoring the "lr" register as needed - // prior to this ret instruction. + // Add a ret instruction even if sandboxing is enabled, because addEpilog + // explicitly looks for a ret instruction as a marker for where to insert the + // frame removal instructions. addEpilog is responsible for restoring the + // "lr" register as needed prior to this ret instruction. _ret(getPhysicalRegister(RegARM32::Reg_lr), Reg); - // Add a fake use of sp to make sure sp stays alive for the entire - // function. Otherwise post-call sp adjustments get dead-code - // eliminated. TODO: Are there more places where the fake use - // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not - // have a ret instruction. + // Add a fake use of sp to make sure sp stays alive for the entire function. + // Otherwise post-call sp adjustments get dead-code eliminated. + // TODO: Are there more places where the fake use should be inserted? E.g. + // "void f(int n){while(1) g(n);}" may not have a ret instruction. 
Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); Context.insert(InstFakeUse::create(Func, SP)); } @@ -2852,8 +2835,8 @@ if (isVectorType(Ty) || isFloatingType(Ty)) { _vmov(Reg, Src); } else { - // Mov's Src operand can really only be the flexible second operand type - // or a register. Users should guarantee that. + // Mov's Src operand can really only be the flexible second operand type or + // a register. Users should guarantee that. _mov(Reg, Src); } return Reg; @@ -2862,18 +2845,17 @@ Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed, int32_t RegNum) { Type Ty = From->getType(); - // Assert that a physical register is allowed. To date, all calls - // to legalize() allow a physical register. Legal_Flex converts - // registers to the right type OperandARM32FlexReg as needed. + // Assert that a physical register is allowed. To date, all calls to + // legalize() allow a physical register. Legal_Flex converts registers to the + // right type OperandARM32FlexReg as needed. assert(Allowed & Legal_Reg); - // Go through the various types of operands: - // OperandARM32Mem, OperandARM32Flex, Constant, and Variable. - // Given the above assertion, if type of operand is not legal - // (e.g., OperandARM32Mem and !Legal_Mem), we can always copy - // to a register. + // Go through the various types of operands: OperandARM32Mem, + // OperandARM32Flex, Constant, and Variable. Given the above assertion, if + // type of operand is not legal (e.g., OperandARM32Mem and !Legal_Mem), we + // can always copy to a register. if (auto Mem = llvm::dyn_cast<OperandARM32Mem>(From)) { - // Before doing anything with a Mem operand, we need to ensure - // that the Base and Index components are in physical registers. + // Before doing anything with a Mem operand, we need to ensure that the + // Base and Index components are in physical registers. 
Variable *Base = Mem->getBase(); Variable *Index = Mem->getIndex(); Variable *RegBase = nullptr; @@ -2918,8 +2900,8 @@ if (auto FlexReg = llvm::dyn_cast<OperandARM32FlexReg>(Flex)) { if (FlexReg->getShiftOp() == OperandARM32::kNoShift) { From = FlexReg->getReg(); - // Fall through and let From be checked as a Variable below, - // where it may or may not need a register. + // Fall through and let From be checked as a Variable below, where it + // may or may not need a register. } else { return copyToReg(Flex, RegNum); } @@ -2944,10 +2926,10 @@ uint32_t RotateAmt; uint32_t Immed_8; uint32_t Value = static_cast<uint32_t>(C32->getValue()); - // Check if the immediate will fit in a Flexible second operand, - // if a Flexible second operand is allowed. We need to know the exact - // value, so that rules out relocatable constants. - // Also try the inverse and use MVN if possible. + // Check if the immediate will fit in a Flexible second operand, if a + // Flexible second operand is allowed. We need to know the exact value, + // so that rules out relocatable constants. Also try the inverse and use + // MVN if possible. if (CanBeFlex && OperandARM32FlexImm::canHoldImm(Value, &RotateAmt, &Immed_8)) { return OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt); @@ -2977,12 +2959,12 @@ } else { assert(isScalarFloatingType(Ty)); // Load floats/doubles from literal pool. - // TODO(jvoung): Allow certain immediates to be encoded directly in - // an operand. See Table A7-18 of the ARM manual: - // "Floating-point modified immediate constants". - // Or, for 32-bit floating point numbers, just encode the raw bits - // into a movw/movt pair to GPR, and vmov to an SREG, instead of using - // a movw/movt pair to get the const-pool address then loading to SREG. + // TODO(jvoung): Allow certain immediates to be encoded directly in an + // operand. See Table A7-18 of the ARM manual: "Floating-point modified + // immediate constants". 
Or, for 32-bit floating point numbers, just + // encode the raw bits into a movw/movt pair to GPR, and vmov to an SREG, + // instead of using a movw/movt pair to get the const-pool address then + // loading to SREG. std::string Buffer; llvm::raw_string_ostream StrBuf(Buffer); llvm::cast<Constant>(From)->emitPoolLabel(StrBuf); @@ -2997,9 +2979,9 @@ } if (auto Var = llvm::dyn_cast<Variable>(From)) { - // Check if the variable is guaranteed a physical register. This - // can happen either when the variable is pre-colored or when it is - // assigned infinite weight. + // Check if the variable is guaranteed a physical register. This can happen + // either when the variable is pre-colored or when it is assigned infinite + // weight. bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg()); // We need a new physical register for the operand if: // Mem is not allowed and Var isn't guaranteed a physical @@ -3025,17 +3007,16 @@ Operand *TargetARM32::legalizeUndef(Operand *From, int32_t RegNum) { Type Ty = From->getType(); if (llvm::isa<ConstantUndef>(From)) { - // Lower undefs to zero. Another option is to lower undefs to an - // uninitialized register; however, using an uninitialized register - // results in less predictable code. + // Lower undefs to zero. Another option is to lower undefs to an + // uninitialized register; however, using an uninitialized register results + // in less predictable code. // - // If in the future the implementation is changed to lower undef - // values to uninitialized registers, a FakeDef will be needed: - // Context.insert(InstFakeDef::create(Func, Reg)); - // This is in order to ensure that the live range of Reg is not - // overestimated. If the constant being lowered is a 64 bit value, - // then the result should be split and the lo and hi components will - // need to go in uninitialized registers. 
+ // If in the future the implementation is changed to lower undef values to + // uninitialized registers, a FakeDef will be needed: + // Context.insert(InstFakeDef::create(Func, Reg)); This is in order to + // ensure that the live range of Reg is not overestimated. If the constant + // being lowered is a 64 bit value, then the result should be split and the + // lo and hi components will need to go in uninitialized registers. if (isVectorType(Ty)) return makeVectorOfZeros(Ty, RegNum); return Ctx->getConstantZero(Ty); @@ -3045,15 +3026,15 @@ OperandARM32Mem *TargetARM32::formMemoryOperand(Operand *Operand, Type Ty) { OperandARM32Mem *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand); - // It may be the case that address mode optimization already creates - // an OperandARM32Mem, so in that case it wouldn't need another level - // of transformation. + // It may be the case that address mode optimization already creates an + // OperandARM32Mem, so in that case it wouldn't need another level of + // transformation. if (Mem) { return llvm::cast<OperandARM32Mem>(legalize(Mem)); } - // If we didn't do address mode optimization, then we only - // have a base/offset to work with. ARM always requires a base - // register, so just use that to hold the operand. + // If we didn't do address mode optimization, then we only have a base/offset + // to work with. ARM always requires a base register, so just use that to + // hold the operand. Variable *Base = legalizeToReg(Operand); return OperandARM32Mem::create( Func, Ty, Base, @@ -3076,9 +3057,9 @@ uint32_t RotateAmt; uint32_t Immed_8; Operand *Mask; - // Use AND or BIC to mask off the bits, depending on which immediate fits - // (if it fits at all). Assume Align is usually small, in which case BIC - // works better. Thus, this rounds down to the alignment. + // Use AND or BIC to mask off the bits, depending on which immediate fits (if + // it fits at all). Assume Align is usually small, in which case BIC works + // better. 
Thus, this rounds down to the alignment. if (OperandARM32FlexImm::canHoldImm(Align - 1, &RotateAmt, &Immed_8)) { Mask = legalize(Ctx->getConstantInt32(Align - 1), Legal_Reg | Legal_Flex); _bic(Reg, Reg, Mask); @@ -3170,17 +3151,18 @@ OstreamLocker L(Ctx); Ostream &Str = Ctx->getStrEmit(); Str << ".syntax unified\n"; - // Emit build attributes in format: .eabi_attribute TAG, VALUE. - // See Sec. 2 of "Addenda to, and Errata in the ABI for the ARM architecture" - // http://infocenter.arm.com/help/topic/com.arm.doc.ihi0045d/IHI0045D_ABI_addenda.pdf + // Emit build attributes in format: .eabi_attribute TAG, VALUE. See Sec. 2 of + // "Addenda to, and Errata in the ABI for the ARM architecture" + // http://infocenter.arm.com + // /help/topic/com.arm.doc.ihi0045d/IHI0045D_ABI_addenda.pdf // - // Tag_conformance should be be emitted first in a file-scope - // sub-subsection of the first public subsection of the attributes. + // Tag_conformance should be emitted first in a file-scope sub-subsection + // of the first public subsection of the attributes. Str << ".eabi_attribute 67, \"2.09\" @ Tag_conformance\n"; - // Chromebooks are at least A15, but do A9 for higher compat. - // For some reason, the LLVM ARM asm parser has the .cpu directive override - // the mattr specified on the commandline. So to test hwdiv, we need to set - // the .cpu directive higher (can't just rely on --mattr=...). + // Chromebooks are at least A15, but do A9 for higher compat. For some + // reason, the LLVM ARM asm parser has the .cpu directive override the mattr + // specified on the commandline. So to test hwdiv, we need to set the .cpu + // directive higher (can't just rely on --mattr=...). if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) { Str << ".cpu cortex-a15\n"; } else {
diff --git a/src/IceTargetLoweringARM32.h b/src/IceTargetLoweringARM32.h index 57e0b5a..5578289 100644 --- a/src/IceTargetLoweringARM32.h +++ b/src/IceTargetLoweringARM32.h
@@ -78,8 +78,8 @@ SizeT getReservedTmpReg() const { return RegARM32::Reg_ip; } size_t typeWidthInBytesOnStack(Type Ty) const override { - // Round up to the next multiple of 4 bytes. In particular, i1, - // i8, and i16 are rounded up to 4 bytes. + // Round up to the next multiple of 4 bytes. In particular, i1, i8, and i16 + // are rounded up to 4 bytes. return (typeWidthInBytes(Ty) + 3) & ~3; } @@ -101,9 +101,8 @@ void addProlog(CfgNode *Node) override; void addEpilog(CfgNode *Node) override; - /// Ensure that a 64-bit Variable has been split into 2 32-bit - /// Variables, creating them if necessary. This is needed for all - /// I64 operations. + /// Ensure that a 64-bit Variable has been split into 2 32-bit Variables, + /// creating them if necessary. This is needed for all I64 operations. void split64(Variable *Var); Operand *loOperand(Operand *Operand); Operand *hiOperand(Operand *Operand); @@ -147,8 +146,8 @@ enum OperandLegalization { Legal_None = 0, Legal_Reg = 1 << 0, /// physical register, not stack location - Legal_Flex = 1 << 1, /// A flexible operand2, which can hold rotated - /// small immediates, or shifted registers. + Legal_Flex = 1 << 1, /// A flexible operand2, which can hold rotated small + /// immediates, or shifted registers. Legal_Mem = 1 << 2, /// includes [r0, r1 lsl #2] as well as [sp, #12] Legal_All = ~Legal_None }; @@ -171,9 +170,8 @@ const llvm::SmallBitVector &ExcludeRegisters, uint64_t Salt) const override; - // If a divide-by-zero check is needed, inserts a: - // test; branch .LSKIP; trap; .LSKIP: <continuation>. - // If no check is needed nothing is inserted. + // If a divide-by-zero check is needed, inserts a: test; branch .LSKIP; trap; + // .LSKIP: <continuation>. If no check is needed nothing is inserted. 
void div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi); using ExtInstr = void (TargetARM32::*)(Variable *, Variable *, CondARM32::Cond); @@ -185,9 +183,9 @@ void lowerCLZ(Variable *Dest, Variable *ValLo, Variable *ValHi); - // The following are helpers that insert lowered ARM32 instructions - // with minimal syntactic overhead, so that the lowering code can - // look as close to assembly as practical. + // The following are helpers that insert lowered ARM32 instructions with + // minimal syntactic overhead, so that the lowering code can look as close to + // assembly as practical. void _add(Variable *Dest, Variable *Src0, Operand *Src1, CondARM32::Cond Pred = CondARM32::AL) { @@ -265,9 +263,9 @@ CondARM32::Cond Pred = CondARM32::AL) { Context.insert(InstARM32Mls::create(Func, Dest, Src0, Src1, Acc, Pred)); } - /// If Dest=nullptr is passed in, then a new variable is created, - /// marked as infinite register allocation weight, and returned - /// through the in/out Dest argument. + /// If Dest=nullptr is passed in, then a new variable is created, marked as + /// infinite register allocation weight, and returned through the in/out Dest + /// argument. void _mov(Variable *&Dest, Operand *Src0, CondARM32::Cond Pred = CondARM32::AL, int32_t RegNum = Variable::NoRegister) { @@ -281,8 +279,8 @@ NewInst->setDestNonKillable(); Context.insert(NewInst); } - /// The Operand can only be a 16-bit immediate or a ConstantRelocatable - /// (with an upper16 relocation). + /// The Operand can only be a 16-bit immediate or a ConstantRelocatable (with + /// an upper16 relocation). void _movt(Variable *Dest, Operand *Src0, CondARM32::Cond Pred = CondARM32::AL) { Context.insert(InstARM32Movt::create(Func, Dest, Src0, Pred)); @@ -378,8 +376,8 @@ Variable *Src1, CondARM32::Cond Pred = CondARM32::AL) { Context.insert( InstARM32Umull::create(Func, DestLo, DestHi, Src0, Src1, Pred)); - // Model the modification to the second dest as a fake def. - // Note that the def is not predicated. 
+ // Model the modification to the second dest as a fake def. Note that the + // def is not predicated. Context.insert(InstFakeDef::create(Func, DestHi, DestLo)); } void _uxt(Variable *Dest, Variable *Src0, @@ -400,11 +398,10 @@ CondARM32::Cond Pred = CondARM32::AL) { Context.insert(InstARM32Vldr::create(Func, Dest, Src, Pred)); } - // There are a whole bunch of vmov variants, to transfer within - // S/D/Q registers, between core integer registers and S/D, - // and from small immediates into S/D. - // For integer -> S/D/Q there is a variant which takes two integer - // register to fill a D, or to fill two consecutive S registers. + // There are a whole bunch of vmov variants, to transfer within S/D/Q + // registers, between core integer registers and S/D, and from small + // immediates into S/D. For integer -> S/D/Q there is a variant which takes + // two integer register to fill a D, or to fill two consecutive S registers. // Vmov can also be used to insert-element. E.g., // "vmov.8 d0[1], r0" // but insert-element is a "two-address" operation where only part of the @@ -440,8 +437,8 @@ } /// Run a pass through stack variables and ensure that the offsets are legal. - /// If the offset is not legal, use a new base register that accounts for - /// the offset, such that the addressing mode offset bits are now legal. + /// If the offset is not legal, use a new base register that accounts for the + /// offset, such that the addressing mode offset bits are now legal. void legalizeStackSlots(); /// Returns true if the given Offset can be represented in a stack ldr/str. bool isLegalVariableStackOffset(int32_t Offset) const; @@ -464,11 +461,11 @@ /// Helper class that understands the Calling Convention and register /// assignments. The first few integer type parameters can use r0-r3, /// regardless of their position relative to the floating-point/vector - /// arguments in the argument list. Floating-point and vector arguments - /// can use q0-q3 (aka d0-d7, s0-s15). 
Technically, arguments that can - /// start with registers but extend beyond the available registers can be - /// split between the registers and the stack. However, this is typically - /// for passing GPR structs by value, and PNaCl transforms expand this out. + /// arguments in the argument list. Floating-point and vector arguments can + /// use q0-q3 (aka d0-d7, s0-s15). Technically, arguments that can start with + /// registers but extend beyond the available registers can be split between + /// the registers and the stack. However, this is typically for passing GPR + /// structs by value, and PNaCl transforms expand this out. /// /// Also, at the point before the call, the stack must be aligned. class CallingConv {
diff --git a/src/IceTargetLoweringMIPS32.cpp b/src/IceTargetLoweringMIPS32.cpp index 080e56b..b634306 100644 --- a/src/IceTargetLoweringMIPS32.cpp +++ b/src/IceTargetLoweringMIPS32.cpp
@@ -43,9 +43,8 @@ } // end of anonymous namespace TargetMIPS32::TargetMIPS32(Cfg *Func) : TargetLowering(Func) { - // TODO: Don't initialize IntegerRegisters and friends every time. - // Instead, initialize in some sort of static initializer for the - // class. + // TODO: Don't initialize IntegerRegisters and friends every time. Instead, + // initialize in some sort of static initializer for the class. llvm::SmallBitVector IntegerRegisters(RegMIPS32::Reg_NUM); llvm::SmallBitVector FloatRegisters(RegMIPS32::Reg_NUM); llvm::SmallBitVector VectorRegisters(RegMIPS32::Reg_NUM); @@ -105,19 +104,18 @@ // Argument lowering Func->doArgLowering(); - // Target lowering. This requires liveness analysis for some parts - // of the lowering decisions, such as compare/branch fusing. If - // non-lightweight liveness analysis is used, the instructions need - // to be renumbered first. TODO: This renumbering should only be - // necessary if we're actually calculating live intervals, which we - // only do for register allocation. + // Target lowering. This requires liveness analysis for some parts of the + // lowering decisions, such as compare/branch fusing. If non-lightweight + // liveness analysis is used, the instructions need to be renumbered first. + // TODO: This renumbering should only be necessary if we're actually + // calculating live intervals, which we only do for register allocation. Func->renumberInstructions(); if (Func->hasError()) return; - // TODO: It should be sufficient to use the fastest liveness - // calculation, i.e. livenessLightweight(). However, for some - // reason that slows down the rest of the translation. Investigate. + // TODO: It should be sufficient to use the fastest liveness calculation, + // i.e. livenessLightweight(). However, for some reason that slows down the + // rest of the translation. Investigate. 
Func->liveness(Liveness_Basic); if (Func->hasError()) return; @@ -128,19 +126,19 @@ return; Func->dump("After MIPS32 codegen"); - // Register allocation. This requires instruction renumbering and - // full liveness analysis. + // Register allocation. This requires instruction renumbering and full + // liveness analysis. Func->renumberInstructions(); if (Func->hasError()) return; Func->liveness(Liveness_Intervals); if (Func->hasError()) return; - // Validate the live range computations. The expensive validation - // call is deliberately only made when assertions are enabled. + // Validate the live range computations. The expensive validation call is + // deliberately only made when assertions are enabled. assert(Func->validateLiveness()); - // The post-codegen dump is done here, after liveness analysis and - // associated cleanup, to make the dump cleaner and more useful. + // The post-codegen dump is done here, after liveness analysis and associated + // cleanup, to make the dump cleaner and more useful. Func->dump("After initial MIPS32 codegen"); Func->getVMetadata()->init(VMK_All); regAlloc(RAK_Global); @@ -162,11 +160,10 @@ Func->contractEmptyNodes(); Func->reorderNodes(); - // Branch optimization. This needs to be done just before code - // emission. In particular, no transformations that insert or - // reorder CfgNodes should be done after branch optimization. We go - // ahead and do it before nop insertion to reduce the amount of work - // needed for searching for opportunities. + // Branch optimization. This needs to be done just before code emission. In + // particular, no transformations that insert or reorder CfgNodes should be + // done after branch optimization. We go ahead and do it before nop insertion + // to reduce the amount of work needed for searching for opportunities. 
Func->doBranchOpt(); Func->dump("After branch optimization"); @@ -246,8 +243,8 @@ Reg = Func->makeVariable(Ty); Reg->setRegNum(RegNum); PhysicalRegisters[Ty][RegNum] = Reg; - // Specially mark SP as an "argument" so that it is considered - // live upon function entry. + // Specially mark SP as an "argument" so that it is considered live upon + // function entry. if (RegNum == RegMIPS32::Reg_SP || RegNum == RegMIPS32::Reg_RA) { Func->addImplicitArg(Reg); Reg->setIgnoreLiveness(); @@ -321,11 +318,11 @@ void TargetMIPS32::lowerAlloca(const InstAlloca *Inst) { UsesFramePointer = true; - // Conservatively require the stack to be aligned. Some stack - // adjustment operations implemented below assume that the stack is - // aligned before the alloca. All the alloca code ensures that the - // stack alignment is preserved after the alloca. The stack alignment - // restriction can be relaxed in some cases. + // Conservatively require the stack to be aligned. Some stack adjustment + // operations implemented below assume that the stack is aligned before the + // alloca. All the alloca code ensures that the stack alignment is preserved + // after the alloca. The stack alignment restriction can be relaxed in some + // cases. NeedsStackAlignment = true; (void)Inst; UnimplementedError(Func->getContext()->getFlags()); @@ -483,9 +480,9 @@ UnimplementedError(Func->getContext()->getFlags()); return; case Intrinsics::AtomicFenceAll: - // NOTE: FenceAll should prevent and load/store from being moved - // across the fence (both atomic and non-atomic). The InstMIPS32Mfence - // instruction is currently marked coarsely as "HasSideEffects". + // NOTE: FenceAll should prevent any load/store from being moved across the + // fence (both atomic and non-atomic). The InstMIPS32Mfence instruction is + // currently marked coarsely as "HasSideEffects". 
UnimplementedError(Func->getContext()->getFlags()); return; case Intrinsics::AtomicIsLockFree: { @@ -549,9 +546,8 @@ return; } case Intrinsics::Memset: { - // The value operand needs to be extended to a stack slot size - // because the PNaCl ABI requires arguments to be at least 32 bits - // wide. + // The value operand needs to be extended to a stack slot size because the + // PNaCl ABI requires arguments to be at least 32 bits wide. Operand *ValOp = Instr->getArg(1); assert(ValOp->getType() == IceType_i8); Variable *ValExt = Func->makeVariable(stackSlotType()); @@ -651,10 +647,9 @@ UnimplementedError(Func->getContext()->getFlags()); } -// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to -// preserve integrity of liveness analysis. Undef values are also -// turned into zeroes, since loOperand() and hiOperand() don't expect -// Undef input. +// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to preserve +// integrity of liveness analysis. Undef values are also turned into zeroes, +// since loOperand() and hiOperand() don't expect Undef input. void TargetMIPS32::prelowerPhis() { UnimplementedError(Func->getContext()->getFlags()); } @@ -662,8 +657,8 @@ void TargetMIPS32::postLower() { if (Ctx->getFlags().getOptLevel() == Opt_m1) return; - // Find two-address non-SSA instructions where Dest==Src0, and set - // the DestNonKillable flag to keep liveness analysis consistent. + // Find two-address non-SSA instructions where Dest==Src0, and set the + // DestNonKillable flag to keep liveness analysis consistent. UnimplementedError(Func->getContext()->getFlags()); }
diff --git a/src/IceTargetLoweringMIPS32.h b/src/IceTargetLoweringMIPS32.h index 52c09cf..1ae0b28 100644 --- a/src/IceTargetLoweringMIPS32.h +++ b/src/IceTargetLoweringMIPS32.h
@@ -52,8 +52,8 @@ return UsesFramePointer ? RegMIPS32::Reg_FP : RegMIPS32::Reg_SP; } size_t typeWidthInBytesOnStack(Type Ty) const override { - // Round up to the next multiple of 4 bytes. In particular, i1, - // i8, and i16 are rounded up to 4 bytes. + // Round up to the next multiple of 4 bytes. In particular, i1, i8, and i16 + // are rounded up to 4 bytes. return (typeWidthInBytes(Ty) + 3) & ~3; }
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp index f06150f..47f6ae1 100644 --- a/src/IceTargetLoweringX8632.cpp +++ b/src/IceTargetLoweringX8632.cpp
@@ -8,9 +8,8 @@ //===----------------------------------------------------------------------===// /// /// \file -/// This file implements the TargetLoweringX8632 class, which -/// consists almost entirely of the lowering sequence for each -/// high-level instruction. +/// This file implements the TargetLoweringX8632 class, which consists almost +/// entirely of the lowering sequence for each high-level instruction. /// //===----------------------------------------------------------------------===// @@ -100,24 +99,21 @@ void TargetX8632::lowerCall(const InstCall *Instr) { // x86-32 calling convention: // - // * At the point before the call, the stack must be aligned to 16 - // bytes. + // * At the point before the call, the stack must be aligned to 16 bytes. // - // * The first four arguments of vector type, regardless of their - // position relative to the other arguments in the argument list, are - // placed in registers xmm0 - xmm3. + // * The first four arguments of vector type, regardless of their position + // relative to the other arguments in the argument list, are placed in + // registers xmm0 - xmm3. // - // * Other arguments are pushed onto the stack in right-to-left order, - // such that the left-most argument ends up on the top of the stack at - // the lowest memory address. + // * Other arguments are pushed onto the stack in right-to-left order, such + // that the left-most argument ends up on the top of the stack at the lowest + // memory address. // - // * Stack arguments of vector type are aligned to start at the next - // highest multiple of 16 bytes. Other stack arguments are aligned to - // 4 bytes. + // * Stack arguments of vector type are aligned to start at the next highest + // multiple of 16 bytes. Other stack arguments are aligned to 4 bytes. // - // This intends to match the section "IA-32 Function Calling - // Convention" of the document "OS X ABI Function Call Guide" by - // Apple. 
+ // This intends to match the section "IA-32 Function Calling Convention" of + // the document "OS X ABI Function Call Guide" by Apple. NeedsStackAlignment = true; using OperandList = std::vector<Operand *>; @@ -149,46 +145,44 @@ } } - // Adjust the parameter area so that the stack is aligned. It is - // assumed that the stack is already aligned at the start of the - // calling sequence. + // Adjust the parameter area so that the stack is aligned. It is assumed that + // the stack is already aligned at the start of the calling sequence. ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes); - // Subtract the appropriate amount for the argument area. This also - // takes care of setting the stack adjustment during emission. + // Subtract the appropriate amount for the argument area. This also takes + // care of setting the stack adjustment during emission. // - // TODO: If for some reason the call instruction gets dead-code - // eliminated after lowering, we would need to ensure that the - // pre-call and the post-call esp adjustment get eliminated as well. + // TODO: If for some reason the call instruction gets dead-code eliminated + // after lowering, we would need to ensure that the pre-call and the + // post-call esp adjustment get eliminated as well. if (ParameterAreaSizeBytes) { _adjust_stack(ParameterAreaSizeBytes); } - // Copy arguments that are passed on the stack to the appropriate - // stack locations. + // Copy arguments that are passed on the stack to the appropriate stack + // locations. for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) { lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i])); } - // Copy arguments to be passed in registers to the appropriate - // registers. - // TODO: Investigate the impact of lowering arguments passed in - // registers after lowering stack arguments as opposed to the other - // way around. Lowering register arguments after stack arguments may - // reduce register pressure. 
On the other hand, lowering register - // arguments first (before stack arguments) may result in more compact - // code, as the memory operand displacements may end up being smaller - // before any stack adjustment is done. + // Copy arguments to be passed in registers to the appropriate registers. + // TODO: Investigate the impact of lowering arguments passed in registers + // after lowering stack arguments as opposed to the other way around. + // Lowering register arguments after stack arguments may reduce register + // pressure. On the other hand, lowering register arguments first (before + // stack arguments) may result in more compact code, as the memory operand + // displacements may end up being smaller before any stack adjustment is + // done. for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) { Variable *Reg = legalizeToReg(XmmArgs[i], Traits::RegisterSet::Reg_xmm0 + i); - // Generate a FakeUse of register arguments so that they do not get - // dead code eliminated as a result of the FakeKill of scratch - // registers after the call. + // Generate a FakeUse of register arguments so that they do not get dead + // code eliminated as a result of the FakeKill of scratch registers after + // the call. Context.insert(InstFakeUse::create(Func, Reg)); } - // Generate the call instruction. Assign its result to a temporary - // with high register allocation weight. + // Generate the call instruction. Assign its result to a temporary with high + // register allocation weight. Variable *Dest = Instr->getDest(); // ReturnReg doubles as ReturnRegLo as necessary. Variable *ReturnReg = nullptr; @@ -211,8 +205,8 @@ break; case IceType_f32: case IceType_f64: - // Leave ReturnReg==ReturnRegHi==nullptr, and capture the result with - // the fstp instruction. + // Leave ReturnReg==ReturnRegHi==nullptr, and capture the result with the + // fstp instruction. 
break; case IceType_v4i1: case IceType_v8i1: @@ -247,8 +241,8 @@ if (ReturnRegHi) Context.insert(InstFakeDef::create(Func, ReturnRegHi)); - // Add the appropriate offset to esp. The call instruction takes care - // of resetting the stack offset during emission. + // Add the appropriate offset to esp. The call instruction takes care of + // resetting the stack offset during emission. if (ParameterAreaSizeBytes) { Variable *esp = Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp); @@ -287,22 +281,21 @@ } } } else if (isScalarFloatingType(Dest->getType())) { - // Special treatment for an FP function which returns its result in - // st(0). - // If Dest ends up being a physical xmm register, the fstp emit code - // will route st(0) through a temporary stack slot. + // Special treatment for an FP function which returns its result in st(0). + // If Dest ends up being a physical xmm register, the fstp emit code will + // route st(0) through a temporary stack slot. _fstp(Dest); - // Create a fake use of Dest in case it actually isn't used, - // because st(0) still needs to be popped. + // Create a fake use of Dest in case it actually isn't used, because st(0) + // still needs to be popped. Context.insert(InstFakeUse::create(Func, Dest)); } } void TargetX8632::lowerArguments() { VarList &Args = Func->getArgs(); - // The first four arguments of vector type, regardless of their - // position relative to the other arguments in the argument list, are - // passed in registers xmm0 - xmm3. + // The first four arguments of vector type, regardless of their position + // relative to the other arguments in the argument list, are passed in + // registers xmm0 - xmm3. unsigned NumXmmArgs = 0; Context.init(Func->getEntryNode()); @@ -314,9 +307,9 @@ Type Ty = Arg->getType(); if (!isVectorType(Ty)) continue; - // Replace Arg in the argument list with the home register. 
Then - // generate an instruction in the prolog to copy the home register - // to the assigned location of Arg. + // Replace Arg in the argument list with the home register. Then generate + // an instruction in the prolog to copy the home register to the assigned + // location of Arg. int32_t RegNum = Traits::RegisterSet::Reg_xmm0 + NumXmmArgs; ++NumXmmArgs; Variable *RegisterArg = Func->makeVariable(Ty); @@ -351,15 +344,14 @@ _mov(Reg, Src0, Traits::RegisterSet::Reg_eax); } } - // Add a ret instruction even if sandboxing is enabled, because - // addEpilog explicitly looks for a ret instruction as a marker for - // where to insert the frame removal instructions. + // Add a ret instruction even if sandboxing is enabled, because addEpilog + // explicitly looks for a ret instruction as a marker for where to insert the + // frame removal instructions. _ret(Reg); // Add a fake use of esp to make sure esp stays alive for the entire - // function. Otherwise post-call esp adjustments get dead-code - // eliminated. TODO: Are there more places where the fake use - // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not - // have a ret instruction. + // function. Otherwise post-call esp adjustments get dead-code eliminated. + // TODO: Are there more places where the fake use should be inserted? E.g. + // "void f(int n){while(1) g(n);}" may not have a ret instruction. Variable *esp = Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp); Context.insert(InstFakeUse::create(Func, esp)); @@ -395,16 +387,15 @@ // * LocalsSpillAreaSize: area 6 // * SpillAreaSizeBytes: areas 3 - 7 - // Determine stack frame offsets for each Variable without a - // register assignment. This can be done as one variable per stack - // slot. Or, do coalescing by running the register allocator again - // with an infinite set of registers (as a side effect, this gives - // variables a second chance at physical register assignment). 
+ // Determine stack frame offsets for each Variable without a register + // assignment. This can be done as one variable per stack slot. Or, do + // coalescing by running the register allocator again with an infinite set of + // registers (as a side effect, this gives variables a second chance at + // physical register assignment). // - // A middle ground approach is to leverage sparsity and allocate one - // block of space on the frame for globals (variables with - // multi-block lifetime), and one block to share for locals - // (single-block lifetime). + // A middle ground approach is to leverage sparsity and allocate one block of + // space on the frame for globals (variables with multi-block lifetime), and + // one block to share for locals (single-block lifetime). Context.init(Node); Context.setInsertPoint(Context.getCur()); @@ -414,17 +405,16 @@ RegsUsed = llvm::SmallBitVector(CalleeSaves.size()); VarList SortedSpilledVariables, VariablesLinkedToSpillSlots; size_t GlobalsSize = 0; - // If there is a separate locals area, this represents that area. - // Otherwise it counts any variable not counted by GlobalsSize. + // If there is a separate locals area, this represents that area. Otherwise + // it counts any variable not counted by GlobalsSize. SpillAreaSizeBytes = 0; - // If there is a separate locals area, this specifies the alignment - // for it. + // If there is a separate locals area, this specifies the alignment for it. uint32_t LocalsSlotsAlignmentBytes = 0; - // The entire spill locations area gets aligned to largest natural - // alignment of the variables that have a spill slot. + // The entire spill locations area gets aligned to largest natural alignment + // of the variables that have a spill slot. uint32_t SpillAreaAlignmentBytes = 0; - // A spill slot linked to a variable with a stack slot should reuse - // that stack slot. + // A spill slot linked to a variable with a stack slot should reuse that + // stack slot. 
std::function<bool(Variable *)> TargetVarHook = [&VariablesLinkedToSpillSlots](Variable *Var) { if (auto *SpillVar = @@ -466,15 +456,14 @@ Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp); _push(ebp); _mov(ebp, esp); - // Keep ebp live for late-stage liveness analysis - // (e.g. asm-verbose mode). + // Keep ebp live for late-stage liveness analysis (e.g. asm-verbose mode). Context.insert(InstFakeUse::create(Func, ebp)); } - // Align the variables area. SpillAreaPaddingBytes is the size of - // the region after the preserved registers and before the spill areas. - // LocalsSlotsPaddingBytes is the amount of padding between the globals - // and locals area if they are separate. + // Align the variables area. SpillAreaPaddingBytes is the size of the region + // after the preserved registers and before the spill areas. + // LocalsSlotsPaddingBytes is the amount of padding between the globals and + // locals area if they are separate. assert(SpillAreaAlignmentBytes <= Traits::X86_STACK_ALIGNMENT_BYTES); assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes); uint32_t SpillAreaPaddingBytes = 0; @@ -504,9 +493,9 @@ resetStackAdjustment(); - // Fill in stack offsets for stack args, and copy args into registers - // for those that were register-allocated. Args are pushed right to - // left, so Arg[0] is closest to the stack/frame pointer. + // Fill in stack offsets for stack args, and copy args into registers for + // those that were register-allocated. Args are pushed right to left, so + // Arg[0] is closest to the stack/frame pointer. Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); size_t BasicFrameOffset = PreservedRegsSizeBytes + Traits::X86_RET_IP_SIZE_BYTES; @@ -576,8 +565,8 @@ if (RI == E) return; - // Convert the reverse_iterator position into its corresponding - // (forward) iterator position. + // Convert the reverse_iterator position into its corresponding (forward) + // iterator position. 
InstList::iterator InsertPoint = RI.base(); --InsertPoint; Context.init(Node); @@ -586,9 +575,9 @@ Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp); if (IsEbpBasedFrame) { Variable *ebp = getPhysicalRegister(Traits::RegisterSet::Reg_ebp); - // For late-stage liveness analysis (e.g. asm-verbose mode), - // adding a fake use of esp before the assignment of esp=ebp keeps - // previous esp adjustments from being dead-code eliminated. + // For late-stage liveness analysis (e.g. asm-verbose mode), adding a fake + // use of esp before the assignment of esp=ebp keeps previous esp + // adjustments from being dead-code eliminated. Context.insert(InstFakeUse::create(Func, esp)); _mov(esp, ebp); _pop(ebp); @@ -747,8 +736,8 @@ continue; typename T::IceType *Const = llvm::cast<typename T::IceType>(C); typename T::IceType::PrimType Value = Const->getValue(); - // Use memcpy() to copy bits from Value into RawValue in a way - // that avoids breaking strict-aliasing rules. + // Use memcpy() to copy bits from Value into RawValue in a way that avoids + // breaking strict-aliasing rules. typename T::PrimitiveIntType RawValue; memcpy(&RawValue, &Value, sizeof(Value)); char buf[30]; @@ -766,8 +755,8 @@ void TargetDataX8632::lowerConstants() { if (Ctx->getFlags().getDisableTranslation()) return; - // No need to emit constants from the int pool since (for x86) they - // are embedded as immediates in the instructions, just emit float/double. + // No need to emit constants from the int pool since (for x86) they are + // embedded as immediates in the instructions, just emit float/double. switch (Ctx->getFlags().getOutFileType()) { case FT_Elf: { ELFObjectWriter *Writer = Ctx->getObjectWriter(); @@ -846,19 +835,17 @@ TargetHeaderX8632::TargetHeaderX8632(GlobalContext *Ctx) : TargetHeaderLowering(Ctx) {} -// In some cases, there are x-macros tables for both high-level and -// low-level instructions/operands that use the same enum key value. 
-// The tables are kept separate to maintain a proper separation -// between abstraction layers. There is a risk that the tables could -// get out of sync if enum values are reordered or if entries are -// added or deleted. The following dummy namespaces use +// In some cases, there are x-macros tables for both high-level and low-level +// instructions/operands that use the same enum key value. The tables are kept +// separate to maintain a proper separation between abstraction layers. There +// is a risk that the tables could get out of sync if enum values are reordered +// or if entries are added or deleted. The following dummy namespaces use // static_asserts to ensure everything is kept in sync. namespace { // Validate the enum values in FCMPX8632_TABLE. namespace dummy1 { -// Define a temporary set of enum values based on low-level table -// entries. +// Define a temporary set of enum values based on low-level table entries. enum _tmp_enum { #define X(val, dflt, swapS, C1, C2, swapV, pred) _tmp_##val, FCMPX8632_TABLE @@ -869,8 +856,8 @@ #define X(tag, str) static const int _table1_##tag = InstFcmp::tag; ICEINSTFCMP_TABLE #undef X -// Define a set of constants based on low-level table entries, and -// ensure the table entry keys are consistent. +// Define a set of constants based on low-level table entries, and ensure the +// table entry keys are consistent. #define X(val, dflt, swapS, C1, C2, swapV, pred) \ static const int _table2_##val = _tmp_##val; \ static_assert( \ @@ -878,8 +865,8 @@ "Inconsistency between FCMPX8632_TABLE and ICEINSTFCMP_TABLE"); FCMPX8632_TABLE #undef X -// Repeat the static asserts with respect to the high-level table -// entries in case the high-level table has extra entries. +// Repeat the static asserts with respect to the high-level table entries in +// case the high-level table has extra entries. 
#define X(tag, str) \ static_assert( \ _table1_##tag == _table2_##tag, \ @@ -890,8 +877,7 @@ // Validate the enum values in ICMPX8632_TABLE. namespace dummy2 { -// Define a temporary set of enum values based on low-level table -// entries. +// Define a temporary set of enum values based on low-level table entries. enum _tmp_enum { #define X(val, C_32, C1_64, C2_64, C3_64) _tmp_##val, ICMPX8632_TABLE @@ -902,8 +888,8 @@ #define X(tag, str) static const int _table1_##tag = InstIcmp::tag; ICEINSTICMP_TABLE #undef X -// Define a set of constants based on low-level table entries, and -// ensure the table entry keys are consistent. +// Define a set of constants based on low-level table entries, and ensure the +// table entry keys are consistent. #define X(val, C_32, C1_64, C2_64, C3_64) \ static const int _table2_##val = _tmp_##val; \ static_assert( \ @@ -911,8 +897,8 @@ "Inconsistency between ICMPX8632_TABLE and ICEINSTICMP_TABLE"); ICMPX8632_TABLE #undef X -// Repeat the static asserts with respect to the high-level table -// entries in case the high-level table has extra entries. +// Repeat the static asserts with respect to the high-level table entries in +// case the high-level table has extra entries. #define X(tag, str) \ static_assert( \ _table1_##tag == _table2_##tag, \ @@ -923,8 +909,7 @@ // Validate the enum values in ICETYPEX8632_TABLE. namespace dummy3 { -// Define a temporary set of enum values based on low-level table -// entries. +// Define a temporary set of enum values based on low-level table entries. enum _tmp_enum { #define X(tag, elementty, cvt, sdss, pack, width, fld) _tmp_##tag, ICETYPEX8632_TABLE @@ -936,16 +921,16 @@ static const int _table1_##tag = tag; ICETYPE_TABLE #undef X -// Define a set of constants based on low-level table entries, and -// ensure the table entry keys are consistent. +// Define a set of constants based on low-level table entries, and ensure the +// table entry keys are consistent. 
#define X(tag, elementty, cvt, sdss, pack, width, fld) \ static const int _table2_##tag = _tmp_##tag; \ static_assert(_table1_##tag == _table2_##tag, \ "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE"); ICETYPEX8632_TABLE #undef X -// Repeat the static asserts with respect to the high-level table -// entries in case the high-level table has extra entries. +// Repeat the static asserts with respect to the high-level table entries in +// case the high-level table has extra entries. #define X(tag, sizeLog2, align, elts, elty, str) \ static_assert(_table1_##tag == _table2_##tag, \ "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE");
diff --git a/src/IceTargetLoweringX8632.h b/src/IceTargetLoweringX8632.h index 6187809..b1d74f5 100644 --- a/src/IceTargetLoweringX8632.h +++ b/src/IceTargetLoweringX8632.h
@@ -8,9 +8,8 @@ //===----------------------------------------------------------------------===// /// /// \file -/// This file declares the TargetLoweringX8632 class, which -/// implements the TargetLowering interface for the x86-32 -/// architecture. +/// This file declares the TargetLoweringX8632 class, which implements the +/// TargetLowering interface for the x86-32 architecture. /// //===----------------------------------------------------------------------===//
diff --git a/src/IceTargetLoweringX8632Traits.h b/src/IceTargetLoweringX8632Traits.h index 3bee361..918a585 100644 --- a/src/IceTargetLoweringX8632Traits.h +++ b/src/IceTargetLoweringX8632Traits.h
@@ -390,10 +390,10 @@ const llvm::SmallBitVector &ExcludeRegisters, uint64_t Salt) { // TODO(stichnot): Declaring Permutation this way loses type/size - // information. Fix this in conjunction with the caller-side TODO. + // information. Fix this in conjunction with the caller-side TODO. assert(Permutation.size() >= RegisterSet::Reg_NUM); // Expected upper bound on the number of registers in a single equivalence - // class. For x86-32, this would comprise the 8 XMM registers. This is for + // class. For x86-32, this would comprise the 8 XMM registers. This is for // performance, not correctness. static const unsigned MaxEquivalenceClassSize = 8; using RegisterList = llvm::SmallVector<int32_t, MaxEquivalenceClassSize>; @@ -477,8 +477,8 @@ static constexpr uint32_t MEMSET_UNROLL_LIMIT = 16; /// @} - /// Value is in bytes. Return Value adjusted to the next highest multiple - /// of the stack alignment. + /// Value is in bytes. Return Value adjusted to the next highest multiple of + /// the stack alignment. static uint32_t applyStackAlignment(uint32_t Value) { return Utils::applyAlignment(Value, X86_STACK_ALIGNMENT_BYTES); } @@ -500,17 +500,17 @@ /// instruction. There is one table entry for each of the 16 conditions. /// /// The first four columns describe the case when the operands are floating - /// point scalar values. A comment in lowerFcmp() describes the lowering - /// template. In the most general case, there is a compare followed by two + /// point scalar values. A comment in lowerFcmp() describes the lowering + /// template. In the most general case, there is a compare followed by two /// conditional branches, because some fcmp conditions don't map to a single - /// x86 conditional branch. However, in many cases it is possible to swap the - /// operands in the comparison and have a single conditional branch. Since + /// x86 conditional branch. 
However, in many cases it is possible to swap the + /// operands in the comparison and have a single conditional branch. Since /// it's quite tedious to validate the table by hand, good execution tests are /// helpful. /// /// The last two columns describe the case when the operands are vectors of - /// floating point values. For most fcmp conditions, there is a clear mapping - /// to a single x86 cmpps instruction variant. Some fcmp conditions require + /// floating point values. For most fcmp conditions, there is a clear mapping + /// to a single x86 cmpps instruction variant. Some fcmp conditions require /// special code to handle and these are marked in the table with a /// Cmpps_Invalid predicate. /// {@ @@ -525,7 +525,7 @@ /// @} /// The following table summarizes the logic for lowering the icmp instruction - /// for i32 and narrower types. Each icmp condition has a clear mapping to an + /// for i32 and narrower types. Each icmp condition has a clear mapping to an /// x86 conditional branch instruction. /// {@ static const struct TableIcmp32Type { Cond::BrCond Mapping; } TableIcmp32[]; @@ -533,8 +533,8 @@ /// @} /// The following table summarizes the logic for lowering the icmp instruction - /// for the i64 type. For Eq and Ne, two separate 32-bit comparisons and - /// conditional branches are needed. For the other conditions, three separate + /// for the i64 type. For Eq and Ne, two separate 32-bit comparisons and + /// conditional branches are needed. For the other conditions, three separate /// conditional branches are needed. /// {@ static const struct TableIcmp64Type { @@ -567,8 +567,8 @@ using TargetLowering = ::Ice::X86Internal::TargetX86Base<TargetX8632>; using Assembler = X8632::AssemblerX8632; - /// X86Operand extends the Operand hierarchy. Its subclasses are - /// X86OperandMem and VariableSplit. + /// X86Operand extends the Operand hierarchy. Its subclasses are X86OperandMem + /// and VariableSplit. 
class X86Operand : public ::Ice::Operand { X86Operand() = delete; X86Operand(const X86Operand &) = delete; @@ -644,8 +644,8 @@ }; /// VariableSplit is a way to treat an f64 memory location as a pair of i32 - /// locations (Low and High). This is needed for some cases of the Bitcast - /// instruction. Since it's not possible for integer registers to access the + /// locations (Low and High). This is needed for some cases of the Bitcast + /// instruction. Since it's not possible for integer registers to access the /// XMM registers and vice versa, the lowering forces the f64 to be spilled to /// the stack and then accesses through the VariableSplit. // TODO(jpp): remove references to VariableSplit from IceInstX86Base as 64bit @@ -685,11 +685,11 @@ Portion Part; }; - /// SpillVariable decorates a Variable by linking it to another Variable. - /// When stack frame offsets are computed, the SpillVariable is given a - /// distinct stack slot only if its linked Variable has a register. If the - /// linked Variable has a stack slot, then the Variable and SpillVariable - /// share that slot. + /// SpillVariable decorates a Variable by linking it to another Variable. When + /// stack frame offsets are computed, the SpillVariable is given a distinct + /// stack slot only if its linked Variable has a register. If the linked + /// Variable has a stack slot, then the Variable and SpillVariable share that + /// slot. class SpillVariable : public Variable { SpillVariable() = delete; SpillVariable(const SpillVariable &) = delete;
diff --git a/src/IceTargetLoweringX8664.cpp b/src/IceTargetLoweringX8664.cpp index 83a5fa5..8c77baa 100644 --- a/src/IceTargetLoweringX8664.cpp +++ b/src/IceTargetLoweringX8664.cpp
@@ -8,9 +8,8 @@ //===----------------------------------------------------------------------===// /// /// \file -/// This file implements the TargetLoweringX8664 class, which -/// consists almost entirely of the lowering sequence for each -/// high-level instruction. +/// This file implements the TargetLoweringX8664 class, which consists almost +/// entirely of the lowering sequence for each high-level instruction. /// //===----------------------------------------------------------------------===// @@ -131,24 +130,22 @@ void TargetX8664::lowerCall(const InstCall *Instr) { // x86-64 calling convention: // - // * At the point before the call, the stack must be aligned to 16 - // bytes. + // * At the point before the call, the stack must be aligned to 16 bytes. // // * The first eight arguments of vector/fp type, regardless of their - // position relative to the other arguments in the argument list, are - // placed in registers %xmm0 - %xmm7. + // position relative to the other arguments in the argument list, are placed + // in registers %xmm0 - %xmm7. // - // * The first six arguments of integer types, regardless of their - // position relative to the other arguments in the argument list, are - // placed in registers %rdi, %rsi, %rdx, %rcx, %r8, and %r9. + // * The first six arguments of integer types, regardless of their position + // relative to the other arguments in the argument list, are placed in + // registers %rdi, %rsi, %rdx, %rcx, %r8, and %r9. // - // * Other arguments are pushed onto the stack in right-to-left order, - // such that the left-most argument ends up on the top of the stack at - // the lowest memory address. + // * Other arguments are pushed onto the stack in right-to-left order, such + // that the left-most argument ends up on the top of the stack at the lowest + // memory address. // - // * Stack arguments of vector type are aligned to start at the next - // highest multiple of 16 bytes. Other stack arguments are aligned to - // 8 bytes. 
+ // * Stack arguments of vector type are aligned to start at the next highest + // multiple of 16 bytes. Other stack arguments are aligned to 8 bytes. // // This intends to match the section "Function Calling Sequence" of the // document "System V Application Binary Interface." @@ -191,41 +188,39 @@ } } - // Adjust the parameter area so that the stack is aligned. It is - // assumed that the stack is already aligned at the start of the - // calling sequence. + // Adjust the parameter area so that the stack is aligned. It is assumed that + // the stack is already aligned at the start of the calling sequence. ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes); - // Subtract the appropriate amount for the argument area. This also - // takes care of setting the stack adjustment during emission. + // Subtract the appropriate amount for the argument area. This also takes + // care of setting the stack adjustment during emission. // - // TODO: If for some reason the call instruction gets dead-code - // eliminated after lowering, we would need to ensure that the - // pre-call and the post-call esp adjustment get eliminated as well. + // TODO: If for some reason the call instruction gets dead-code eliminated + // after lowering, we would need to ensure that the pre-call and the + // post-call esp adjustment get eliminated as well. if (ParameterAreaSizeBytes) { _adjust_stack(ParameterAreaSizeBytes); } - // Copy arguments that are passed on the stack to the appropriate - // stack locations. + // Copy arguments that are passed on the stack to the appropriate stack + // locations. for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) { lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i])); } - // Copy arguments to be passed in registers to the appropriate - // registers. - // TODO: Investigate the impact of lowering arguments passed in - // registers after lowering stack arguments as opposed to the other - // way around. 
Lowering register arguments after stack arguments may - // reduce register pressure. On the other hand, lowering register - // arguments first (before stack arguments) may result in more compact - // code, as the memory operand displacements may end up being smaller - // before any stack adjustment is done. + // Copy arguments to be passed in registers to the appropriate registers. + // TODO: Investigate the impact of lowering arguments passed in registers + // after lowering stack arguments as opposed to the other way around. + // Lowering register arguments after stack arguments may reduce register + // pressure. On the other hand, lowering register arguments first (before + // stack arguments) may result in more compact code, as the memory operand + // displacements may end up being smaller before any stack adjustment is + // done. for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) { Variable *Reg = legalizeToReg(XmmArgs[i], getRegisterForXmmArgNum(i)); - // Generate a FakeUse of register arguments so that they do not get - // dead code eliminated as a result of the FakeKill of scratch - // registers after the call. + // Generate a FakeUse of register arguments so that they do not get dead + // code eliminated as a result of the FakeKill of scratch registers after + // the call. Context.insert(InstFakeUse::create(Func, Reg)); } @@ -234,8 +229,8 @@ Context.insert(InstFakeUse::create(Func, Reg)); } - // Generate the call instruction. Assign its result to a temporary - // with high register allocation weight. + // Generate the call instruction. Assign its result to a temporary with high + // register allocation weight. Variable *Dest = Instr->getDest(); // ReturnReg doubles as ReturnRegLo as necessary. Variable *ReturnReg = nullptr; @@ -277,8 +272,8 @@ llvm_unreachable("X86-64 Sandboxing codegen not implemented."); } - // Add the appropriate offset to esp. The call instruction takes care - // of resetting the stack offset during emission. 
+ // Add the appropriate offset to esp. The call instruction takes care of + // resetting the stack offset during emission. if (ParameterAreaSizeBytes) { Variable *Esp = Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp); @@ -310,11 +305,12 @@ void TargetX8664::lowerArguments() { VarList &Args = Func->getArgs(); - // The first eight vetcor typed arguments (as well as fp arguments) are passed - // in %xmm0 through %xmm7 regardless of their position in the argument list. + // The first eight vector typed arguments (as well as fp arguments) are + // passed in %xmm0 through %xmm7 regardless of their position in the argument + // list. unsigned NumXmmArgs = 0; - // The first six integer typed arguments are passed in %rdi, %rsi, %rdx, %rcx, - // %r8, and %r9 regardless of their position in the argument list. + // The first six integer typed arguments are passed in %rdi, %rsi, %rdx, + // %rcx, %r8, and %r9 regardless of their position in the argument list. unsigned NumGprArgs = 0; Context.init(Func->getEntryNode()); @@ -345,9 +341,9 @@ } assert(RegNum != Variable::NoRegister); assert(RegisterArg != nullptr); - // Replace Arg in the argument list with the home register. Then - // generate an instruction in the prolog to copy the home register - // to the assigned location of Arg. + // Replace Arg in the argument list with the home register. Then generate + // an instruction in the prolog to copy the home register to the assigned + // location of Arg. if (BuildDefs::dump()) RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func)); RegisterArg->setRegNum(RegNum); @@ -371,15 +367,14 @@ _mov(Reg, Src0, Traits::RegisterSet::Reg_eax); } } - // Add a ret instruction even if sandboxing is enabled, because - // addEpilog explicitly looks for a ret instruction as a marker for - // where to insert the frame removal instructions.
+ // Add a ret instruction even if sandboxing is enabled, because addEpilog + // explicitly looks for a ret instruction as a marker for where to insert the + // frame removal instructions. _ret(Reg); // Add a fake use of esp to make sure esp stays alive for the entire - // function. Otherwise post-call esp adjustments get dead-code - // eliminated. TODO: Are there more places where the fake use - // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not - // have a ret instruction. + // function. Otherwise post-call esp adjustments get dead-code eliminated. + // TODO: Are there more places where the fake use should be inserted? E.g. + // "void f(int n){while(1) g(n);}" may not have a ret instruction. Variable *esp = Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp); Context.insert(InstFakeUse::create(Func, esp)); @@ -415,16 +410,15 @@ // * LocalsSpillAreaSize: area 6 // * SpillAreaSizeBytes: areas 3 - 7 - // Determine stack frame offsets for each Variable without a - // register assignment. This can be done as one variable per stack - // slot. Or, do coalescing by running the register allocator again - // with an infinite set of registers (as a side effect, this gives - // variables a second chance at physical register assignment). + // Determine stack frame offsets for each Variable without a register + // assignment. This can be done as one variable per stack slot. Or, do + // coalescing by running the register allocator again with an infinite set of + // registers (as a side effect, this gives variables a second chance at + // physical register assignment). // - // A middle ground approach is to leverage sparsity and allocate one - // block of space on the frame for globals (variables with - // multi-block lifetime), and one block to share for locals - // (single-block lifetime). 
+ // A middle ground approach is to leverage sparsity and allocate one block of + // space on the frame for globals (variables with multi-block lifetime), and + // one block to share for locals (single-block lifetime). Context.init(Node); Context.setInsertPoint(Context.getCur()); @@ -434,17 +428,16 @@ RegsUsed = llvm::SmallBitVector(CalleeSaves.size()); VarList SortedSpilledVariables, VariablesLinkedToSpillSlots; size_t GlobalsSize = 0; - // If there is a separate locals area, this represents that area. - // Otherwise it counts any variable not counted by GlobalsSize. + // If there is a separate locals area, this represents that area. Otherwise + // it counts any variable not counted by GlobalsSize. SpillAreaSizeBytes = 0; - // If there is a separate locals area, this specifies the alignment - // for it. + // If there is a separate locals area, this specifies the alignment for it. uint32_t LocalsSlotsAlignmentBytes = 0; - // The entire spill locations area gets aligned to largest natural - // alignment of the variables that have a spill slot. + // The entire spill locations area gets aligned to largest natural alignment + // of the variables that have a spill slot. uint32_t SpillAreaAlignmentBytes = 0; - // A spill slot linked to a variable with a stack slot should reuse - // that stack slot. + // A spill slot linked to a variable with a stack slot should reuse that + // stack slot. std::function<bool(Variable *)> TargetVarHook = [&VariablesLinkedToSpillSlots](Variable *Var) { if (auto *SpillVar = @@ -486,15 +479,14 @@ Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp); _push(ebp); _mov(ebp, esp); - // Keep ebp live for late-stage liveness analysis - // (e.g. asm-verbose mode). + // Keep ebp live for late-stage liveness analysis (e.g. asm-verbose mode). Context.insert(InstFakeUse::create(Func, ebp)); } - // Align the variables area. SpillAreaPaddingBytes is the size of - // the region after the preserved registers and before the spill areas. 
- // LocalsSlotsPaddingBytes is the amount of padding between the globals - // and locals area if they are separate. + // Align the variables area. SpillAreaPaddingBytes is the size of the region + // after the preserved registers and before the spill areas. + // LocalsSlotsPaddingBytes is the amount of padding between the globals and + // locals area if they are separate. assert(SpillAreaAlignmentBytes <= Traits::X86_STACK_ALIGNMENT_BYTES); assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes); uint32_t SpillAreaPaddingBytes = 0; @@ -524,9 +516,9 @@ resetStackAdjustment(); - // Fill in stack offsets for stack args, and copy args into registers - // for those that were register-allocated. Args are pushed right to - // left, so Arg[0] is closest to the stack/frame pointer. + // Fill in stack offsets for stack args, and copy args into registers for + // those that were register-allocated. Args are pushed right to left, so + // Arg[0] is closest to the stack/frame pointer. Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); size_t BasicFrameOffset = PreservedRegsSizeBytes + Traits::X86_RET_IP_SIZE_BYTES; @@ -605,8 +597,8 @@ if (RI == E) return; - // Convert the reverse_iterator position into its corresponding - // (forward) iterator position. + // Convert the reverse_iterator position into its corresponding (forward) + // iterator position. InstList::iterator InsertPoint = RI.base(); --InsertPoint; Context.init(Node); @@ -615,9 +607,9 @@ Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp); if (IsEbpBasedFrame) { Variable *ebp = getPhysicalRegister(Traits::RegisterSet::Reg_ebp); - // For late-stage liveness analysis (e.g. asm-verbose mode), - // adding a fake use of esp before the assignment of esp=ebp keeps - // previous esp adjustments from being dead-code eliminated. + // For late-stage liveness analysis (e.g. 
asm-verbose mode), adding a fake + // use of esp before the assignment of esp=ebp keeps previous esp + // adjustments from being dead-code eliminated. Context.insert(InstFakeUse::create(Func, esp)); _mov(esp, ebp); _pop(ebp); @@ -758,8 +750,8 @@ continue; typename T::IceType *Const = llvm::cast<typename T::IceType>(C); typename T::IceType::PrimType Value = Const->getValue(); - // Use memcpy() to copy bits from Value into RawValue in a way - // that avoids breaking strict-aliasing rules. + // Use memcpy() to copy bits from Value into RawValue in a way that avoids + // breaking strict-aliasing rules. typename T::PrimitiveIntType RawValue; memcpy(&RawValue, &Value, sizeof(Value)); char buf[30]; @@ -777,8 +769,8 @@ void TargetDataX8664::lowerConstants() { if (Ctx->getFlags().getDisableTranslation()) return; - // No need to emit constants from the int pool since (for x86) they - // are embedded as immediates in the instructions, just emit float/double. + // No need to emit constants from the int pool since (for x86) they are + // embedded as immediates in the instructions, just emit float/double. switch (Ctx->getFlags().getOutFileType()) { case FT_Elf: { ELFObjectWriter *Writer = Ctx->getObjectWriter(); @@ -854,19 +846,17 @@ } } -// In some cases, there are x-macros tables for both high-level and -// low-level instructions/operands that use the same enum key value. -// The tables are kept separate to maintain a proper separation -// between abstraction layers. There is a risk that the tables could -// get out of sync if enum values are reordered or if entries are -// added or deleted. The following dummy namespaces use +// In some cases, there are x-macros tables for both high-level and low-level +// instructions/operands that use the same enum key value. The tables are kept +// separate to maintain a proper separation between abstraction layers. 
There +// is a risk that the tables could get out of sync if enum values are reordered +// or if entries are added or deleted. The following dummy namespaces use // static_asserts to ensure everything is kept in sync. namespace { // Validate the enum values in FCMPX8664_TABLE. namespace dummy1 { -// Define a temporary set of enum values based on low-level table -// entries. +// Define a temporary set of enum values based on low-level table entries. enum _tmp_enum { #define X(val, dflt, swapS, C1, C2, swapV, pred) _tmp_##val, FCMPX8664_TABLE @@ -877,8 +867,8 @@ #define X(tag, str) static const int _table1_##tag = InstFcmp::tag; ICEINSTFCMP_TABLE #undef X -// Define a set of constants based on low-level table entries, and -// ensure the table entry keys are consistent. +// Define a set of constants based on low-level table entries, and ensure the +// table entry keys are consistent. #define X(val, dflt, swapS, C1, C2, swapV, pred) \ static const int _table2_##val = _tmp_##val; \ static_assert( \ @@ -886,8 +876,8 @@ "Inconsistency between FCMPX8664_TABLE and ICEINSTFCMP_TABLE"); FCMPX8664_TABLE #undef X -// Repeat the static asserts with respect to the high-level table -// entries in case the high-level table has extra entries. +// Repeat the static asserts with respect to the high-level table entries in +// case the high-level table has extra entries. #define X(tag, str) \ static_assert( \ _table1_##tag == _table2_##tag, \ @@ -898,8 +888,7 @@ // Validate the enum values in ICMPX8664_TABLE. namespace dummy2 { -// Define a temporary set of enum values based on low-level table -// entries. +// Define a temporary set of enum values based on low-level table entries. 
enum _tmp_enum { #define X(val, C_32, C1_64, C2_64, C3_64) _tmp_##val, ICMPX8664_TABLE @@ -910,8 +899,8 @@ #define X(tag, str) static const int _table1_##tag = InstIcmp::tag; ICEINSTICMP_TABLE #undef X -// Define a set of constants based on low-level table entries, and -// ensure the table entry keys are consistent. +// Define a set of constants based on low-level table entries, and ensure the +// table entry keys are consistent. #define X(val, C_32, C1_64, C2_64, C3_64) \ static const int _table2_##val = _tmp_##val; \ static_assert( \ @@ -919,8 +908,8 @@ "Inconsistency between ICMPX8664_TABLE and ICEINSTICMP_TABLE"); ICMPX8664_TABLE #undef X -// Repeat the static asserts with respect to the high-level table -// entries in case the high-level table has extra entries. +// Repeat the static asserts with respect to the high-level table entries in +// case the high-level table has extra entries. #define X(tag, str) \ static_assert( \ _table1_##tag == _table2_##tag, \ @@ -931,8 +920,7 @@ // Validate the enum values in ICETYPEX8664_TABLE. namespace dummy3 { -// Define a temporary set of enum values based on low-level table -// entries. +// Define a temporary set of enum values based on low-level table entries. enum _tmp_enum { #define X(tag, elementty, cvt, sdss, pack, width, fld) _tmp_##tag, ICETYPEX8664_TABLE @@ -944,16 +932,16 @@ static const int _table1_##tag = tag; ICETYPE_TABLE #undef X -// Define a set of constants based on low-level table entries, and -// ensure the table entry keys are consistent. +// Define a set of constants based on low-level table entries, and ensure the +// table entry keys are consistent. 
#define X(tag, elementty, cvt, sdss, pack, width, fld) \ static const int _table2_##tag = _tmp_##tag; \ static_assert(_table1_##tag == _table2_##tag, \ "Inconsistency between ICETYPEX8664_TABLE and ICETYPE_TABLE"); ICETYPEX8664_TABLE #undef X -// Repeat the static asserts with respect to the high-level table -// entries in case the high-level table has extra entries. +// Repeat the static asserts with respect to the high-level table entries in +// case the high-level table has extra entries. #define X(tag, sizeLog2, align, elts, elty, str) \ static_assert(_table1_##tag == _table2_##tag, \ "Inconsistency between ICETYPEX8664_TABLE and ICETYPE_TABLE");
diff --git a/src/IceTargetLoweringX8664Traits.h b/src/IceTargetLoweringX8664Traits.h index 454b6cb..0ed40a8 100644 --- a/src/IceTargetLoweringX8664Traits.h +++ b/src/IceTargetLoweringX8664Traits.h
@@ -404,10 +404,10 @@ const llvm::SmallBitVector &ExcludeRegisters, uint64_t Salt) { // TODO(stichnot): Declaring Permutation this way loses type/size - // information. Fix this in conjunction with the caller-side TODO. + // information. Fix this in conjunction with the caller-side TODO. assert(Permutation.size() >= RegisterSet::Reg_NUM); // Expected upper bound on the number of registers in a single equivalence - // class. For x86-64, this would comprise the 16 XMM registers. This is + // class. For x86-64, this would comprise the 16 XMM registers. This is // for performance, not correctness. static const unsigned MaxEquivalenceClassSize = 8; using RegisterList = llvm::SmallVector<int32_t, MaxEquivalenceClassSize>; @@ -493,8 +493,8 @@ static constexpr uint32_t MEMSET_UNROLL_LIMIT = 16; /// @} - /// Value is in bytes. Return Value adjusted to the next highest multiple - /// of the stack alignment. + /// Value is in bytes. Return Value adjusted to the next highest multiple of + /// the stack alignment. static uint32_t applyStackAlignment(uint32_t Value) { return Utils::applyAlignment(Value, X86_STACK_ALIGNMENT_BYTES); } @@ -516,17 +516,17 @@ /// instruction. There is one table entry for each of the 16 conditions. /// /// The first four columns describe the case when the operands are floating - /// point scalar values. A comment in lowerFcmp() describes the lowering - /// template. In the most general case, there is a compare followed by two + /// point scalar values. A comment in lowerFcmp() describes the lowering + /// template. In the most general case, there is a compare followed by two /// conditional branches, because some fcmp conditions don't map to a single - /// x86 conditional branch. However, in many cases it is possible to swap the - /// operands in the comparison and have a single conditional branch. Since + /// x86 conditional branch. 
However, in many cases it is possible to swap the + /// operands in the comparison and have a single conditional branch. Since /// it's quite tedious to validate the table by hand, good execution tests are /// helpful. /// /// The last two columns describe the case when the operands are vectors of - /// floating point values. For most fcmp conditions, there is a clear mapping - /// to a single x86 cmpps instruction variant. Some fcmp conditions require + /// floating point values. For most fcmp conditions, there is a clear mapping + /// to a single x86 cmpps instruction variant. Some fcmp conditions require /// special code to handle and these are marked in the table with a /// Cmpps_Invalid predicate. /// {@ @@ -541,7 +541,7 @@ /// @} /// The following table summarizes the logic for lowering the icmp instruction - /// for i32 and narrower types. Each icmp condition has a clear mapping to an + /// for i32 and narrower types. Each icmp condition has a clear mapping to an /// x86 conditional branch instruction. /// {@ static const struct TableIcmp32Type { Cond::BrCond Mapping; } TableIcmp32[]; @@ -549,8 +549,8 @@ /// @} /// The following table summarizes the logic for lowering the icmp instruction - /// for the i64 type. For Eq and Ne, two separate 32-bit comparisons and - /// conditional branches are needed. For the other conditions, three separate + /// for the i64 type. For Eq and Ne, two separate 32-bit comparisons and + /// conditional branches are needed. For the other conditions, three separate /// conditional branches are needed. /// {@ static const struct TableIcmp64Type { @@ -583,8 +583,8 @@ using TargetLowering = ::Ice::X86Internal::TargetX86Base<TargetX8664>; using Assembler = X8664::AssemblerX8664; - /// X86Operand extends the Operand hierarchy. Its subclasses are - /// X86OperandMem and VariableSplit. + /// X86Operand extends the Operand hierarchy. Its subclasses are X86OperandMem + /// and VariableSplit. 
class X86Operand : public ::Ice::Operand { X86Operand() = delete; X86Operand(const X86Operand &) = delete; @@ -655,8 +655,8 @@ }; /// VariableSplit is a way to treat an f64 memory location as a pair of i32 - /// locations (Low and High). This is needed for some cases of the Bitcast - /// instruction. Since it's not possible for integer registers to access the + /// locations (Low and High). This is needed for some cases of the Bitcast + /// instruction. Since it's not possible for integer registers to access the /// XMM registers and vice versa, the lowering forces the f64 to be spilled to /// the stack and then accesses through the VariableSplit. // TODO(jpp): remove references to VariableSplit from IceInstX86Base as 64bit @@ -696,11 +696,11 @@ Portion Part; }; - /// SpillVariable decorates a Variable by linking it to another Variable. - /// When stack frame offsets are computed, the SpillVariable is given a - /// distinct stack slot only if its linked Variable has a register. If the - /// linked Variable has a stack slot, then the Variable and SpillVariable - /// share that slot. + /// SpillVariable decorates a Variable by linking it to another Variable. When + /// stack frame offsets are computed, the SpillVariable is given a distinct + /// stack slot only if its linked Variable has a register. If the linked + /// Variable has a stack slot, then the Variable and SpillVariable share that + /// slot. class SpillVariable : public Variable { SpillVariable() = delete; SpillVariable(const SpillVariable &) = delete;
diff --git a/src/IceTargetLoweringX86Base.h b/src/IceTargetLoweringX86Base.h index e032ce9..32c3e3b 100644 --- a/src/IceTargetLoweringX86Base.h +++ b/src/IceTargetLoweringX86Base.h
@@ -8,9 +8,8 @@ //===----------------------------------------------------------------------===// /// /// \file -/// This file declares the TargetLoweringX86 template class, which -/// implements the TargetLowering base interface for the x86 -/// architecture. +/// This file declares the TargetLoweringX86 template class, which implements +/// the TargetLowering base interface for the x86 architecture. /// //===----------------------------------------------------------------------===// @@ -44,7 +43,7 @@ /// /// Note: Ideally, we should be able to /// -/// static_assert(std::is_base_of<TargetX86Base<Machine>, Machine>::value); +/// static_assert(std::is_base_of<TargetX86Base<Machine>, Machine>::value); /// /// but that does not work: the compiler does not know that Machine inherits /// from TargetX86Base at this point in translation. @@ -106,13 +105,13 @@ void initNodeForLowering(CfgNode *Node) override; /// x86-32: Ensure that a 64-bit Variable has been split into 2 32-bit - /// Variables, creating them if necessary. This is needed for all - /// I64 operations, and it is needed for pushing F64 arguments for - /// function calls using the 32-bit push instruction (though the - /// latter could be done by directly writing to the stack). + /// Variables, creating them if necessary. This is needed for all I64 + /// operations, and it is needed for pushing F64 arguments for function calls + /// using the 32-bit push instruction (though the latter could be done by + /// directly writing to the stack). /// - /// x86-64: Complains loudly if invoked because the cpu can handle - /// 64-bit types natively. + /// x86-64: Complains loudly if invoked because the cpu can handle 64-bit + /// types natively. 
template <typename T = Traits> typename std::enable_if<!T::Is64Bit, void>::type split64(Variable *Var); template <typename T = Traits> @@ -239,13 +238,12 @@ void scalarizeArithmetic(InstArithmetic::OpKind K, Variable *Dest, Operand *Src0, Operand *Src1); - /// Operand legalization helpers. To deal with address mode - /// constraints, the helpers will create a new Operand and emit - /// instructions that guarantee that the Operand kind is one of those - /// indicated by the LegalMask (a bitmask of allowed kinds). If the - /// input Operand is known to already meet the constraints, it may be - /// simply returned as the result, without creating any new - /// instructions or operands. + /// Operand legalization helpers. To deal with address mode constraints, the + /// helpers will create a new Operand and emit instructions that guarantee + /// that the Operand kind is one of those indicated by the LegalMask (a + /// bitmask of allowed kinds). If the input Operand is known to already meet + /// the constraints, it may be simply returned as the result, without creating + /// any new instructions or operands. enum OperandLegalization { Legal_None = 0, Legal_Reg = 1 << 0, // physical register, not stack location @@ -259,9 +257,9 @@ Variable *legalizeToReg(Operand *From, int32_t RegNum = Variable::NoRegister); /// Legalize the first source operand for use in the cmp instruction. Operand *legalizeSrc0ForCmp(Operand *Src0, Operand *Src1); - /// Turn a pointer operand into a memory operand that can be - /// used by a real load/store operation. Legalizes the operand as well. - /// This is a nop if the operand is already a legal memory operand. + /// Turn a pointer operand into a memory operand that can be used by a real + /// load/store operation. Legalizes the operand as well. This is a nop if the + /// operand is already a legal memory operand. 
typename Traits::X86OperandMem *formMemoryOperand(Operand *Ptr, Type Ty, bool DoLegalize = true); @@ -271,8 +269,8 @@ static constexpr uint32_t NoSizeLimit = 0; static const Type TypeForSize[]; /// Returns the largest type which is equal to or larger than Size bytes. The - /// type is suitable for copying memory i.e. a load and store will be a - /// single instruction (for example x86 will get f64 not i64). + /// type is suitable for copying memory i.e. a load and store will be a single + /// instruction (for example x86 will get f64 not i64). static Type largestTypeInSize(uint32_t Size, uint32_t MaxSize = NoSizeLimit); /// Returns the smallest type which is equal to or larger than Size bytes. If /// one doesn't exist then the largest type smaller than Size bytes is @@ -304,9 +302,9 @@ const llvm::SmallBitVector &ExcludeRegisters, uint64_t Salt) const override; - /// The following are helpers that insert lowered x86 instructions - /// with minimal syntactic overhead, so that the lowering code can - /// look as close to assembly as practical. + /// The following are helpers that insert lowered x86 instructions with + /// minimal syntactic overhead, so that the lowering code can look as close to + /// assembly as practical. void _adc(Variable *Dest, Operand *Src0) { Context.insert(Traits::Insts::Adc::create(Func, Dest, Src0)); } @@ -450,9 +448,9 @@ Context.insert(Traits::Insts::Lea::create(Func, Dest, Src0)); } void _mfence() { Context.insert(Traits::Insts::Mfence::create(Func)); } - /// If Dest=nullptr is passed in, then a new variable is created, - /// marked as infinite register allocation weight, and returned - /// through the in/out Dest argument. + /// If Dest=nullptr is passed in, then a new variable is created, marked as + /// infinite register allocation weight, and returned through the in/out Dest + /// argument. 
void _mov(Variable *&Dest, Operand *Src0, int32_t RegNum = Variable::NoRegister) { if (Dest == nullptr) @@ -626,8 +624,8 @@ void _ud2() { Context.insert(Traits::Insts::UD2::create(Func)); } void _xadd(Operand *Dest, Variable *Src, bool Locked) { Context.insert(Traits::Insts::Xadd::create(Func, Dest, Src, Locked)); - // The xadd exchanges Dest and Src (modifying Src). - // Model that update with a FakeDef followed by a FakeUse. + // The xadd exchanges Dest and Src (modifying Src). Model that update with + // a FakeDef followed by a FakeUse. Context.insert( InstFakeDef::create(Func, Src, llvm::dyn_cast<Variable>(Dest))); _set_dest_nonkillable();
diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h index c8bf29f..a63f470 100644 --- a/src/IceTargetLoweringX86BaseImpl.h +++ b/src/IceTargetLoweringX86BaseImpl.h
@@ -8,9 +8,8 @@ //===----------------------------------------------------------------------===// /// /// \file -/// This file implements the TargetLoweringX86Base class, which -/// consists almost entirely of the lowering sequence for each -/// high-level instruction. +/// This file implements the TargetLoweringX86Base class, which consists almost +/// entirely of the lowering sequence for each high-level instruction. /// //===----------------------------------------------------------------------===// @@ -63,13 +62,13 @@ /// IsComplex is the cached result of BoolFolding::hasComplexLowering(Instr). bool IsComplex = false; /// IsLiveOut is initialized conservatively to true, and is set to false when - /// we encounter an instruction that ends Var's live range. We disable the - /// folding optimization when Var is live beyond this basic block. Note that + /// we encounter an instruction that ends Var's live range. We disable the + /// folding optimization when Var is live beyond this basic block. Note that /// if liveness analysis is not performed (e.g. in Om1 mode), IsLiveOut will /// always be true and the folding optimization will never be performed. bool IsLiveOut = true; // NumUses counts the number of times Var is used as a source operand in the - // basic block. If IsComplex is true and there is more than one use of Var, + // basic block. If IsComplex is true and there is more than one use of Var, // then the folding optimization is disabled for Var. uint32_t NumUses = 0; }; @@ -166,7 +165,7 @@ /// Returns true if the producing instruction has a "complex" lowering sequence. /// This generally means that its lowering sequence requires more than one /// conditional branch, namely 64-bit integer compares and some floating-point -/// compares. When this is true, and there is more than one consumer, we prefer +/// compares. When this is true, and there is more than one consumer, we prefer /// to disable the folding optimization because it minimizes branches. 
template <class MachineTraits> bool BoolFolding<MachineTraits>::hasComplexLowering(const Inst *Instr) { @@ -222,9 +221,9 @@ setInvalid(I.first); continue; } - // Mark as "dead" rather than outright deleting. This is so that other + // Mark as "dead" rather than outright deleting. This is so that other // peephole style optimizations during or before lowering have access to - // this instruction in undeleted form. See for example + // this instruction in undeleted form. See for example // tryOptimizedCmpxchgCmpBr(). I.second.Instr->setDead(); } @@ -303,8 +302,9 @@ // Run this early so it can be used to focus optimizations on potentially hot // code. - // TODO(stichnot,ascull): currently only used for regalloc not expensive high - // level optimizations which could be focused on potentially hot code. + // TODO(stichnot,ascull): currently only used for regalloc not + // expensive high level optimizations which could be focused on potentially + // hot code. Func->computeLoopNestDepth(); Func->dump("After loop nest depth analysis"); @@ -312,7 +312,7 @@ Func->getVMetadata()->init(VMK_SingleDefs); Func->doAddressOpt(); - // Find read-modify-write opportunities. Do this after address mode + // Find read-modify-write opportunities. Do this after address mode // optimization so that doAddressOpt() doesn't need to be applied to RMW // instructions as well. findRMW(); @@ -321,8 +321,8 @@ // Argument lowering Func->doArgLowering(); - // Target lowering. This requires liveness analysis for some parts of the - // lowering decisions, such as compare/branch fusing. If non-lightweight + // Target lowering. This requires liveness analysis for some parts of the + // lowering decisions, such as compare/branch fusing. If non-lightweight // liveness analysis is used, the instructions need to be renumbered first // TODO: This renumbering should only be necessary if we're actually // calculating live intervals, which we only do for register allocation. 
@@ -330,9 +330,9 @@ if (Func->hasError()) return; - // TODO: It should be sufficient to use the fastest liveness calculation, i.e. - // livenessLightweight(). However, for some reason that slows down the rest - // of the translation. Investigate. + // TODO: It should be sufficient to use the fastest liveness calculation, + // i.e. livenessLightweight(). However, for some reason that slows down the + // rest of the translation. Investigate. Func->liveness(Liveness_Basic); if (Func->hasError()) return; @@ -357,7 +357,7 @@ Func->liveness(Liveness_Intervals); if (Func->hasError()) return; - // Validate the live range computations. The expensive validation call is + // Validate the live range computations. The expensive validation call is // deliberately only made when assertions are enabled. assert(Func->validateLiveness()); // The post-codegen dump is done here, after liveness analysis and associated @@ -386,9 +386,9 @@ // Shuffle basic block order if -reorder-basic-blocks is enabled. Func->shuffleNodes(); - // Branch optimization. This needs to be done just before code emission. In + // Branch optimization. This needs to be done just before code emission. In // particular, no transformations that insert or reorder CfgNodes should be - // done after branch optimization. We go ahead and do it before nop insertion + // done after branch optimization. We go ahead and do it before nop insertion // to reduce the amount of work needed for searching for opportunities. Func->doBranchOpt(); Func->dump("After branch optimization"); @@ -495,10 +495,10 @@ Ostream &Str = Func->getContext()->getStrDump(); for (CfgNode *Node : Func->getNodes()) { // Walk through the instructions, considering each sequence of 3 - // instructions, and look for the particular RMW pattern. Note that this - // search can be "broken" (false negatives) if there are intervening deleted - // instructions, or intervening instructions that could be safely moved out - // of the way to reveal an RMW pattern. 
+ // instructions, and look for the particular RMW pattern. Note that this + // search can be "broken" (false negatives) if there are intervening + // deleted instructions, or intervening instructions that could be safely + // moved out of the way to reveal an RMW pattern. auto E = Node->getInsts().end(); auto I1 = E, I2 = E, I3 = Node->getInsts().begin(); for (; I3 != E; I1 = I2, I2 = I3, ++I3) { @@ -528,21 +528,21 @@ // problems later. // // With this transformation, the Store instruction acquires a Dest - // variable and is now subject to dead code elimination if there are - // no more uses of "b". Variable "x" is a beacon for determining - // whether the Store instruction gets dead-code eliminated. If the - // Store instruction is eliminated, then it must be the case that - // the RMW instruction ends x's live range, and therefore the RMW - // instruction will be retained and later lowered. On the other - // hand, if the RMW instruction does not end x's live range, then - // the Store instruction must still be present, and therefore the - // RMW instruction is ignored during lowering because it is - // redundant with the Store instruction. + // variable and is now subject to dead code elimination if there + // are no more uses of "b". Variable "x" is a beacon for + // determining whether the Store instruction gets dead-code + // eliminated. If the Store instruction is eliminated, then it + // must be the case that the RMW instruction ends x's live range, + // and therefore the RMW instruction will be retained and later + // lowered. On the other hand, if the RMW instruction does not end + // x's live range, then the Store instruction must still be + // present, and therefore the RMW instruction is ignored during + // lowering because it is redundant with the Store instruction. // // Note that if "a" has further uses, the RMW transformation may // still trigger, resulting in two loads and one store, which is - // worse than the original one load and one store. 
However, this is - // probably rare, and caching probably keeps it just as fast. + // worse than the original one load and one store. However, this + // is probably rare, and caching probably keeps it just as fast. if (!isSameMemAddressOperand<Machine>(Load->getSourceAddress(), Store->getAddr())) continue; @@ -589,11 +589,10 @@ return Intrinsics::MemoryOrderInvalid; } -/// Determines whether the dest of a Load instruction can be folded -/// into one of the src operands of a 2-operand instruction. This is -/// true as long as the load dest matches exactly one of the binary -/// instruction's src operands. Replaces Src0 or Src1 with LoadSrc if -/// the answer is true. +/// Determines whether the dest of a Load instruction can be folded into one of +/// the src operands of a 2-operand instruction. This is true as long as the +/// load dest matches exactly one of the binary instruction's src operands. +/// Replaces Src0 or Src1 with LoadSrc if the answer is true. inline bool canFoldLoadIntoBinaryInst(Operand *LoadSrc, Variable *LoadDest, Operand *&Src0, Operand *&Src1) { if (Src0 == LoadDest && Src1 != LoadDest) { @@ -615,8 +614,8 @@ Operand *LoadSrc = nullptr; Inst *CurInst = Context.getCur(); Inst *Next = Context.getNextInst(); - // Determine whether the current instruction is a Load - // instruction or equivalent. + // Determine whether the current instruction is a Load instruction or + // equivalent. if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) { // An InstLoad always qualifies. LoadDest = Load->getDest(); @@ -624,9 +623,9 @@ LoadSrc = formMemoryOperand(Load->getSourceAddress(), LoadDest->getType(), DoLegalize); } else if (auto *Intrin = llvm::dyn_cast<InstIntrinsicCall>(CurInst)) { - // An AtomicLoad intrinsic qualifies as long as it has a valid - // memory ordering, and can be implemented in a single - // instruction (i.e., not i64 on x86-32). 
+ // An AtomicLoad intrinsic qualifies as long as it has a valid memory + // ordering, and can be implemented in a single instruction (i.e., not + // i64 on x86-32). Intrinsics::IntrinsicID ID = Intrin->getIntrinsicInfo().ID; if (ID == Intrinsics::AtomicLoad && (Traits::Is64Bit || Intrin->getDest()->getType() != IceType_i64) && @@ -638,9 +637,9 @@ DoLegalize); } } - // A Load instruction can be folded into the following - // instruction only if the following instruction ends the Load's - // Dest variable's live range. + // A Load instruction can be folded into the following instruction only + // if the following instruction ends the Load's Dest variable's live + // range. if (LoadDest && Next && Next->isLastUse(LoadDest)) { assert(LoadSrc); Inst *NewInst = nullptr; @@ -673,8 +672,7 @@ Select->getCondition(), Src0, Src1); } } else if (auto *Cast = llvm::dyn_cast<InstCast>(Next)) { - // The load dest can always be folded into a Cast - // instruction. + // The load dest can always be folded into a Cast instruction. Variable *Src0 = llvm::dyn_cast<Variable>(Cast->getSrc(0)); if (Src0 == LoadDest) { NewInst = InstCast::create(Func, Cast->getCastKind(), @@ -685,8 +683,8 @@ CurInst->setDeleted(); Next->setDeleted(); Context.insert(NewInst); - // Update NewInst->LiveRangesEnded so that target lowering - // may benefit. Also update NewInst->HasSideEffects. + // Update NewInst->LiveRangesEnded so that target lowering may + // benefit. Also update NewInst->HasSideEffects. NewInst->spliceLivenessInfo(Next, CurInst); } } @@ -721,8 +719,8 @@ Reg = Func->makeVariable(Ty); Reg->setRegNum(RegNum); PhysicalRegisters[Ty][RegNum] = Reg; - // Specially mark esp as an "argument" so that it is considered - // live upon function entry. + // Specially mark esp as an "argument" so that it is considered live upon + // function entry. if (RegNum == Traits::RegisterSet::Reg_esp) { Func->addImplicitArg(Reg); Reg->setIgnoreLiveness(); @@ -782,13 +780,12 @@ /// Helper function for addProlog(). 
/// -/// This assumes Arg is an argument passed on the stack. This sets the -/// frame offset for Arg and updates InArgsSizeBytes according to Arg's -/// width. For an I64 arg that has been split into Lo and Hi components, -/// it calls itself recursively on the components, taking care to handle -/// Lo first because of the little-endian architecture. Lastly, this -/// function generates an instruction to copy Arg into its assigned -/// register if applicable. +/// This assumes Arg is an argument passed on the stack. This sets the frame +/// offset for Arg and updates InArgsSizeBytes according to Arg's width. For an +/// I64 arg that has been split into Lo and Hi components, it calls itself +/// recursively on the components, taking care to handle Lo first because of the +/// little-endian architecture. Lastly, this function generates an instruction +/// to copy Arg into its assigned register if applicable. template <class Machine> void TargetX86Base<Machine>::finishArgumentLowering(Variable *Arg, Variable *FramePtr, @@ -819,8 +816,8 @@ _mov(Arg, Mem); } // This argument-copying instruction uses an explicit Traits::X86OperandMem - // operand instead of a Variable, so its fill-from-stack operation has to be - // tracked separately for statistics. + // operand instead of a Variable, so its fill-from-stack operation has to + // be tracked separately for statistics. Ctx->statsUpdateFills(); } } @@ -837,9 +834,8 @@ default: return; case IceType_i64: - // TODO: Only consider F64 if we need to push each half when - // passing as an argument to a function call. Note that each half - // is still typed as I32. + // TODO: Only consider F64 if we need to push each half when passing as an + // argument to a function call. Note that each half is still typed as I32. case IceType_f64: break; } @@ -946,11 +942,11 @@ template <class Machine> void TargetX86Base<Machine>::lowerAlloca(const InstAlloca *Inst) { IsEbpBasedFrame = true; - // Conservatively require the stack to be aligned. 
Some stack - // adjustment operations implemented below assume that the stack is - // aligned before the alloca. All the alloca code ensures that the - // stack alignment is preserved after the alloca. The stack alignment - // restriction can be relaxed in some cases. + // Conservatively require the stack to be aligned. Some stack adjustment + // operations implemented below assume that the stack is aligned before the + // alloca. All the alloca code ensures that the stack alignment is preserved + // after the alloca. The stack alignment restriction can be relaxed in some + // cases. NeedsStackAlignment = true; // TODO(stichnot): minimize the number of adjustments of esp, etc. @@ -977,8 +973,8 @@ Value = Utils::applyAlignment(Value, Alignment); _sub(esp, Ctx->getConstantInt32(Value)); } else { - // Non-constant sizes need to be adjusted to the next highest - // multiple of the required alignment at runtime. + // Non-constant sizes need to be adjusted to the next highest multiple of + // the required alignment at runtime. Variable *T = makeReg(IceType_i32); _mov(T, TotalSize); _add(T, Ctx->getConstantInt32(Alignment - 1)); @@ -988,17 +984,16 @@ _mov(Dest, esp); } -/// Strength-reduce scalar integer multiplication by a constant (for -/// i32 or narrower) for certain constants. The lea instruction can be -/// used to multiply by 3, 5, or 9, and the lsh instruction can be used -/// to multiply by powers of 2. These can be combined such that -/// e.g. multiplying by 100 can be done as 2 lea-based multiplies by 5, -/// combined with left-shifting by 2. +/// Strength-reduce scalar integer multiplication by a constant (for i32 or +/// narrower) for certain constants. The lea instruction can be used to multiply +/// by 3, 5, or 9, and the shl instruction can be used to multiply by powers of +/// 2. These can be combined such that e.g. multiplying by 100 can be done as 2 +/// lea-based multiplies by 5, combined with left-shifting by 2. 
template <class Machine> bool TargetX86Base<Machine>::optimizeScalarMul(Variable *Dest, Operand *Src0, int32_t Src1) { - // Disable this optimization for Om1 and O0, just to keep things - // simple there. + // Disable this optimization for Om1 and O0, just to keep things simple + // there. if (Ctx->getFlags().getOptLevel() < Opt_1) return false; Type Ty = Dest->getType(); @@ -1054,8 +1049,8 @@ // Lea optimization only works for i16 and i32 types, not i8. if (Ty != IceType_i16 && Ty != IceType_i32 && (Count3 || Count5 || Count9)) return false; - // Limit the number of lea/shl operations for a single multiply, to - // a somewhat arbitrary choice of 3. + // Limit the number of lea/shl operations for a single multiply, to a + // somewhat arbitrary choice of 3. const uint32_t MaxOpsForOptimizedMul = 3; if (CountOps > MaxOpsForOptimizedMul) return false; @@ -1101,11 +1096,11 @@ } if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { // These x86-32 helper-call-involved instructions are lowered in this - // separate switch. This is because loOperand() and hiOperand() - // may insert redundant instructions for constant blinding and - // pooling. Such redundant instructions will fail liveness analysis - // under -Om1 setting. And, actually these arguments do not need - // to be processed with loOperand() and hiOperand() to be used. + // separate switch. This is because loOperand() and hiOperand() may insert + // redundant instructions for constant blinding and pooling. Such redundant + // instructions will fail liveness analysis under -Om1 setting. And, + // actually these arguments do not need to be processed with loOperand() + // and hiOperand() to be used. switch (Inst->getOp()) { case InstArithmetic::Udiv: { const SizeT MaxSrcs = 2; @@ -1216,8 +1211,8 @@ _imul(T_2, Src0Lo); _mov(T_3, Src0Lo, Traits::RegisterSet::Reg_eax); _mul(T_4Lo, T_3, Src1Lo); - // The mul instruction produces two dest variables, edx:eax. 
We - // create a fake definition of edx to account for this. + // The mul instruction produces two dest variables, edx:eax. We create a + // fake definition of edx to account for this. Context.insert(InstFakeDef::create(Func, T_4Hi, T_4Lo)); _mov(DestLo, T_4Lo); _add(T_4Hi, T_1); @@ -1253,9 +1248,9 @@ _shl(T_2, T_1); _test(T_1, BitTest); _br(Traits::Cond::Br_e, Label); - // T_2 and T_3 are being assigned again because of the - // intra-block control flow, so we need the _mov_nonkillable - // variant to avoid liveness problems. + // T_2 and T_3 are being assigned again because of the intra-block + // control flow, so we need the _mov_nonkillable variant to avoid + // liveness problems. _mov_nonkillable(T_3, T_2); _mov_nonkillable(T_2, Zero); Context.insert(Label); @@ -1289,9 +1284,9 @@ _shr(T_3, T_1); _test(T_1, BitTest); _br(Traits::Cond::Br_e, Label); - // T_2 and T_3 are being assigned again because of the - // intra-block control flow, so we need the _mov_nonkillable - // variant to avoid liveness problems. + // T_2 and T_3 are being assigned again because of the intra-block + // control flow, so we need the _mov_nonkillable variant to avoid + // liveness problems. _mov_nonkillable(T_2, T_3); _mov_nonkillable(T_3, Zero); Context.insert(Label); @@ -1325,10 +1320,10 @@ _sar(T_3, T_1); _test(T_1, BitTest); _br(Traits::Cond::Br_e, Label); - // T_2 and T_3 are being assigned again because of the - // intra-block control flow, so T_2 needs the _mov_nonkillable - // variant to avoid liveness problems. T_3 doesn't need special - // treatment because it is reassigned via _sar instead of _mov. + // T_2 and T_3 are being assigned again because of the intra-block + // control flow, so T_2 needs the _mov_nonkillable variant to avoid + // liveness problems. T_3 doesn't need special treatment because it is + // reassigned via _sar instead of _mov. 
_mov_nonkillable(T_2, T_3); _sar(T_3, SignExtend); Context.insert(Label); @@ -1353,8 +1348,8 @@ return; } if (isVectorType(Dest->getType())) { - // TODO: Trap on integer divide and integer modulo by zero. - // See: https://code.google.com/p/nativeclient/issues/detail?id=3899 + // TODO: Trap on integer divide and integer modulo by zero. See: + // https://code.google.com/p/nativeclient/issues/detail?id=3899 if (llvm::isa<typename Traits::X86OperandMem>(Src1)) Src1 = legalizeToReg(Src1); switch (Inst->getOp()) { @@ -1519,8 +1514,8 @@ if (optimizeScalarMul(Dest, Src0, C->getValue())) return; } - // The 8-bit version of imul only allows the form "imul r/m8" - // where T must be in eax. + // The 8-bit version of imul only allows the form "imul r/m8" where T must + // be in eax. if (isByteSizedArithType(Dest->getType())) { _mov(T, Src0, Traits::RegisterSet::Reg_eax); Src1 = legalize(Src1, Legal_Reg | Legal_Mem); @@ -1580,11 +1575,11 @@ } break; case InstArithmetic::Sdiv: - // TODO(stichnot): Enable this after doing better performance - // and cross testing. + // TODO(stichnot): Enable this after doing better performance and cross + // testing. if (false && Ctx->getFlags().getOptLevel() >= Opt_1) { - // Optimize division by constant power of 2, but not for Om1 - // or O0, just to keep things simple there. + // Optimize division by constant power of 2, but not for Om1 or O0, just + // to keep things simple there. if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { int32_t Divisor = C->getValue(); uint32_t UDivisor = static_cast<uint32_t>(Divisor); @@ -1600,8 +1595,8 @@ // dest=t uint32_t TypeWidth = Traits::X86_CHAR_BIT * typeWidthInBytes(Ty); _mov(T, Src0); - // If for some reason we are dividing by 1, just treat it - // like an assignment. + // If for some reason we are dividing by 1, just treat it like an + // assignment. if (LogDiv > 0) { // The initial sar is unnecessary when dividing by 2. 
if (LogDiv > 1) @@ -1656,11 +1651,11 @@ } break; case InstArithmetic::Srem: - // TODO(stichnot): Enable this after doing better performance - // and cross testing. + // TODO(stichnot): Enable this after doing better performance and cross + // testing. if (false && Ctx->getFlags().getOptLevel() >= Opt_1) { - // Optimize mod by constant power of 2, but not for Om1 or O0, - // just to keep things simple there. + // Optimize mod by constant power of 2, but not for Om1 or O0, just to + // keep things simple there. if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { int32_t Divisor = C->getValue(); uint32_t UDivisor = static_cast<uint32_t>(Divisor); @@ -1777,8 +1772,8 @@ // memory. Src0Legal = legalize(Src0); } else { - // If Dest could be a stack operand, then RI must be a physical - // register or a scalar integer immediate. + // If Dest could be a stack operand, then RI must be a physical register + // or a scalar integer immediate. Src0Legal = legalize(Src0, Legal_Reg | Legal_Imm); } if (isVectorType(Dest->getType())) @@ -1803,8 +1798,8 @@ default: break; case BoolFolding::PK_Icmp32: { - // TODO(stichnot): Refactor similarities between this block and - // the corresponding code in lowerIcmp(). + // TODO(stichnot): Refactor similarities between this block and the + // corresponding code in lowerIcmp(). auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer); Operand *Src0 = Producer->getSrc(0); Operand *Src1 = legalize(Producer->getSrc(1)); @@ -1835,10 +1830,10 @@ case InstCast::Sext: { // Src0RM is the source operand legalized to physical register or memory, // but not immediate, since the relevant x86 native instructions don't - // allow an immediate operand. If the operand is an immediate, we could - // consider computing the strength-reduced result at translation time, - // but we're unlikely to see something like that in the bitcode that - // the optimizer wouldn't have already taken care of. + // allow an immediate operand. 
If the operand is an immediate, we could + // consider computing the strength-reduced result at translation time, but + // we're unlikely to see something like that in the bitcode that the + // optimizer wouldn't have already taken care of. Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); if (isVectorType(Dest->getType())) { Type DestTy = Dest->getType(); @@ -1898,8 +1893,8 @@ typeWidthInBytes(Src0RM->getType())) { _mov(T, Src0RM); } else { - // Widen the source using movsx or movzx. (It doesn't matter - // which one, since the following shl/sar overwrite the bits.) + // Widen the source using movsx or movzx. (It doesn't matter which one, + // since the following shl/sar overwrite the bits.) _movzx(T, Src0RM); } _shl(T, ShiftAmount); @@ -2010,12 +2005,11 @@ _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq); _movp(Dest, T); } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { - // Use a helper for converting floating-point values to 64-bit - // integers. SSE2 appears to have no way to convert from xmm - // registers to something like the edx:eax register pair, and - // gcc and clang both want to use x87 instructions complete with - // temporary manipulation of the status word. This helper is - // not needed for x86-64. + // Use a helper for converting floating-point values to 64-bit integers. + // SSE2 appears to have no way to convert from xmm registers to something + // like the edx:eax register pair, and gcc and clang both want to use x87 + // instructions complete with temporary manipulation of the status word. + // This helper is not needed for x86-64. split64(Dest); const SizeT MaxSrcs = 1; Type SrcType = Inst->getSrc(0)->getType(); @@ -2150,8 +2144,8 @@ lowerCall(Call); } else if (Src0->getType() == IceType_i64 || (!Traits::Is64Bit && Src0->getType() == IceType_i32)) { - // Use a helper for x86-32 and x86-64. Also use a helper for - // i32 on x86-32. + // Use a helper for i64 on x86-32 and x86-64. Also use a helper for i32 on + // x86-32. 
const SizeT MaxSrcs = 1; Type DestType = Dest->getType(); IceString TargetString; @@ -2285,8 +2279,8 @@ if (Traits::Is64Bit) { Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); Variable *T = makeReg(IceType_f64); - // Movd requires its fp argument (in this case, the bitcast destination) - // to be an xmm register. + // Movd requires its fp argument (in this case, the bitcast + // destination) to be an xmm register. T->setMustHaveReg(); _movd(T, Src0RM); _mov(Dest, T); @@ -2318,8 +2312,8 @@ Func, Spill, Traits::VariableSplit::High); _mov(T_Lo, loOperand(Src0)); // Technically, the Spill is defined after the _store happens, but - // SpillLo is considered a "use" of Spill so define Spill before it - // is used. + // SpillLo is considered a "use" of Spill so define Spill before it is + // used. Context.insert(InstFakeDef::create(Func, Spill)); _store(T_Lo, SpillLo); _mov(T_Hi, hiOperand(Src0)); @@ -2384,8 +2378,8 @@ // Use pshufd and movd/movss. Variable *T = nullptr; if (Index) { - // The shuffle only needs to occur if the element to be extracted - // is not at the lowest index. + // The shuffle only needs to occur if the element to be extracted is not + // at the lowest index. Constant *Mask = Ctx->getConstantInt32(Index); T = makeReg(Ty); _pshufd(T, legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem), Mask); @@ -2396,11 +2390,11 @@ if (InVectorElementTy == IceType_i32) { _movd(ExtractedElementR, T); } else { // Ty == IceType_f32 - // TODO(wala): _movss is only used here because _mov does not - // allow a vector source and a scalar destination. _mov should be - // able to be used here. - // _movss is a binary instruction, so the FakeDef is needed to - // keep the live range analysis consistent. + // TODO(wala): _movss is only used here because _mov does not allow a + // vector source and a scalar destination. _mov should be able to be + // used here. 
+ // _movss is a binary instruction, so the FakeDef is needed to keep the + // live range analysis consistent. Context.insert(InstFakeDef::create(Func, ExtractedElementR)); _movss(ExtractedElementR, T); } @@ -2408,8 +2402,8 @@ assert(Ty == IceType_v16i8 || Ty == IceType_v16i1); // Spill the value to a stack slot and do the extraction in memory. // - // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when - // support for legalizing to mem is implemented. + // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when support + // for legalizing to mem is implemented. Variable *Slot = Func->makeVariable(Ty); Slot->setMustNotHaveReg(); _movp(Slot, legalizeToReg(SourceVectNotLegalized)); @@ -2589,9 +2583,9 @@ Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); - // SSE2 only has signed comparison operations. Transform unsigned - // inputs in a manner that allows for the use of signed comparison - // operations by flipping the high order bits. + // SSE2 only has signed comparison operations. Transform unsigned inputs in + // a manner that allows for the use of signed comparison operations by + // flipping the high order bits. if (Condition == InstIcmp::Ugt || Condition == InstIcmp::Uge || Condition == InstIcmp::Ult || Condition == InstIcmp::Ule) { Variable *T0 = makeReg(Ty); @@ -2726,8 +2720,8 @@ Type InVectorElementTy = Traits::getInVectorElementType(Ty); if (ElementTy == IceType_i1) { - // Expand the element to the appropriate size for it to be inserted - // in the vector. + // Expand the element to the appropriate size for it to be inserted in the + // vector. Variable *Expanded = Func->makeVariable(InVectorElementTy); InstCast *Cast = InstCast::create(Func, InstCast::Zext, Expanded, ElementToInsertNotLegalized); @@ -2773,14 +2767,13 @@ return; } - // shufps treats the source and desination operands as vectors of - // four doublewords. 
The destination's two high doublewords are - // selected from the source operand and the two low doublewords are - // selected from the (original value of) the destination operand. - // An insertelement operation can be effected with a sequence of two - // shufps operations with appropriate masks. In all cases below, - // Element[0] is being inserted into SourceVectOperand. Indices are - // ordered from left to right. + // shufps treats the source and destination operands as vectors of four + // doublewords. The destination's two high doublewords are selected from + // the source operand and the two low doublewords are selected from the + // (original value of) the destination operand. An insertelement operation + // can be effected with a sequence of two shufps operations with + // appropriate masks. In all cases below, Element[0] is being inserted into + // SourceVectOperand. Indices are ordered from left to right. // // insertelement into index 1 (result is stored in ElementR): // ElementR := ElementR[0, 0] SourceVectRM[0, 0] @@ -2814,11 +2807,10 @@ } } else { assert(Ty == IceType_v16i8 || Ty == IceType_v16i1); - // Spill the value to a stack slot and perform the insertion in - // memory. + // Spill the value to a stack slot and perform the insertion in memory. // - // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when - // support for legalizing to mem is implemented. + // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when support + // for legalizing to mem is implemented. Variable *Slot = Func->makeVariable(Ty); Slot->setMustNotHaveReg(); _movp(Slot, legalizeToReg(SourceVectNotLegalized)); @@ -2864,25 +2856,25 @@ _mfence(); return; case Intrinsics::AtomicFenceAll: - // NOTE: FenceAll should prevent and load/store from being moved - // across the fence (both atomic and non-atomic). The InstX8632Mfence - // instruction is currently marked coarsely as "HasSideEffects". 
+ // NOTE: FenceAll should prevent any load/store from being moved across the + // fence (both atomic and non-atomic). The InstX8632Mfence instruction is + // currently marked coarsely as "HasSideEffects". _mfence(); return; case Intrinsics::AtomicIsLockFree: { // X86 is always lock free for 8/16/32/64 bit accesses. - // TODO(jvoung): Since the result is constant when given a constant - // byte size, this opens up DCE opportunities. + // TODO(jvoung): Since the result is constant when given a constant byte + // size, this opens up DCE opportunities. Operand *ByteSize = Instr->getArg(0); Variable *Dest = Instr->getDest(); if (ConstantInteger32 *CI = llvm::dyn_cast<ConstantInteger32>(ByteSize)) { Constant *Result; switch (CI->getValue()) { default: - // Some x86-64 processors support the cmpxchg16b intruction, which - // can make 16-byte operations lock free (when used with the LOCK - // prefix). However, that's not supported in 32-bit mode, so just - // return 0 even for large sizes. + // Some x86-64 processors support the cmpxchg16b instruction, which can + // make 16-byte operations lock free (when used with the LOCK prefix). + // However, that's not supported in 32-bit mode, so just return 0 even + // for large sizes. Result = Ctx->getConstantZero(IceType_i32); break; case 1: @@ -2900,8 +2892,8 @@ return; } case Intrinsics::AtomicLoad: { - // We require the memory address to be naturally aligned. - // Given that is the case, then normal loads are atomic. + // We require the memory address to be naturally aligned. Given that is the + // case, then normal loads are atomic. if (!Intrinsics::isMemoryOrderValid( ID, getConstantMemoryOrder(Instr->getArg(1)))) { Func->setError("Unexpected memory ordering for AtomicLoad"); @@ -2910,10 +2902,10 @@ Variable *Dest = Instr->getDest(); if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { // Follow what GCC does and use a movq instead of what lowerLoad() - // normally does (split the load into two). 
- // Thus, this skips load/arithmetic op folding. Load/arithmetic folding - // can't happen anyway, since this is x86-32 and integer arithmetic only - // happens on 32-bit quantities. + // normally does (split the load into two). Thus, this skips + // load/arithmetic op folding. Load/arithmetic folding can't happen + // anyway, since this is x86-32 and integer arithmetic only happens on + // 32-bit quantities. Variable *T = makeReg(IceType_f64); typename Traits::X86OperandMem *Addr = formMemoryOperand(Instr->getArg(0), IceType_f64); @@ -2929,8 +2921,8 @@ InstLoad *Load = InstLoad::create(Func, Dest, Instr->getArg(0)); lowerLoad(Load); // Make sure the atomic load isn't elided when unused, by adding a FakeUse. - // Since lowerLoad may fuse the load w/ an arithmetic instruction, - // insert the FakeUse on the last-inserted instruction's dest. + // Since lowerLoad may fuse the load w/ an arithmetic instruction, insert + // the FakeUse on the last-inserted instruction's dest. Context.insert( InstFakeUse::create(Func, Context.getLastInserted()->getDest())); return; @@ -2953,15 +2945,15 @@ Func->setError("Unexpected memory ordering for AtomicStore"); return; } - // We require the memory address to be naturally aligned. - // Given that is the case, then normal stores are atomic. - // Add a fence after the store to make it visible. + // We require the memory address to be naturally aligned. Given that is the + // case, then normal stores are atomic. Add a fence after the store to make + // it visible. Operand *Value = Instr->getArg(0); Operand *Ptr = Instr->getArg(1); if (!Traits::Is64Bit && Value->getType() == IceType_i64) { - // Use a movq instead of what lowerStore() normally does - // (split the store into two), following what GCC does. - // Cast the bits from int -> to an xmm register first. + // Use a movq instead of what lowerStore() normally does (split the store + // into two), following what GCC does. Cast the bits from int -> to an + // xmm register first. 
Variable *T = makeReg(IceType_f64); InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, T, Value); lowerCast(Cast); @@ -2980,8 +2972,8 @@ case Intrinsics::Bswap: { Variable *Dest = Instr->getDest(); Operand *Val = Instr->getArg(0); - // In 32-bit mode, bswap only works on 32-bit arguments, and the - // argument must be a register. Use rotate left for 16-bit bswap. + // In 32-bit mode, bswap only works on 32-bit arguments, and the argument + // must be a register. Use rotate left for 16-bit bswap. if (!Traits::Is64Bit && Val->getType() == IceType_i64) { Val = legalizeUndef(Val); Variable *T_Lo = legalizeToReg(loOperand(Val)); @@ -3070,8 +3062,8 @@ return; } case Intrinsics::Ctlz: { - // The "is zero undef" parameter is ignored and we always return - // a well-defined value. + // The "is zero undef" parameter is ignored and we always return a + // well-defined value. Operand *Val = legalize(Instr->getArg(0)); Operand *FirstVal; Operand *SecondVal = nullptr; @@ -3087,8 +3079,8 @@ return; } case Intrinsics::Cttz: { - // The "is zero undef" parameter is ignored and we always return - // a well-defined value. + // The "is zero undef" parameter is ignored and we always return a + // well-defined value. Operand *Val = legalize(Instr->getArg(0)); Operand *FirstVal; Operand *SecondVal = nullptr; @@ -3108,8 +3100,8 @@ Type Ty = Src->getType(); Variable *Dest = Instr->getDest(); Variable *T = makeVectorOfFabsMask(Ty); - // The pand instruction operates on an m128 memory operand, so if - // Src is an f32 or f64, we need to make sure it's in a register. + // The pand instruction operates on an m128 memory operand, so if Src is an + // f32 or f64, we need to make sure it's in a register. if (isVectorType(Ty)) { if (llvm::isa<typename Traits::X86OperandMem>(Src)) Src = legalizeToReg(Src); @@ -3694,8 +3686,8 @@ Variable *Reg; // Copy the data into registers as the source and destination could overlap - // so make sure not to clobber the memory. 
This also means overlapping moves - // can be used as we are taking a safe snapshot of the memory. + // so make sure not to clobber the memory. This also means overlapping + // moves can be used as we are taking a safe snapshot of the memory. Type Ty = largestTypeInSize(CountValue); uint32_t TyWidth = typeWidthInBytes(Ty); @@ -3896,8 +3888,7 @@ inline bool matchTransitiveAssign(const VariablesMetadata *VMetadata, Variable *&Var, const Inst *&Reason) { - // Var originates from Var=SrcVar ==> - // set Var:=SrcVar + // Var originates from Var=SrcVar ==> set Var:=SrcVar if (Var == nullptr) return false; if (const Inst *VarAssign = VMetadata->getSingleDefinition(Var)) { @@ -4059,10 +4050,10 @@ (void)Offset; // TODO: pattern-match for non-zero offsets. if (Base == nullptr) return; - // If the Base has more than one use or is live across multiple - // blocks, then don't go further. Alternatively (?), never consider - // a transformation that would change a variable that is currently - // *not* live across basic block boundaries into one that *is*. + // If the Base has more than one use or is live across multiple blocks, then + // don't go further. Alternatively (?), never consider a transformation that + // would change a variable that is currently *not* live across basic block + // boundaries into one that *is*. if (Func->getVMetadata()->isMultiBlock(Base) /* || Base->getUseCount() > 1*/) return; @@ -4232,8 +4223,8 @@ Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem); Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem); if (InstructionSet >= Traits::SSE4_1) { - // TODO(wala): If the condition operand is a constant, use blendps - // or pblendw. + // TODO(wala): If the condition operand is a constant, use blendps or + // pblendw. // // Use blendvps or pblendvb to implement select. 
if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 || @@ -4310,8 +4301,8 @@ _cmp(CmpOpnd0, CmpOpnd1); if (typeWidthInBytes(DestTy) == 1 || isFloatingType(DestTy)) { - // The cmov instruction doesn't allow 8-bit or FP operands, so - // we need explicit control flow. + // The cmov instruction doesn't allow 8-bit or FP operands, so we need + // explicit control flow. // d=cmp e,f; a=d?b:c ==> cmp e,f; a=b; jne L1; a=c; L1: typename Traits::Insts::Label *Label = Traits::Insts::Label::create(Func, this); @@ -4324,8 +4315,8 @@ return; } // mov t, SrcF; cmov_cond t, SrcT; mov dest, t - // But if SrcT is immediate, we might be able to do better, as - // the cmov instruction doesn't allow an immediate operand: + // But if SrcT is immediate, we might be able to do better, as the cmov + // instruction doesn't allow an immediate operand: // mov t, SrcT; cmov_!cond t, SrcF; mov dest, t if (llvm::isa<Constant>(SrcT) && !llvm::isa<Constant>(SrcF)) { std::swap(SrcT, SrcF); @@ -4686,8 +4677,8 @@ /// %cmp.ext = sext <n x i1> %cmp to <n x ty> /// /// We can eliminate the sext operation by copying the result of pcmpeqd, -/// pcmpgtd, or cmpps (which produce sign extended results) to the result -/// of the sext operation. +/// pcmpgtd, or cmpps (which produce sign extended results) to the result of the +/// sext operation. template <class Machine> void TargetX86Base<Machine>::eliminateNextVectorSextInstruction( Variable *SignExtendedResult) { @@ -4712,13 +4703,12 @@ template <class Machine> void TargetX86Base<Machine>::lowerRMW( const typename Traits::Insts::FakeRMW *RMW) { - // If the beacon variable's live range does not end in this - // instruction, then it must end in the modified Store instruction - // that follows. This means that the original Store instruction is - // still there, either because the value being stored is used beyond - // the Store instruction, or because dead code elimination did not - // happen. 
In either case, we cancel RMW lowering (and the caller - // deletes the RMW instruction). + // If the beacon variable's live range does not end in this instruction, then + // it must end in the modified Store instruction that follows. This means + // that the original Store instruction is still there, either because the + // value being stored is used beyond the Store instruction, or because dead + // code elimination did not happen. In either case, we cancel RMW lowering + // (and the caller deletes the RMW instruction). if (!RMW->isLastUse(RMW->getBeacon())) return; Operand *Src = RMW->getData(); @@ -4800,10 +4790,9 @@ } } -/// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to -/// preserve integrity of liveness analysis. Undef values are also -/// turned into zeroes, since loOperand() and hiOperand() don't expect -/// Undef input. +/// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to preserve +/// integrity of liveness analysis. Undef values are also turned into zeroes, +/// since loOperand() and hiOperand() don't expect Undef input. template <class Machine> void TargetX86Base<Machine>::prelowerPhis() { if (Traits::Is64Bit) { // On x86-64 we don't need to prelower phis -- the architecture can handle @@ -4811,25 +4800,25 @@ return; } - // Pause constant blinding or pooling, blinding or pooling will be done - // later during phi lowering assignments + // Pause constant blinding or pooling, blinding or pooling will be done later + // during phi lowering assignments BoolFlagSaver B(RandomizationPoolingPaused, true); PhiLowering::prelowerPhis32Bit<TargetX86Base<Machine>>( this, Context.getNode(), Func); } -// There is no support for loading or emitting vector constants, so the -// vector values returned from makeVectorOfZeros, makeVectorOfOnes, -// etc. are initialized with register operations. 
+// There is no support for loading or emitting vector constants, so the vector +// values returned from makeVectorOfZeros, makeVectorOfOnes, etc. are +// initialized with register operations. // -// TODO(wala): Add limited support for vector constants so that -// complex initialization in registers is unnecessary. +// TODO(wala): Add limited support for vector constants so that complex +// initialization in registers is unnecessary. template <class Machine> Variable *TargetX86Base<Machine>::makeVectorOfZeros(Type Ty, int32_t RegNum) { Variable *Reg = makeReg(Ty, RegNum); - // Insert a FakeDef, since otherwise the live range of Reg might - // be overestimated. + // Insert a FakeDef, since otherwise the live range of Reg might be + // overestimated. Context.insert(InstFakeDef::create(Func, Reg)); _pxor(Reg, Reg); return Reg; @@ -4875,12 +4864,12 @@ } } -/// Construct a mask in a register that can be and'ed with a -/// floating-point value to mask off its sign bit. The value will be -/// <4 x 0x7fffffff> for f32 and v4f32, and <2 x 0x7fffffffffffffff> -/// for f64. Construct it as vector of ones logically right shifted -/// one bit. TODO(stichnot): Fix the wala TODO above, to represent -/// vector constants in memory. +/// Construct a mask in a register that can be and'ed with a floating-point +/// value to mask off its sign bit. The value will be <4 x 0x7fffffff> for f32 +/// and v4f32, and <2 x 0x7fffffffffffffff> for f64. Construct it as vector of +/// ones logically right shifted one bit. +// TODO(stichnot): Fix the wala TODO above, to represent vector constants in +// memory. template <class Machine> Variable *TargetX86Base<Machine>::makeVectorOfFabsMask(Type Ty, int32_t RegNum) { @@ -4897,9 +4886,9 @@ assert(Slot->mustNotHaveReg()); assert(Slot->getRegNum() == Variable::NoRegister); // Compute the location of Loc in memory. - // TODO(wala,stichnot): lea should not be required. 
The address of - // the stack slot is known at compile time (although not until after - // addProlog()). + // TODO(wala,stichnot): lea should not + // be required. The address of the stack slot is known at compile time + // (although not until after addProlog()). const Type PointerType = IceType_i32; Variable *Loc = makeReg(PointerType); _lea(Loc, Slot); @@ -4925,20 +4914,19 @@ Operand *TargetX86Base<Machine>::legalize(Operand *From, LegalMask Allowed, int32_t RegNum) { Type Ty = From->getType(); - // Assert that a physical register is allowed. To date, all calls - // to legalize() allow a physical register. If a physical register - // needs to be explicitly disallowed, then new code will need to be - // written to force a spill. + // Assert that a physical register is allowed. To date, all calls to + // legalize() allow a physical register. If a physical register needs to be + // explicitly disallowed, then new code will need to be written to force a + // spill. assert(Allowed & Legal_Reg); - // If we're asking for a specific physical register, make sure we're - // not allowing any other operand kinds. (This could be future - // work, e.g. allow the shl shift amount to be either an immediate - // or in ecx.) + // If we're asking for a specific physical register, make sure we're not + // allowing any other operand kinds. (This could be future work, e.g. allow + // the shl shift amount to be either an immediate or in ecx.) assert(RegNum == Variable::NoRegister || Allowed == Legal_Reg); if (auto Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(From)) { - // Before doing anything with a Mem operand, we need to ensure - // that the Base and Index components are in physical registers. + // Before doing anything with a Mem operand, we need to ensure that the + // Base and Index components are in physical registers. 
Variable *Base = Mem->getBase(); Variable *Index = Mem->getIndex(); Variable *RegBase = nullptr; @@ -4983,8 +4971,8 @@ } } - // If the operand is an 32 bit constant integer, we should check - // whether we need to randomize it or pool it. + // If the operand is a 32-bit constant integer, we should check whether we + // need to randomize it or pool it. if (ConstantInteger32 *C = llvm::dyn_cast<ConstantInteger32>(Const)) { Operand *NewConst = randomizeOrPoolImmediate(C, RegNum); if (NewConst != Const) { @@ -4992,8 +4980,8 @@ } } - // Convert a scalar floating point constant into an explicit - // memory operand. + // Convert a scalar floating point constant into an explicit memory + // operand. if (isScalarFloatingType(Ty)) { Variable *Base = nullptr; std::string Buffer; @@ -5016,9 +5004,9 @@ return From; } if (auto Var = llvm::dyn_cast<Variable>(From)) { - // Check if the variable is guaranteed a physical register. This - // can happen either when the variable is pre-colored or when it is - // assigned infinite weight. + // Check if the variable is guaranteed a physical register. This can happen + // either when the variable is pre-colored or when it is assigned infinite + // weight. bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg()); // We need a new physical register for the operand if: // Mem is not allowed and Var isn't guaranteed a physical @@ -5046,16 +5034,16 @@ Type Ty = From->getType(); if (llvm::isa<ConstantUndef>(From)) { // Lower undefs to zero. Another option is to lower undefs to an - // uninitialized register; however, using an uninitialized register - // results in less predictable code. + // uninitialized register; however, using an uninitialized register results + // in less predictable code. 
// - // If in the future the implementation is changed to lower undef - // values to uninitialized registers, a FakeDef will be needed: + // If in the future the implementation is changed to lower undef values to + // uninitialized registers, a FakeDef will be needed: // Context.insert(InstFakeDef::create(Func, Reg)); // This is in order to ensure that the live range of Reg is not - // overestimated. If the constant being lowered is a 64 bit value, - // then the result should be split and the lo and hi components will - // need to go in uninitialized registers. + // overestimated. If the constant being lowered is a 64 bit value, then + // the result should be split and the lo and hi components will need to go + // in uninitialized registers. if (isVectorType(Ty)) return makeVectorOfZeros(Ty, RegNum); return Ctx->getConstantZero(Ty); @@ -5063,12 +5051,11 @@ return From; } -/// For the cmp instruction, if Src1 is an immediate, or known to be a -/// physical register, we can allow Src0 to be a memory operand. -/// Otherwise, Src0 must be copied into a physical register. -/// (Actually, either Src0 or Src1 can be chosen for the physical -/// register, but unfortunately we have to commit to one or the other -/// before register allocation.) +/// For the cmp instruction, if Src1 is an immediate, or known to be a physical +/// register, we can allow Src0 to be a memory operand. Otherwise, Src0 must be +/// copied into a physical register. (Actually, either Src0 or Src1 can be +/// chosen for the physical register, but unfortunately we have to commit to one +/// or the other before register allocation.) 
template <class Machine> Operand *TargetX86Base<Machine>::legalizeSrc0ForCmp(Operand *Src0, Operand *Src1) { @@ -5095,11 +5082,10 @@ Constant *Offset = llvm::dyn_cast<Constant>(Opnd); assert(Base || Offset); if (Offset) { - // During memory operand building, we do not blind or pool - // the constant offset, we will work on the whole memory - // operand later as one entity later, this save one instruction. - // By turning blinding and pooling off, we guarantee - // legalize(Offset) will return a Constant*. + // During memory operand building, we do not blind or pool the constant + // offset; we will work on the whole memory operand as one entity later, + // which saves one instruction. By turning blinding and pooling off, + // we guarantee legalize(Offset) will return a Constant*. { BoolFlagSaver B(RandomizationPoolingPaused, true); @@ -5111,8 +5097,8 @@ } Mem = Traits::X86OperandMem::create(Func, Ty, Base, Offset); } - // Do legalization, which contains randomization/pooling - // or do randomization/pooling. + // Do legalization, which contains randomization/pooling or do + // randomization/pooling. return llvm::cast<typename Traits::X86OperandMem>( DoLegalize ? legalize(Mem) : randomizeOrPoolImmediate(Mem)); } @@ -5235,11 +5221,10 @@ // insert: lea -cookie[Reg], Reg // => Reg // If we have already assigned a phy register, we must come from - // andvancedPhiLowering()=>lowerAssign(). In this case we should reuse - // the assigned register as this assignment is that start of its use-def - // chain. So we add RegNum argument here. - // Note we use 'lea' instruction instead of 'xor' to avoid affecting - // the flags. + // advancedPhiLowering()=>lowerAssign(). In this case we should reuse the + // assigned register as this assignment is the start of its use-def + // chain. So we add RegNum argument here. Note we use 'lea' instruction + // instead of 'xor' to avoid affecting the flags. 
Variable *Reg = makeReg(IceType_i32, RegNum); ConstantInteger32 *Integer = llvm::cast<ConstantInteger32>(Immediate); uint32_t Value = Integer->getValue(); @@ -5268,8 +5253,8 @@ assert(Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool); Immediate->setShouldBePooled(true); // if we have already assigned a phy register, we must come from - // andvancedPhiLowering()=>lowerAssign(). In this case we should reuse - // the assigned register as this assignment is that start of its use-def + // advancedPhiLowering()=>lowerAssign(). In this case we should reuse the + // assigned register as this assignment is the start of its use-def // chain. So we add RegNum argument here. Variable *Reg = makeReg(Immediate->getType(), RegNum); IceString Label; @@ -5302,8 +5287,8 @@ return MemOperand; } - // If this memory operand is already a randommized one, we do - // not randomize it again. + // If this memory operand is already a randomized one, we do not randomize it + // again. if (MemOperand->getRandomized()) return MemOperand; @@ -5338,9 +5323,8 @@ Variable *RegTemp = makeReg(MemOperand->getOffset()->getType(), RegNum); _lea(RegTemp, TempMemOperand); // As source operand doesn't use the dstreg, we don't need to add - // _set_dest_nonkillable(). - // But if we use the same Dest Reg, that is, with RegNum - // assigned, we should add this _set_dest_nonkillable() + // _set_dest_nonkillable(). But if we use the same Dest Reg, that is, + // with RegNum assigned, we should add this _set_dest_nonkillable() if (RegNum != Variable::NoRegister) _set_dest_nonkillable(); @@ -5366,12 +5350,11 @@ // =>[RegTemp, index, shift] assert(Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool); - // Memory operand should never exist as source operands in phi - // lowering assignments, so there is no need to reuse any registers - // here. For phi lowering, we should not ask for new physical - // registers in general. 
- // However, if we do meet Memory Operand during phi lowering, we - // should not blind or pool the immediates for now. + // Memory operand should never exist as source operands in phi lowering + // assignments, so there is no need to reuse any registers here. For + // phi lowering, we should not ask for new physical registers in + // general. However, if we do meet Memory Operand during phi lowering, + // we should not blind or pool the immediates for now. if (RegNum != Variable::NoRegister) return MemOperand; Variable *RegTemp = makeReg(IceType_i32);
diff --git a/src/IceThreading.h b/src/IceThreading.h index f59f46e..b0bcc01 100644 --- a/src/IceThreading.h +++ b/src/IceThreading.h
@@ -22,31 +22,29 @@ namespace Ice { -/// BoundedProducerConsumerQueue is a work queue that allows multiple -/// producers and multiple consumers. A producer adds entries using -/// blockingPush(), and may block if the queue is "full". A producer -/// uses notifyEnd() to indicate that no more entries will be added. A -/// consumer removes an item using blockingPop(), which will return -/// nullptr if notifyEnd() has been called and the queue is empty (it -/// never returns nullptr if the queue contained any items). +/// BoundedProducerConsumerQueue is a work queue that allows multiple producers +/// and multiple consumers. A producer adds entries using blockingPush(), and +/// may block if the queue is "full". A producer uses notifyEnd() to indicate +/// that no more entries will be added. A consumer removes an item using +/// blockingPop(), which will return nullptr if notifyEnd() has been called and +/// the queue is empty (it never returns nullptr if the queue contained any +/// items). /// -/// The MaxSize ctor arg controls the maximum size the queue can grow -/// to (subject to a hard limit of MaxStaticSize-1). The Sequential -/// arg indicates purely sequential execution in which the single -/// thread should never wait(). +/// The MaxSize ctor arg controls the maximum size the queue can grow to +/// (subject to a hard limit of MaxStaticSize-1). The Sequential arg indicates +/// purely sequential execution in which the single thread should never wait(). /// -/// Two condition variables are used in the implementation. -/// GrewOrEnded signals a waiting worker that a producer has changed -/// the state of the queue. Shrunk signals a blocked producer that a -/// consumer has changed the state of the queue. +/// Two condition variables are used in the implementation. GrewOrEnded signals +/// a waiting worker that a producer has changed the state of the queue. Shrunk +/// signals a blocked producer that a consumer has changed the state of the +/// queue. 
/// -/// The methods begin with Sequential-specific code to be most clear. -/// The lock and condition variables are not used in the Sequential -/// case. +/// The methods begin with Sequential-specific code to be most clear. The lock +/// and condition variables are not used in the Sequential case. /// /// Internally, the queue is implemented as a circular array of size -/// MaxStaticSize, where the queue boundaries are denoted by the Front -/// and Back fields. Front==Back indicates an empty queue. +/// MaxStaticSize, where the queue boundaries are denoted by the Front and Back +/// fields. Front==Back indicates an empty queue. template <typename T, size_t MaxStaticSize = 128> class BoundedProducerConsumerQueue { BoundedProducerConsumerQueue() = delete; @@ -60,8 +58,8 @@ void blockingPush(T *Item) { { std::unique_lock<GlobalLockType> L(Lock); - // If the work queue is already "full", wait for a consumer to - // grab an element and shrink the queue. + // If the work queue is already "full", wait for a consumer to grab an + // element and shrink the queue. Shrunk.wait(L, [this] { return size() < MaxSize || Sequential; }); push(Item); } @@ -103,27 +101,23 @@ GlobalLockType Lock; ICE_CACHELINE_BOUNDARY; - /// GrewOrEnded is written by the producers and read by the - /// consumers. It is notified (by the producer) when something is - /// added to the queue, in case consumers are waiting for a non-empty - /// queue. + /// GrewOrEnded is written by the producers and read by the consumers. It is + /// notified (by the producer) when something is added to the queue, in case + /// consumers are waiting for a non-empty queue. std::condition_variable GrewOrEnded; - /// Back is the index into WorkItems[] of where the next element will - /// be pushed. (More precisely, Back&MaxStaticSize is the index.) - /// It is written by the producers, and read by all via size() and - /// empty(). + /// Back is the index into WorkItems[] of where the next element will be + /// pushed. 
(More precisely, Back&MaxStaticSize is the index.) It is written + /// by the producers, and read by all via size() and empty(). size_t Back = 0; ICE_CACHELINE_BOUNDARY; - /// Shrunk is notified (by the consumer) when something is removed - /// from the queue, in case a producer is waiting for the queue to - /// drop below maximum capacity. It is written by the consumers and - /// read by the producers. + /// Shrunk is notified (by the consumer) when something is removed from the + /// queue, in case a producer is waiting for the queue to drop below maximum + /// capacity. It is written by the consumers and read by the producers. std::condition_variable Shrunk; - /// Front is the index into WorkItems[] of the oldest element, - /// i.e. the next to be popped. (More precisely Front&MaxStaticSize - /// is the index.) It is written by the consumers, and read by all - /// via size() and empty(). + /// Front is the index into WorkItems[] of the oldest element, i.e. the next + /// to be popped. (More precisely Front&MaxStaticSize is the index.) It is + /// written by the consumers, and read by all via size() and empty(). size_t Front = 0; ICE_CACHELINE_BOUNDARY; @@ -131,8 +125,7 @@ /// MaxSize and Sequential are read by all and written by none. const size_t MaxSize; const bool Sequential; - /// IsEnded is read by the consumers, and only written once by the - /// producer. + /// IsEnded is read by the consumers, and only written once by the producer. bool IsEnded = false; /// The lock must be held when the following methods are called. @@ -148,15 +141,14 @@ } }; -/// EmitterWorkItem is a simple wrapper around a pointer that -/// represents a work item to be emitted, i.e. a function or a set of -/// global declarations and initializers, and it includes a sequence -/// number so that work items can be emitted in a particular order for -/// deterministic output. 
It acts like an interface class, but instead -/// of making the classes of interest inherit from EmitterWorkItem, it -/// wraps pointers to these classes. Some space is wasted compared to -/// storing the pointers in a union, but not too much due to the work -/// granularity. +/// EmitterWorkItem is a simple wrapper around a pointer that represents a work +/// item to be emitted, i.e. a function or a set of global declarations and +/// initializers, and it includes a sequence number so that work items can be +/// emitted in a particular order for deterministic output. It acts like an +/// interface class, but instead of making the classes of interest inherit from +/// EmitterWorkItem, it wraps pointers to these classes. Some space is wasted +/// compared to storing the pointers in a union, but not too much due to the +/// work granularity. class EmitterWorkItem { EmitterWorkItem() = delete; EmitterWorkItem(const EmitterWorkItem &) = delete; @@ -165,20 +157,19 @@ public: /// ItemKind can be one of the following: /// - /// WI_Nop: No actual work. This is a placeholder to maintain - /// sequence numbers in case there is a translation error. + /// WI_Nop: No actual work. This is a placeholder to maintain sequence numbers + /// in case there is a translation error. /// /// WI_GlobalInits: A list of global declarations and initializers. /// - /// WI_Asm: A function that has already had emitIAS() called on it. - /// The work is transferred via the Assembler buffer, and the - /// originating Cfg has been deleted (to recover lots of memory). + /// WI_Asm: A function that has already had emitIAS() called on it. The work + /// is transferred via the Assembler buffer, and the originating Cfg has been + /// deleted (to recover lots of memory). /// - /// WI_Cfg: A Cfg that has not yet had emit() or emitIAS() called on - /// it. 
This is only used as a debugging configuration when we want - /// to emit "readable" assembly code, possibly annotated with - /// liveness and other information only available in the Cfg and not - /// in the Assembler buffer. + /// WI_Cfg: A Cfg that has not yet had emit() or emitIAS() called on it. This + /// is only used as a debugging configuration when we want to emit "readable" + /// assembly code, possibly annotated with liveness and other information only + /// available in the Cfg and not in the Assembler buffer. enum ItemKind { WI_Nop, WI_GlobalInits, WI_Asm, WI_Cfg }; /// Constructor for a WI_Nop work item. explicit EmitterWorkItem(uint32_t Seq);
diff --git a/src/IceTimerTree.cpp b/src/IceTimerTree.cpp index dc4622d..133cd41 100644 --- a/src/IceTimerTree.cpp +++ b/src/IceTimerTree.cpp
@@ -8,8 +8,8 @@ //===----------------------------------------------------------------------===// /// /// \file -/// This file defines the TimerTree class, which tracks flat and -/// cumulative execution time collection of call chains. +/// This file defines the TimerTree class, which tracks flat and cumulative +/// execution time collection of call chains. /// //===----------------------------------------------------------------------===// @@ -41,8 +41,7 @@ #undef STR } -// Returns the unique timer ID for the given Name, creating a new ID -// if needed. +// Returns the unique timer ID for the given Name, creating a new ID if needed. TimerIdT TimerStack::getTimerID(const IceString &Name) { if (!BuildDefs::dump()) return 0; @@ -55,9 +54,9 @@ return IDsIndex[Name]; } -// Creates a mapping from TimerIdT (leaf) values in the Src timer -// stack into TimerIdT values in this timer stack. Creates new -// entries in this timer stack as needed. +// Creates a mapping from TimerIdT (leaf) values in the Src timer stack into +// TimerIdT values in this timer stack. Creates new entries in this timer stack +// as needed. TimerStack::TranslationType TimerStack::translateIDsFrom(const TimerStack &Src) { size_t Size = Src.IDs.size(); @@ -68,8 +67,8 @@ return Mapping; } -// Merges two timer stacks, by combining and summing corresponding -// entries. This timer stack is updated from Src. +// Merges two timer stacks, by combining and summing corresponding entries. +// This timer stack is updated from Src. void TimerStack::mergeFrom(const TimerStack &Src) { if (!BuildDefs::dump()) return; @@ -78,11 +77,11 @@ for (const TimerTreeNode &SrcNode : Src.Nodes) { // The first node is reserved as a sentinel, so avoid it. if (SrcIndex > 0) { - // Find the full path to the Src node, translated to path - // components corresponding to this timer stack. + // Find the full path to the Src node, translated to path components + // corresponding to this timer stack. 
PathType MyPath = Src.getPath(SrcIndex, Mapping); - // Find a node in this timer stack corresponding to the given - // path, creating new interior nodes as necessary. + // Find a node in this timer stack corresponding to the given path, + // creating new interior nodes as necessary. TTindex MyIndex = findPath(MyPath); Nodes[MyIndex].Time += SrcNode.Time; Nodes[MyIndex].UpdateCount += SrcNode.UpdateCount; @@ -96,10 +95,9 @@ StateChangeCount += Src.StateChangeCount; } -// Constructs a path consisting of the sequence of leaf values leading -// to a given node, with the Mapping translation applied to the leaf -// values. The path ends up being in "reverse" order, i.e. from leaf -// to root. +// Constructs a path consisting of the sequence of leaf values leading to a +// given node, with the Mapping translation applied to the leaf values. The +// path ends up being in "reverse" order, i.e. from leaf to root. TimerStack::PathType TimerStack::getPath(TTindex Index, const TranslationType &Mapping) const { PathType Path; @@ -111,8 +109,8 @@ return Path; } -// Given a parent node and a leaf ID, returns the index of the -// parent's child ID, creating a new node for the child as necessary. +// Given a parent node and a leaf ID, returns the index of the parent's child +// ID, creating a new node for the child as necessary. TimerStack::TTindex TimerStack::getChildIndex(TimerStack::TTindex Parent, TimerIdT ID) { if (Nodes[Parent].Children.size() <= ID) @@ -127,12 +125,12 @@ return Nodes[Parent].Children[ID]; } -// Finds a node in the timer stack corresponding to the given path, -// creating new interior nodes as necessary. +// Finds a node in the timer stack corresponding to the given path, creating +// new interior nodes as necessary. TimerStack::TTindex TimerStack::findPath(const PathType &Path) { TTindex CurIndex = 0; - // The path is in reverse order (leaf to root), so it needs to be - // followed in reverse. 
+ // The path is in reverse order (leaf to root), so it needs to be followed in + // reverse. for (TTindex Index : reverse_range(Path)) { CurIndex = getChildIndex(CurIndex, Index); } @@ -150,8 +148,8 @@ assert(StackTop); } -// Pops the top marker from the timer stack. Validates via assert() -// that the expected marker is popped. +// Pops the top marker from the timer stack. Validates via assert() that the +// expected marker is popped. void TimerStack::pop(TimerIdT ID) { if (!BuildDefs::dump()) return; @@ -167,15 +165,15 @@ StackTop = Nodes[StackTop].Parent; } -// At a state change (e.g. push or pop), updates the flat and -// cumulative timings for everything on the timer stack. +// At a state change (e.g. push or pop), updates the flat and cumulative +// timings for everything on the timer stack. void TimerStack::update(bool UpdateCounts) { if (!BuildDefs::dump()) return; ++StateChangeCount; - // Whenever the stack is about to change, we grab the time delta - // since the last change and add it to all active cumulative - // elements and to the flat element for the top of the stack. + // Whenever the stack is about to change, we grab the time delta since the + // last change and add it to all active cumulative elements and to the flat + // element for the top of the stack. double Current = timestamp(); double Delta = Current - LastTimestamp; if (StackTop) { @@ -198,10 +196,10 @@ assert(Next < Prefix); Prefix = Next; } - // Capture the next timestamp *after* the updates are finished. - // This minimizes how much the timer can perturb the reported - // timing. The numbers may not sum to 100%, and the missing amount - // is indicative of the overhead of timing. + // Capture the next timestamp *after* the updates are finished. This + // minimizes how much the timer can perturb the reported timing. The numbers + // may not sum to 100%, and the missing amount is indicative of the overhead + // of timing. 
LastTimestamp = timestamp(); } @@ -234,8 +232,8 @@ } } -// Write a printf() format string into Buf[], in the format "[%5lu] ", -// where "5" is actually the number of digits in MaxVal. E.g., +// Write a printf() format string into Buf[], in the format "[%5lu] ", where +// "5" is actually the number of digits in MaxVal. E.g., // MaxVal=0 ==> "[%1lu] " // MaxVal=5 ==> "[%1lu] " // MaxVal=9876 ==> "[%4lu] "
diff --git a/src/IceTimerTree.def b/src/IceTimerTree.def index 6db9fbc..6e12219 100644 --- a/src/IceTimerTree.def +++ b/src/IceTimerTree.def
@@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// // -// This file lists predefined timing tags. New tags can be added to -// avoid a runtime string lookup. +// This file lists predefined timing tags. New tags can be added to avoid a +// runtime string lookup. // //===----------------------------------------------------------------------===//
diff --git a/src/IceTimerTree.h b/src/IceTimerTree.h index 98bbdda..796ce8c 100644 --- a/src/IceTimerTree.h +++ b/src/IceTimerTree.h
@@ -8,8 +8,8 @@ //===----------------------------------------------------------------------===// /// /// \file -/// This file declares the TimerTree class, which allows flat and -/// cumulative execution time collection of call chains. +/// This file declares the TimerTree class, which allows flat and cumulative +/// execution time collection of call chains. /// //===----------------------------------------------------------------------===// @@ -26,23 +26,22 @@ TimerStack() = delete; TimerStack &operator=(const TimerStack &) = delete; - /// Timer tree index type. A variable of this type is used to access - /// an interior, not-necessarily-leaf node of the tree. + /// Timer tree index type. A variable of this type is used to access an + /// interior, not-necessarily-leaf node of the tree. using TTindex = std::vector<class TimerTreeNode>::size_type; - /// Representation of a path of leaf values leading to a particular - /// node. The representation happens to be in "reverse" order, - /// i.e. from leaf/interior to root, for implementation efficiency. + /// Representation of a path of leaf values leading to a particular node. The + /// representation happens to be in "reverse" order, i.e. from leaf/interior + /// to root, for implementation efficiency. using PathType = llvm::SmallVector<TTindex, 8>; - /// Representation of a mapping of leaf node indexes from one timer - /// stack to another. + /// Representation of a mapping of leaf node indexes from one timer stack to + /// another. using TranslationType = std::vector<TimerIdT>; - /// TimerTreeNode represents an interior or leaf node in the call tree. - /// It contains a list of children, a pointer to its parent, and the - /// timer ID for the node. It also holds the cumulative time spent at - /// this node and below. The children are always at a higher index in - /// the TimerTreeNode::Nodes array, and the parent is always at a lower - /// index. 
+ /// TimerTreeNode represents an interior or leaf node in the call tree. It + /// contains a list of children, a pointer to its parent, and the timer ID for + /// the node. It also holds the cumulative time spent at this node and below. + /// The children are always at a higher index in the TimerTreeNode::Nodes + /// array, and the parent is always at a lower index. class TimerTreeNode { TimerTreeNode &operator=(const TimerTreeNode &) = delete;
diff --git a/src/IceTranslator.cpp b/src/IceTranslator.cpp index e3a32dc..68f2b1f 100644 --- a/src/IceTranslator.cpp +++ b/src/IceTranslator.cpp
@@ -8,8 +8,8 @@ //===----------------------------------------------------------------------===// /// /// \file -/// This file defines the general driver class for translating ICE to -/// machine code. +/// This file defines the general driver class for translating ICE to machine +/// code. /// //===----------------------------------------------------------------------===//
diff --git a/src/IceTranslator.h b/src/IceTranslator.h index 449b216..415965a 100644 --- a/src/IceTranslator.h +++ b/src/IceTranslator.h
@@ -8,8 +8,8 @@ //===----------------------------------------------------------------------===// /// /// \file -/// This file declares the general driver class for translating ICE to -/// machine code. +/// This file declares the general driver class for translating ICE to machine +/// code. /// //===----------------------------------------------------------------------===// @@ -30,7 +30,7 @@ class VariableDeclaration; class GlobalContext; -/// Base class for translating ICE to machine code. Derived classes convert +/// Base class for translating ICE to machine code. Derived classes convert /// other intermediate representations down to ICE, and then call the /// appropriate (inherited) methods to convert ICE into machine instructions. class Translator { @@ -48,21 +48,21 @@ const ClFlags &getFlags() const { return Ctx->getFlags(); } - /// Translates the constructed ICE function Fcn to machine code. - /// Takes ownership of Func. + /// Translates the constructed ICE function Func to machine code. Takes + /// ownership of Func. void translateFcn(std::unique_ptr<Cfg> Func); - /// Lowers the given list of global addresses to target. Generates - /// list of corresponding variable declarations. + /// Lowers the given list of global addresses to target. Generates list of + /// corresponding variable declarations. void lowerGlobals(std::unique_ptr<VariableDeclarationList> VariableDeclarations); /// Creates a name using the given prefix and corresponding index. std::string createUnnamedName(const IceString &Prefix, SizeT Index); - /// Reports if there is a (potential) conflict between Name, and using - /// Prefix to name unnamed names. Errors are put on Ostream. - /// Returns true if there isn't a potential conflict. + /// Reports if there is a (potential) conflict between Name, and using Prefix + /// to name unnamed names. Errors are put on Ostream. Returns true if there + /// isn't a potential conflict. 
bool checkIfUnnamedNameSafe(const IceString &Name, const char *Kind, const IceString &Prefix);
diff --git a/src/IceTypeConverter.h b/src/IceTypeConverter.h index cb3536c..c61423c 100644 --- a/src/IceTypeConverter.h +++ b/src/IceTypeConverter.h
@@ -8,8 +8,8 @@ //===----------------------------------------------------------------------===// /// /// \file -/// This file defines how to convert LLVM types to ICE types, and ICE types -/// to LLVM types. +/// This file defines how to convert LLVM types to ICE types, and ICE types to +/// LLVM types. /// //===----------------------------------------------------------------------===// @@ -40,8 +40,8 @@ /// Context is the context to use to build llvm types. explicit TypeConverter(llvm::LLVMContext &Context); - /// Converts LLVM type LLVMTy to an ICE type. Returns - /// Ice::IceType_NUM if unable to convert. + /// Converts LLVM type LLVMTy to an ICE type. Returns Ice::IceType_NUM if + /// unable to convert. Type convertToIceType(llvm::Type *LLVMTy) const { auto Pos = LLVM2IceMap.find(LLVMTy); if (Pos == LLVM2IceMap.end())
diff --git a/src/IceTypes.cpp b/src/IceTypes.cpp index dd06b1e..6cc79b7 100644 --- a/src/IceTypes.cpp +++ b/src/IceTypes.cpp
@@ -58,8 +58,7 @@ ICETYPE_PROPS_TABLE #undef X -// Show vector definitions match in ICETYPE_TABLE and -// ICETYPE_PROPS_TABLE. +// Show vector definitions match in ICETYPE_TABLE and ICETYPE_PROPS_TABLE. // Define constants for each element size in ICETYPE_TABLE. enum {
diff --git a/src/IceTypes.def b/src/IceTypes.def index 94877a2..b86dba8 100644 --- a/src/IceTypes.def +++ b/src/IceTypes.def
@@ -7,8 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This file defines properties of ICE primitive types in the form of -// x-macros. +// This file defines properties of ICE primitive types in the form of x-macros. // //===----------------------------------------------------------------------===// @@ -16,10 +15,10 @@ #define SUBZERO_SRC_ICETYPES_DEF // Attributes of each target architecture. -// NOTE on is_elf64 -- At some point NaCl would like to use ELF32 for all -// ILP32 sandboxes, but for now the 64-bit architectures use ELF64: -// https://code.google.com/p/nativeclient/issues/detail?id=349 -// TODO: Whoever adds AArch64 will need to set ABI e_flags. +// NOTE on is_elf64 -- At some point NaCl would like to use ELF32 for all ILP32 +// sandboxes, but for now the 64-bit architectures use ELF64: +// https://code.google.com/p/nativeclient/issues/detail?id=349 +// TODO: Whoever adds AArch64 will need to set ABI e_flags. #define TARGETARCH_TABLE \ /* enum value, printable string, is_elf64, e_machine, e_flags */ \ X(Target_X8632, "x86-32", false, EM_386, 0) \
diff --git a/src/IceTypes.h b/src/IceTypes.h index 3c87f68..f176e9b 100644 --- a/src/IceTypes.h +++ b/src/IceTypes.h
@@ -8,9 +8,8 @@ //===----------------------------------------------------------------------===// /// /// \file -/// This file declares a few properties of the primitive types allowed -/// in Subzero. Every Subzero source file is expected to include -/// IceTypes.h. +/// This file declares a few properties of the primitive types allowed in +/// Subzero. Every Subzero source file is expected to include IceTypes.h. /// //===----------------------------------------------------------------------===// @@ -42,8 +41,8 @@ return Stream << targetArchString(Arch); } -/// The list of all target instruction sets. Individual targets will -/// map this to include only what is valid for the target. +/// The list of all target instruction sets. Individual targets will map this to +/// include only what is valid for the target. enum TargetInstructionSet { // Represents baseline that can be assumed for a target (usually "Begin"). BaseInstructionSet, @@ -97,8 +96,8 @@ return result; } -/// Check if Ty is byte sized and specifically i8. Assert that it's not -/// byte sized due to being an i1. +/// Check if Ty is byte sized and specifically i8. Assert that it's not byte +/// sized due to being an i1. inline bool isByteSizedArithType(Type Ty) { assert(Ty != IceType_i1); return Ty == IceType_i8; @@ -131,8 +130,8 @@ public: using ArgListType = std::vector<Type>; - /// Creates a function signature type with the given return type. - /// Parameter types should be added using calls to appendArgType. + /// Creates a function signature type with the given return type. Parameter + /// types should be added using calls to appendArgType. FuncSigType() = default; FuncSigType(const FuncSigType &Ty) = default;
diff --git a/src/IceUtils.h b/src/IceUtils.h index f07a566..9387671 100644 --- a/src/IceUtils.h +++ b/src/IceUtils.h
@@ -19,11 +19,10 @@ namespace Ice { -/// Similar to bit_cast, but allows copying from types of unrelated -/// sizes. This method was introduced to enable the strict aliasing -/// optimizations of GCC 4.4. Basically, GCC mindlessly relies on -/// obscure details in the C++ standard that make reinterpret_cast -/// virtually useless. +/// Similar to bit_cast, but allows copying from types of unrelated sizes. This +/// method was introduced to enable the strict aliasing optimizations of GCC +/// 4.4. Basically, GCC mindlessly relies on obscure details in the C++ standard +/// that make reinterpret_cast virtually useless. template <class D, class S> inline D bit_copy(const S &source) { D destination; // This use of memcpy is safe: source and destination cannot overlap. @@ -63,8 +62,8 @@ return IsUint(N, Value); } - /// Return true if the addition X + Y will cause integer overflow for - /// integers of type T. + /// Return true if the addition X + Y will cause integer overflow for integers + /// of type T. template <typename T> static inline bool WouldOverflowAdd(T X, T Y) { return ((X > 0 && Y > 0 && (X > std::numeric_limits<T>::max() - Y)) || (X < 0 && Y < 0 && (X < std::numeric_limits<T>::min() - Y)));
diff --git a/src/PNaClTranslator.cpp b/src/PNaClTranslator.cpp index 023a433..d8d1860 100644 --- a/src/PNaClTranslator.cpp +++ b/src/PNaClTranslator.cpp
@@ -41,11 +41,10 @@ namespace { using namespace llvm; -// Models elements in the list of types defined in the types block. -// These elements can be undefined, a (simple) type, or a function type -// signature. Note that an extended type is undefined on construction. -// Use methods setAsSimpleType and setAsFuncSigType to define -// the extended type. +// Models elements in the list of types defined in the types block. These +// elements can be undefined, a (simple) type, or a function type signature. +// Note that an extended type is undefined on construction. Use methods +// setAsSimpleType and setAsFuncSigType to define the extended type. class ExtendedType { ExtendedType &operator=(const ExtendedType &Ty) = delete; @@ -61,8 +60,7 @@ ExtendedType::TypeKind getKind() const { return Kind; } void dump(Ice::Ostream &Stream) const; - /// Changes the extended type to a simple type with the given - /// value. + /// Changes the extended type to a simple type with the given value. void setAsSimpleType(Ice::Type Ty) { assert(Kind == Undefined); Kind = Simple; @@ -76,8 +74,8 @@ } protected: - // Note: For simple types, the return type of the signature will - // be used to hold the simple type. + // Note: For simple types, the return type of the signature will be used to + // hold the simple type. Ice::FuncSigType Signature; private: @@ -180,16 +178,15 @@ BlockParser = NewBlockParser; } - /// Generates error with given Message, occurring at BitPosition - /// within the bitcode file. Always returns true. + /// Generates error with given Message, occurring at BitPosition within the + /// bitcode file. Always returns true. bool ErrorAt(naclbitc::ErrorLevel Level, uint64_t BitPosition, const std::string &Message) final; /// Generates error message with respect to the current block parser. bool blockError(const std::string &Message); - /// Returns the number of errors found while parsing the bitcode - /// file. 
+ /// Returns the number of errors found while parsing the bitcode file. unsigned getNumErrors() const { return NumErrors; } /// Changes the size of the type list to the given size. @@ -202,11 +199,11 @@ return Translator.getFlags().getDisableIRGeneration(); } - /// Returns the undefined type associated with type ID. - /// Note: Returns extended type ready to be defined. + /// Returns the undefined type associated with type ID. Note: Returns extended + /// type ready to be defined. ExtendedType *getTypeByIDForDefining(NaClBcIndexSize_t ID) { - // Get corresponding element, verifying the value is still undefined - // (and hence allowed to be defined). + // Get corresponding element, verifying the value is still undefined (and + // hence allowed to be defined). ExtendedType *Ty = getTypeByIDAsKind(ID, ExtendedType::Undefined); if (Ty) return Ty; @@ -248,9 +245,9 @@ FunctionDeclarations.push_back(Fcn); } - /// Returns the value id that should be associated with the the - /// current function block. Increments internal counters during call - /// so that it will be in correct position for next function block. + /// Returns the value id that should be associated with the current + /// function block. Increments internal counters during call so that it will + /// be in correct position for next function block. NaClBcIndexSize_t getNextFunctionBlockValueID() { size_t NumDeclaredFunctions = FunctionDeclarations.size(); while (NextDefiningFunctionID < NumDeclaredFunctions && @@ -274,9 +271,9 @@ return ValueIDConstants[ID]; } - /// Install names for all global values without names. Called after - /// the global value symbol table is processed, but before any - /// function blocks are processed. + /// Install names for all global values without names. Called after the global + /// value symbol table is processed, but before any function blocks are + /// processed. 
void installGlobalNames() { assert(VariableDeclarations); installGlobalVarNames(); @@ -294,8 +291,8 @@ /// Returns the number of function declarations in the bitcode file. size_t getNumFunctionIDs() const { return FunctionDeclarations.size(); } - /// Returns the number of global declarations (i.e. IDs) defined in - /// the bitcode file. + /// Returns the number of global declarations (i.e. IDs) defined in the + /// bitcode file. size_t getNumGlobalIDs() const { if (VariableDeclarations) { return FunctionDeclarations.size() + VariableDeclarations->size(); @@ -319,8 +316,8 @@ return reportGetGlobalVariableByIDError(Index); } - /// Returns the global declaration (variable or function) with the - /// given Index. + /// Returns the global declaration (variable or function) with the given + /// Index. Ice::GlobalDeclaration *getGlobalDeclarationByID(NaClBcIndexSize_t Index) { size_t NumFunctionIds = FunctionDeclarations.size(); if (Index < NumFunctionIds) @@ -329,13 +326,12 @@ return getGlobalVariableByID(Index - NumFunctionIds); } - /// Returns the list of parsed global variable - /// declarations. Releases ownership of the current list of global - /// variables. Note: only returns non-null pointer on first - /// call. All successive calls return a null pointer. + /// Returns the list of parsed global variable declarations. Releases + /// ownership of the current list of global variables. Note: only returns + /// non-null pointer on first call. All successive calls return a null + /// pointer. std::unique_ptr<Ice::VariableDeclarationList> getGlobalVariables() { - // Before returning, check that ValidIDConstants has already been - // built. + // Before returning, check that ValueIDConstants has already been built. assert(!VariableDeclarations || VariableDeclarations->size() <= ValueIDConstants.size()); return std::move(VariableDeclarations); @@ -364,16 +360,14 @@ Ice::ConstantList ValueIDConstants; // Error recovery value to use when getFuncSigTypeByID fails. 
Ice::FuncSigType UndefinedFuncSigType; - // The block parser currently being applied. Used for error - // reporting. + // The block parser currently being applied. Used for error reporting. BlockParserBaseClass *BlockParser = nullptr; bool ParseBlock(unsigned BlockID) override; - // Gets extended type associated with the given index, assuming the - // extended type is of the WantedKind. Generates error message if - // corresponding extended type of WantedKind can't be found, and - // returns nullptr. + // Gets extended type associated with the given index, assuming the extended + // type is of the WantedKind. Generates error message if corresponding + // extended type of WantedKind can't be found, and returns nullptr. ExtendedType *getTypeByIDAsKind(NaClBcIndexSize_t ID, ExtendedType::TypeKind WantedKind) { ExtendedType *Ty = nullptr; @@ -387,12 +381,11 @@ return nullptr; } - // Gives Decl a name if it doesn't already have one. Prefix and - // NameIndex are used to generate the name. NameIndex is - // automatically incremented if a new name is created. DeclType is - // literal text describing the type of name being created. Also - // generates warning if created names may conflict with named - // declarations. + // Gives Decl a name if it doesn't already have one. Prefix and NameIndex are + // used to generate the name. NameIndex is automatically incremented if a new + // name is created. DeclType is literal text describing the type of name + // being created. Also generates warning if created names may conflict with + // named declarations. void installDeclarationName(Ice::GlobalDeclaration *Decl, const Ice::IceString &Prefix, const char *DeclType, @@ -431,7 +424,7 @@ } // Builds a constant symbol named Name, suppressing name mangling if - // SuppressMangling. IsExternal is true iff the symbol is external. + // SuppressMangling. IsExternal is true iff the symbol is external. 
Ice::Constant *getConstantSym(const Ice::IceString &Name, bool SuppressMangling, bool IsExternal) const { if (IsExternal) { @@ -471,17 +464,17 @@ void reportBadTypeIDAs(NaClBcIndexSize_t ID, const ExtendedType *Ty, ExtendedType::TypeKind WantedType); - // Reports that there is no function declaration for ID. Returns an - // error recovery value to use. + // Reports that there is no function declaration for ID. Returns an error + // recovery value to use. Ice::FunctionDeclaration *reportGetFunctionByIDError(NaClBcIndexSize_t ID); - // Reports that there is not global variable declaration for - // ID. Returns an error recovery value to use. + // Reports that there is no global variable declaration for ID. Returns an + // error recovery value to use. Ice::VariableDeclaration * reportGetGlobalVariableByIDError(NaClBcIndexSize_t Index); - // Reports that there is no corresponding ICE type for LLVMTy, and - // returns Ice::IceType_void. + // Reports that there is no corresponding ICE type for LLVMTy, and returns + // Ice::IceType_void. Ice::Type convertToIceTypeError(Type *LLVMTy); }; @@ -549,10 +542,9 @@ return Ice::IceType_void; } -// Base class for parsing blocks within the bitcode file. Note: -// Because this is the base class of block parsers, we generate error -// messages if ParseBlock or ParseRecord is not overridden in derived -// classes. +// Base class for parsing blocks within the bitcode file. Note: Because this is +// the base class of block parsers, we generate error messages if ParseBlock or +// ProcessRecord is not overridden in derived classes. class BlockParserBaseClass : public NaClBitcodeParser { BlockParserBaseClass() = delete; BlockParserBaseClass(const BlockParserBaseClass &) = delete; @@ -595,16 +587,15 @@ return getTranslator().getFlags().getDisableIRGeneration(); } - // Default implementation. Reports that block is unknown and skips - // its contents. + // Default implementation. Reports that block is unknown and skips its + // contents. 
bool ParseBlock(unsigned BlockID) override; - // Default implementation. Reports that the record is not - // understood. + // Default implementation. Reports that the record is not understood. void ProcessRecord() override; - // Checks if the size of the record is Size. Return true if valid. - // Otherwise generates an error and returns false. + // Checks if the size of the record is Size. Return true if valid. Otherwise + // generates an error and returns false. bool isValidRecordSize(size_t Size, const char *RecordName) { const NaClBitcodeRecord::RecordVector &Values = Record.GetValues(); if (Values.size() == Size) @@ -613,9 +604,8 @@ return false; } - // Checks if the size of the record is at least as large as the - // LowerLimit. Returns true if valid. Otherwise generates an error - // and returns false. + // Checks if the size of the record is at least as large as the LowerLimit. + // Returns true if valid. Otherwise generates an error and returns false. bool isValidRecordSizeAtLeast(size_t LowerLimit, const char *RecordName) { const NaClBitcodeRecord::RecordVector &Values = Record.GetValues(); if (Values.size() >= LowerLimit) @@ -625,8 +615,8 @@ } // Checks if the size of the record is no larger than the - // UpperLimit. Returns true if valid. Otherwise generates an error - // and returns false. + // UpperLimit. Returns true if valid. Otherwise generates an error and + // returns false. bool isValidRecordSizeAtMost(size_t UpperLimit, const char *RecordName) { const NaClBitcodeRecord::RecordVector &Values = Record.GetValues(); if (Values.size() <= UpperLimit) @@ -635,9 +625,9 @@ return false; } - // Checks if the size of the record is at least as large as the - // LowerLimit, and no larger than the UpperLimit. Returns true if - // valid. Otherwise generates an error and returns false. + // Checks if the size of the record is at least as large as the LowerLimit, + // and no larger than the UpperLimit. Returns true if valid. 
Otherwise + // generates an error and returns false. bool isValidRecordSizeInRange(size_t LowerLimit, size_t UpperLimit, const char *RecordName) { return isValidRecordSizeAtLeast(LowerLimit, RecordName) || @@ -645,11 +635,10 @@ } private: - /// Generates a record size error. ExpectedSize is the number - /// of elements expected. RecordName is the name of the kind of - /// record that has incorrect size. ContextMessage (if not nullptr) - /// is appended to "record expects" to describe how ExpectedSize - /// should be interpreted. + /// Generates a record size error. ExpectedSize is the number of elements + /// expected. RecordName is the name of the kind of record that has incorrect + /// size. ContextMessage (if not nullptr) is appended to "record expects" to + /// describe how ExpectedSize should be interpreted. void reportRecordSizeError(size_t ExpectedSize, const char *RecordName, const char *ContextMessage); }; @@ -666,9 +655,9 @@ const std::string &Message) { std::string Buffer; raw_string_ostream StrBuf(Buffer); - // Note: If dump routines have been turned off, the error messages - // will not be readable. Hence, replace with simple error. We also - // use the simple form for unit tests. + // Note: If dump routines have been turned off, the error messages will not + // be readable. Hence, replace with simple error. We also use the simple form + // for unit tests. if (getFlags().getGenerateUnitTestMessages()) { StrBuf << "Invalid " << getBlockName() << " record: <" << Record.GetCode(); for (const uint64_t Val : Record.GetValues()) { @@ -700,8 +689,8 @@ } bool BlockParserBaseClass::ParseBlock(unsigned BlockID) { - // If called, derived class doesn't know how to handle block. - // Report error and skip. + // If called, derived class doesn't know how to handle block. Report error + // and skip. 
std::string Buffer; raw_string_ostream StrBuf(Buffer); StrBuf << "Don't know how to parse block id: " << BlockID; @@ -742,8 +731,8 @@ private: Ice::TimerMarker Timer; - // The type ID that will be associated with the next type defining - // record in the types block. + // The type ID that will be associated with the next type defining record in + // the types block. NaClBcIndexSize_t NextTypeId = 0; // The expected number of types, based on record TYPE_CODE_NUMENTRY. @@ -773,13 +762,11 @@ Error(StrBuf.str()); ExpectedNumTypes = NaClBcIndexSize_t_Max; } - // The code double checks that Expected size and the actual size - // at the end of the block. To reduce allocations we preallocate - // the space. + // The code double checks that Expected size and the actual size at the end + // of the block. To reduce allocations we preallocate the space. // - // However, if the number is large, we suspect that the number - // is (possibly) incorrect. In that case, we preallocate a - // smaller space. + // However, if the number is large, we suspect that the number is + // (possibly) incorrect. In that case, we preallocate a smaller space. constexpr uint64_t DefaultLargeResizeValue = 1000000; Context->resizeTypeIDValues(std::min(Size, DefaultLargeResizeValue)); ExpectedNumTypes = Size; @@ -902,9 +889,9 @@ FuncSigExtendedType *FuncTy = cast<FuncSigExtendedType>(Ty); FuncTy->setReturnType(Context->getSimpleTypeByID(Values[1])); for (size_t i = 2, e = Values.size(); i != e; ++i) { - // Check that type void not used as argument type. - // Note: PNaCl restrictions can't be checked until we - // know the name, because we have to check for intrinsic signatures. + // Check that type void not used as argument type. Note: PNaCl + // restrictions can't be checked until we know the name, because we have + // to check for intrinsic signatures. 
Ice::Type ArgTy = Context->getSimpleTypeByID(Values[i]); if (ArgTy == Ice::IceType_void) { std::string Buffer; @@ -956,8 +943,8 @@ // Holds the number of defined function IDs. NaClBcIndexSize_t NumFunctionIDs; - // Holds the specified number of global variables by the count record in - // the global variables block. + // Holds the specified number of global variables by the count record in the + // global variables block. NaClBcIndexSize_t SpecifiedNumberVars = 0; // Keeps track of how many initializers are expected for the global variable @@ -967,9 +954,8 @@ // The index of the next global variable declaration. NaClBcIndexSize_t NextGlobalID = 0; - // Dummy global variable declaration to guarantee CurGlobalVar is - // always defined (allowing code to not need to check if - // CurGlobalVar is nullptr). + // Dummy global variable declaration to guarantee CurGlobalVar is always + // defined (allowing code to not need to check if CurGlobalVar is nullptr). Ice::VariableDeclaration *DummyGlobalVar; // Holds the current global variable declaration being built. @@ -1230,14 +1216,13 @@ getTranslator().getContext()->pushTimer(TimerID, StackID); } - // Note: The Cfg is created, even when IR generation is disabled. This - // is done to install a CfgLocalAllocator for various internal containers. + // Note: The Cfg is created, even when IR generation is disabled. This is + // done to install a CfgLocalAllocator for various internal containers. Func = Ice::Cfg::create(getTranslator().getContext(), getTranslator().getNextSequenceNumber()); Ice::Cfg::setCurrentCfg(Func.get()); - // TODO(kschimpf) Clean up API to add a function signature to - // a CFG. + // TODO(kschimpf) Clean up API to add a function signature to a CFG. 
const Ice::FuncSigType &Signature = FuncDecl->getSignature(); if (isIRGenerationDisabled()) { CurrentNode = nullptr; @@ -1257,17 +1242,17 @@ } bool ParserResult = ParseThisBlock(); - // Temporarily end per-function timing, which will be resumed by - // the translator function. This is because translation may be - // done asynchronously in a separate thread. + // Temporarily end per-function timing, which will be resumed by the + // translator function. This is because translation may be done + // asynchronously in a separate thread. if (TimeThisFunction) getTranslator().getContext()->popTimer(TimerID, StackID); Ice::Cfg::setCurrentCfg(nullptr); - // Note: Once any errors have been found, we turn off all - // translation of all remaining functions. This allows successive - // parsing errors to be reported, without adding extra checks to - // the translator for such parsing errors. + // Note: Once any errors have been found, we turn off all translation of + // all remaining functions. This allows successive parsing errors to be + // reported, without adding extra checks to the translator for such parsing + // errors. if (Context->getNumErrors() == 0 && Func) { getTranslator().translateFcn(std::move(Func)); // The translator now has ownership of Func. @@ -1332,21 +1317,20 @@ Ice::FunctionDeclaration *FuncDecl; // Holds the dividing point between local and global absolute value indices. size_t CachedNumGlobalValueIDs; - // Holds operands local to the function block, based on indices - // defined in the bitcode file. + // Holds operands local to the function block, based on indices defined in + // the bitcode file. Ice::OperandList LocalOperands; - // Holds the index within LocalOperands corresponding to the next - // instruction that generates a value. + // Holds the index within LocalOperands corresponding to the next instruction + // that generates a value. 
NaClBcIndexSize_t NextLocalInstIndex; - // True if the last processed instruction was a terminating - // instruction. + // True if the last processed instruction was a terminating instruction. bool InstIsTerminating = false; // Upper limit of alignment power allowed by LLVM static const uint32_t AlignPowerLimit = 29; - // Extracts the corresponding Alignment to use, given the AlignPower - // (i.e. 2**(AlignPower-1), or 0 if AlignPower == 0). InstName is the - // name of the instruction the alignment appears in. + // Extracts the corresponding Alignment to use, given the AlignPower (i.e. + // 2**(AlignPower-1), or 0 if AlignPower == 0). InstName is the name of the + // instruction the alignment appears in. void extractAlignment(const char *InstName, uint32_t AlignPower, uint32_t &Alignment) { if (AlignPower <= AlignPowerLimit + 1) { @@ -1396,10 +1380,9 @@ return Func->getNodes()[Index]; } - // Returns the Index-th basic block in the list of basic blocks. - // Assumes Index corresponds to a branch instruction. Hence, if - // the branch references the entry block, it also generates a - // corresponding error. + // Returns the Index-th basic block in the list of basic blocks. Assumes + // Index corresponds to a branch instruction. Hence, if the branch references + // the entry block, it also generates a corresponding error. Ice::CfgNode *getBranchBasicBlock(NaClBcIndexSize_t Index) { assert(!isIRGenerationDisabled()); if (Index == 0) { @@ -1448,8 +1431,7 @@ return Var; } - // Converts a relative index (wrt to BaseIndex) to an absolute value - // index. + // Converts a relative index (wrt to BaseIndex) to an absolute value index. NaClBcIndexSize_t convertRelativeToAbsIndex(NaClRelBcIndexSize_t Id, NaClRelBcIndexSize_t BaseIndex) { if (BaseIndex < Id) { @@ -1508,8 +1490,8 @@ LocalOperands[LocalIndex] = Op; } - // Returns the relative operand (wrt to BaseIndex) referenced by - // the given value Index. 
+ // Returns the relative operand (wrt to BaseIndex) referenced by the given + // value Index. Ice::Operand *getRelativeOperand(NaClBcIndexSize_t Index, NaClBcIndexSize_t BaseIndex) { return getOperand(convertRelativeToAbsIndex(Index, BaseIndex)); @@ -1518,13 +1500,12 @@ // Returns the absolute index of the next value generating instruction. NaClBcIndexSize_t getNextInstIndex() const { return NextLocalInstIndex; } - // Generates type error message for binary operator Op - // operating on Type OpTy. + // Generates type error message for binary operator Op operating on Type + // OpTy. void reportInvalidBinaryOp(Ice::InstArithmetic::OpKind Op, Ice::Type OpTy); - // Validates if integer logical Op, for type OpTy, is valid. - // Returns true if valid. Otherwise generates error message and - // returns false. + // Validates if integer logical Op, for type OpTy, is valid. Returns true if + // valid. Otherwise generates error message and returns false. bool isValidIntegerLogicalOp(Ice::InstArithmetic::OpKind Op, Ice::Type OpTy) { if (Ice::isIntegerType(OpTy)) return true; @@ -1532,9 +1513,9 @@ return false; } - // Validates if integer (or vector of integers) arithmetic Op, for type - // OpTy, is valid. Returns true if valid. Otherwise generates - // error message and returns false. + // Validates if integer (or vector of integers) arithmetic Op, for type OpTy, + // is valid. Returns true if valid. Otherwise generates error message and + // returns false. bool isValidIntegerArithOp(Ice::InstArithmetic::OpKind Op, Ice::Type OpTy) { if (Ice::isIntegerArithmeticType(OpTy)) return true; @@ -1542,9 +1523,8 @@ return false; } - // Checks if floating arithmetic Op, for type OpTy, is valid. - // Returns true if valid. Otherwise generates an error message and - // returns false; + // Checks if floating arithmetic Op, for type OpTy, is valid. Returns true if + // valid. 
Otherwise generates an error message and returns false. bool isValidFloatingArithOp(Ice::InstArithmetic::OpKind Op, Ice::Type OpTy) { if (Ice::isFloatingType(OpTy)) return true; @@ -1552,9 +1532,9 @@ return false; } - // Checks if the type of operand Op is the valid pointer type, for - // the given InstructionName. Returns true if valid. Otherwise - // generates an error message and returns false. + // Checks if the type of operand Op is the valid pointer type, for the given + // InstructionName. Returns true if valid. Otherwise generates an error + // message and returns false. bool isValidPointerType(Ice::Operand *Op, const char *InstructionName) { Ice::Type PtrType = Ice::getPointerType(); if (Op->getType() == PtrType) @@ -1567,9 +1547,8 @@ return false; } - // Checks if loading/storing a value of type Ty is allowed. - // Returns true if Valid. Otherwise generates an error message and - // returns false. + // Checks if loading/storing a value of type Ty is allowed. Returns true if + // valid. Otherwise generates an error message and returns false. bool isValidLoadStoreType(Ice::Type Ty, const char *InstructionName) { if (isLoadStoreType(Ty)) return true; @@ -1580,9 +1559,8 @@ return false; } - // Checks if loading/storing a value of type Ty is allowed for - // the given Alignment. Otherwise generates an error message and - // returns false. + // Checks if loading/storing a value of type Ty is allowed for the given + // Alignment. Otherwise generates an error message and returns false. bool isValidLoadStoreAlignment(size_t Alignment, Ice::Type Ty, const char *InstructionName) { if (!isValidLoadStoreType(Ty, InstructionName)) @@ -1598,8 +1576,8 @@ } // Defines if the given alignment is valid for the given type. Simplified - // version of PNaClABIProps::isAllowedAlignment, based on API's offered - // for Ice::Type. + // version of PNaClABIProps::isAllowedAlignment, based on APIs offered for + // Ice::Type. 
bool isAllowedAlignment(size_t Alignment, Ice::Type Ty) const { return Alignment == typeAlignInBytes(Ty) || (Alignment == 1 && !isVectorType(Ty)); @@ -1655,9 +1633,9 @@ return VectorIndexValid; } - // Takes the PNaCl bitcode binary operator Opcode, and the opcode - // type Ty, and sets Op to the corresponding ICE binary - // opcode. Returns true if able to convert, false otherwise. + // Takes the PNaCl bitcode binary operator Opcode, and the opcode type Ty, + // and sets Op to the corresponding ICE binary opcode. Returns true if able + // to convert, false otherwise. bool convertBinopOpcode(unsigned Opcode, Ice::Type Ty, Ice::InstArithmetic::OpKind &Op) { switch (Opcode) { @@ -1736,9 +1714,9 @@ } } - /// Simplifies out vector types from Type1 and Type2, if both are vectors - /// of the same size. Returns true iff both are vectors of the same size, - /// or are both scalar types. + /// Simplifies out vector types from Type1 and Type2, if both are vectors of + /// the same size. Returns true iff both are vectors of the same size, or are + /// both scalar types. static bool simplifyOutCommonVectorType(Ice::Type &Type1, Ice::Type &Type2) { bool IsType1Vector = isVectorType(Type1); bool IsType2Vector = isVectorType(Type2); @@ -1781,8 +1759,8 @@ return isFloatTruncCastValid(TargetType, SourceType); } - /// Returns true iff a cast from floating type SourceType to integer - /// type TargetType is valid. + /// Returns true iff a cast from floating type SourceType to integer type + /// TargetType is valid. static bool isFloatToIntCastValid(Ice::Type SourceType, Ice::Type TargetType) { if (!(Ice::isFloatingType(SourceType) && Ice::isIntegerType(TargetType))) @@ -1797,15 +1775,15 @@ return true; } - /// Returns true iff a cast from integer type SourceType to floating - /// type TargetType is valid. + /// Returns true iff a cast from integer type SourceType to floating type + /// TargetType is valid. 
static bool isIntToFloatCastValid(Ice::Type SourceType, Ice::Type TargetType) { return isFloatToIntCastValid(TargetType, SourceType); } - /// Returns the number of bits used to model type Ty when defining the - /// bitcast instruction. + /// Returns the number of bits used to model type Ty when defining the bitcast + /// instruction. static Ice::SizeT bitcastSizeInBits(Ice::Type Ty) { if (Ice::isVectorType(Ty)) return Ice::typeNumElements(Ty) * @@ -1820,10 +1798,10 @@ return bitcastSizeInBits(SourceType) == bitcastSizeInBits(TargetType); } - /// Returns true iff the NaCl bitcode Opcode is a valid cast opcode - /// for converting SourceType to TargetType. Updates CastKind to the - /// corresponding instruction cast opcode. Also generates an error - /// message when this function returns false. + /// Returns true iff the NaCl bitcode Opcode is a valid cast opcode for + /// converting SourceType to TargetType. Updates CastKind to the corresponding + /// instruction cast opcode. Also generates an error message when this + /// function returns false. bool convertCastOpToIceOp(uint64_t Opcode, Ice::Type SourceType, Ice::Type TargetType, Ice::InstCast::OpKind &CastKind) { @@ -1888,8 +1866,8 @@ return Result; } - // Converts PNaCl bitcode Icmp operator to corresponding ICE op. - // Returns true if able to convert, false otherwise. + // Converts PNaCl bitcode Icmp operator to corresponding ICE op. Returns true + // if able to convert, false otherwise. bool convertNaClBitcICmpOpToIce(uint64_t Op, Ice::InstIcmp::ICond &Cond) const { switch (Op) { @@ -1930,8 +1908,8 @@ } } - // Converts PNaCl bitcode Fcmp operator to corresponding ICE op. - // Returns true if able to convert, false otherwise. + // Converts PNaCl bitcode Fcmp operator to corresponding ICE op. Returns true + // if able to convert, false otherwise. 
bool convertNaClBitcFCompOpToIce(uint64_t Op, Ice::InstFcmp::FCond &Cond) const { switch (Op) { @@ -1990,15 +1968,14 @@ } } - // Creates an error instruction, generating a value of type Ty, and - // adds a placeholder so that instruction indices line up. - // Some instructions, such as a call, will not generate a value - // if the return type is void. In such cases, a placeholder value - // for the badly formed instruction is not needed. Hence, if Ty is - // void, an error instruction is not appended. + // Creates an error instruction, generating a value of type Ty, and adds a + // placeholder so that instruction indices line up. Some instructions, such + // as a call, will not generate a value if the return type is void. In such + // cases, a placeholder value for the badly formed instruction is not needed. + // Hence, if Ty is void, an error instruction is not appended. void appendErrorInstruction(Ice::Type Ty) { - // Note: we don't worry about downstream translation errors because - // the function will not be translated if any errors occur. + // Note: we don't worry about downstream translation errors because the + // function will not be translated if any errors occur. if (Ty == Ice::IceType_void) return; Ice::Variable *Var = getNextInstVar(Ty); @@ -2025,8 +2002,8 @@ } if (isIRGenerationDisabled()) return; - // Before translating, check for blocks without instructions, and - // insert unreachable. This shouldn't happen, but be safe. + // Before translating, check for blocks without instructions, and insert + // unreachable. This shouldn't happen, but be safe. 
size_t Index = 0; for (Ice::CfgNode *Node : Func->getNodes()) { if (Node->getInsts().empty()) { @@ -2051,8 +2028,8 @@ } void FunctionParser::ProcessRecord() { - // Note: To better separate parse/IR generation times, when IR generation - // is disabled we do the following: + // Note: To better separate parse/IR generation times, when IR generation is + // disabled we do the following: // 1) Delay exiting until after we extract operands. // 2) return before we access operands, since all operands will be a nullptr. const NaClBitcodeRecord::RecordVector &Values = Record.GetValues(); @@ -2382,11 +2359,10 @@ // SWITCH: [Condty, Cond, BbIndex, NumCases Case ...] // where Case = [1, 1, Value, BbIndex]. // - // Note: Unlike most instructions, we don't infer the type of - // Cond, but provide it as a separate field. There are also - // unnecesary data fields (i.e. constants 1). These were not - // cleaned up in PNaCl bitcode because the bitcode format was - // already frozen when the problem was noticed. + // Note: Unlike most instructions, we don't infer the type of Cond, but + // provide it as a separate field. There are also unnecessary data fields + // (i.e. constants 1). These were not cleaned up in PNaCl bitcode because + // the bitcode format was already frozen when the problem was noticed. InstIsTerminating = true; if (!isValidRecordSizeAtLeast(4, "switch")) return; @@ -2591,13 +2567,12 @@ // CALL: [cc, fnid, arg0, arg1...] // CALL_INDIRECT: [cc, fn, returnty, args...] // - // Note: The difference between CALL and CALL_INDIRECT is that - // CALL has a reference to an explicit function declaration, while - // the CALL_INDIRECT is just an address. For CALL, we can infer - // the return type by looking up the type signature associated - // with the function declaration. For CALL_INDIRECT we can only - // infer the type signature via argument types, and the - // corresponding return type stored in CALL_INDIRECT record. 
+ // Note: The difference between CALL and CALL_INDIRECT is that CALL has a + // reference to an explicit function declaration, while the CALL_INDIRECT + // is just an address. For CALL, we can infer the return type by looking up + // the type signature associated with the function declaration. For + // CALL_INDIRECT we can only infer the type signature via argument types, + // and the corresponding return type stored in CALL_INDIRECT record. Ice::SizeT ParamsStartIndex = 2; if (Record.GetCode() == naclbitc::FUNC_CODE_INST_CALL) { if (!isValidRecordSizeAtLeast(2, "call")) @@ -2763,8 +2738,8 @@ Ice::GlobalContext *getContext() { return getTranslator().getContext(); } - // Returns true if the type to use for succeeding constants is defined. - // If false, also generates an error message. + // Returns true if the type to use for succeeding constants is defined. If + // false, also generates an error message. bool isValidNextConstantType() { if (NextConstantType != Ice::IceType_void) return true; @@ -2887,8 +2862,8 @@ void setValueName(NaClBcIndexSize_t Index, StringType &Name) override; void setBbName(NaClBcIndexSize_t Index, StringType &Name) override; - // Reports that the assignment of Name to the value associated with - // index is not possible, for the given Context. + // Reports that the assignment of Name to the value associated with index is + // not possible, for the given Context. void reportUnableToAssign(const char *Context, NaClBcIndexSize_t Index, StringType &Name) { std::string Buffer; @@ -2976,10 +2951,10 @@ // and have generated global constant initializers. bool GlobalDeclarationNamesAndInitializersInstalled = false; - // Generates names for unnamed global addresses (i.e. functions and - // global variables). Then lowers global variable declaration - // initializers to the target. May be called multiple times. Only - // the first call will do the installation. + // Generates names for unnamed global addresses (i.e. 
functions and global + // variables). Then lowers global variable declaration initializers to the + // target. May be called multiple times. Only the first call will do the + // installation. void installGlobalNamesAndGlobalVarInitializers() { if (!GlobalDeclarationNamesAndInitializersInstalled) { Context->installGlobalNames(); @@ -3130,11 +3105,11 @@ void PNaClTranslator::translate(const std::string &IRFilename, std::unique_ptr<MemoryObject> &&MemObj) { - // On error, we report_fatal_error to avoid destroying the MemObj. - // That may still be in use by IceBrowserCompileServer. Otherwise, - // we need to change the MemObj to be ref-counted, or have a wrapper, - // or simply leak. We also need a hook to tell the IceBrowserCompileServer - // to unblock its QueueStreamer. + // On error, we report_fatal_error to avoid destroying the MemObj. That may + // still be in use by IceBrowserCompileServer. Otherwise, we need to change + // the MemObj to be ref-counted, or have a wrapper, or simply leak. We also + // need a hook to tell the IceBrowserCompileServer to unblock its + // QueueStreamer. // https://code.google.com/p/nativeclient/issues/detail?id=4163 Ostream &ErrStream = getContext()->getStrError(); // Read header and verify it is good.
diff --git a/src/PNaClTranslator.h b/src/PNaClTranslator.h index 24a627b..8a045ad 100644 --- a/src/PNaClTranslator.h +++ b/src/PNaClTranslator.h
@@ -37,8 +37,8 @@ ~PNaClTranslator() override = default; /// Reads the PNaCl bitcode file and translates to ICE, which is then - /// converted to machine code. Sets ErrorStatus to 1 if any errors - /// occurred. Takes ownership of the MemoryObject. + /// converted to machine code. Sets ErrorStatus to 1 if any errors occurred. + /// Takes ownership of the MemoryObject. void translate(const std::string &IRFilename, std::unique_ptr<llvm::MemoryObject> &&MemoryObject);
diff --git a/src/README.SIMD.rst b/src/README.SIMD.rst index 58f25d9..f8cf08f 100644 --- a/src/README.SIMD.rst +++ b/src/README.SIMD.rst
@@ -1,13 +1,14 @@ Missing support =============== -* The PNaCl LLVM backend expands shufflevector operations into - sequences of insertelement and extractelement operations. For - instance: +* The PNaCl LLVM backend expands shufflevector operations into sequences of + insertelement and extractelement operations. For instance: define <4 x i32> @shuffle(<4 x i32> %arg1, <4 x i32> %arg2) { entry: - %res = shufflevector <4 x i32> %arg1, <4 x i32> %arg2, <4 x i32> <i32 4, i32 5, i32 0, i32 1> + %res = shufflevector <4 x i32> %arg1, + <4 x i32> %arg2, + <4 x i32> <i32 4, i32 5, i32 0, i32 1> ret <4 x i32> %res } @@ -30,38 +31,34 @@ shuffle operations where appropriate. * Add support for vector constants in the backend. The current code - materializes the vector constants it needs (eg. for performing icmp - on unsigned operands) using register operations, but this should be - changed to loading them from a constant pool if the register - initialization is too complicated (such as in - TargetX8632::makeVectorOfHighOrderBits()). + materializes the vector constants it needs (e.g. for performing icmp on + unsigned operands) using register operations, but this should be changed to + loading them from a constant pool if the register initialization is too + complicated (such as in TargetX8632::makeVectorOfHighOrderBits()). -* [x86 specific] llvm-mc does not allow lea to take a mem128 memory - operand when assembling x86-32 code. The current - InstX8632Lea::emit() code uses Variable::asType() to convert any - mem128 Variables into a compatible memory operand type. However, the - emit code does not do any conversions of OperandX8632Mem, so if an - OperandX8632Mem is passed to lea as mem128 the resulting code will - not assemble. One way to fix this is by implementing +* [x86 specific] llvm-mc does not allow lea to take a mem128 memory operand + when assembling x86-32 code. 
The current InstX8632Lea::emit() code uses + Variable::asType() to convert any mem128 Variables into a compatible memory + operand type. However, the emit code does not do any conversions of + OperandX8632Mem, so if an OperandX8632Mem is passed to lea as mem128 the + resulting code will not assemble. One way to fix this is by implementing OperandX8632Mem::asType(). -* [x86 specific] Lower shl with <4 x i32> using some clever float - conversion: +* [x86 specific] Lower shl with <4 x i32> using some clever float conversion: http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20100726/105087.html -* [x86 specific] Add support for using aligned mov operations - (movaps). This will require passing alignment information to loads - and stores. +* [x86 specific] Add support for using aligned mov operations (movaps). This + will require passing alignment information to loads and stores. x86 SIMD Diversification ======================== -* Vector "bitwise" operations have several variant instructions: the - AND operation can be implemented with pand, andpd, or andps. This - pattern also holds for ANDN, OR, and XOR. +* Vector "bitwise" operations have several variant instructions: the AND + operation can be implemented with pand, andpd, or andps. This pattern also + holds for ANDN, OR, and XOR. -* Vector "mov" instructions can be diversified (eg. movdqu instead of - movups) at the cost of a possible performance penalty. +* Vector "mov" instructions can be diversified (e.g. movdqu instead of movups) + at the cost of a possible performance penalty. -* Scalar FP arithmetic can be diversified by performing the operations - with the vector version of the instructions. +* Scalar FP arithmetic can be diversified by performing the operations with the + vector version of the instructions.
diff --git a/src/main.cpp b/src/main.cpp index a6cb490..fa5bb06 100644 --- a/src/main.cpp +++ b/src/main.cpp
@@ -8,8 +8,8 @@ //===----------------------------------------------------------------------===// /// /// \file -/// This file defines the entry point for translating PNaCl bitcode into -/// native code. +/// This file defines the entry point for translating PNaCl bitcode into native +/// code. /// //===----------------------------------------------------------------------===// @@ -22,8 +22,8 @@ Ice::Compiler Comp; // Can only compile the BrowserCompileServer w/ the NaCl compiler. #if PNACL_BROWSER_TRANSLATOR - // There are no real commandline arguments in the browser case. - // They are supplied via IPC. + // There are no real commandline arguments in the browser case. They are + // supplied via IPC. assert(argc == 1); (void)argc; (void)argv;