blob: 1848b87c0ce3874290a2a042f55a756e08bdce5e [file] [log] [blame]
//===- NaClBitcodeParser.h -----------------------------------*- C++ -*-===//
// Low-level bitcode driver to parse PNaCl bitcode files.
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Parses and processes low-level PNaCl bitcode files. Defines class
// NaClBitcodeParser.
//
// The concepts of PNaCl bitcode files are basically the same as for
// LLVM bitcode files (see http://llvm.org/docs/BitCodeFormat.html for
// details).
//
// The bitstream format is an abstract encoding of structured data,
// very similar to XML in some ways. Like XML, bitstream files contain
// tags, and nested structures, and you can parse the file without
// having to understand the tags. Unlike XML, the bitstream format is
// a binary encoding, and provides a mechanism for the file to
// self-describe "abbreviations". Abbreviations are effectively size
// optimizations for the content.
//
// The bitcode file is conceptually a sequence of "blocks", defining
// the content. Blocks contain a sequence of records and
// blocks. Nested content is defined using nested blocks. A (data)
// "record" is a tag, and a vector of (unsigned integer) values.
//
// Blocks are identified using Block IDs. Each kind of block has a
// unique block "ID". Records have two elements:
//
// a) A "code" identifying what type of record it is.
// b) A vector of "values" defining the contents of the record.
//
// The bitstream "reader" (defined in NaClBitstreamReader.h) defines
// the implementation that converts the low-level bit file into
// records and blocks. The bit stream is processed by moving a
// "cursor" over the sequence of bits.
//
// The bitstream reader assumes that each block/record is read in by
// first reading the "entry". The entry defines whether it corresponds
// to one of the following:
//
// a) At the beginning of a (possibly nested) block
// b) At the end of the current block.
// c) The input defines an abberviation.
// d) The input defines a record.
//
// An entry contains two values, a "kind" and an "ID". The kind
// defines which of the four cases above occurs. The ID provides
// identifying information on how to further process the input. For
// case (a), the ID is the identifier associated with the the block
// being processed. For case (b) and (c) the ID is ignored. For case
// (d) the ID identifies the abbreviation that should be used to parse
// the values.
//
// The class NaClBitcodeParser defines a bitcode parser that extracts
// the blocks and records, which are then processed using virtual
// callbacks. In general, you will want to implement derived classes
// for each type of block, so that the corresponding data is processed
// appropriately.
//
// The class NaClBitcodeParser parses a bitcode block, and defines a
// set of callbacks for that block, including:
//
// a) EnterBlock: What to do once we have entered the block.
// b) ProcessRecord: What to do with each parsed record.
// c) ParseBlock: Parse the (nested) block with the given ID.
// d) ExitBlock: What to do once we have finished processing the block.
//
// Note that a separate instance of NaClBitcodeParser (or a
// corresponding derived class) is created for each nested block. Each
// instance is responsible for only parsing a single block. Method
// ParseBlock creates new instances to parse nested blocks. Method
// GetEnclosingParser() can be used to refer to the parser associated
// with the enclosing block.
//
// Currently, the default processing of abbreviations is handled by
// the PNaCl bitstream reader, rather than by the parser.
//
// If you need to process abbreviations processed by the PNaCl
// bitstream reader, you must explicitly define a
// NaClBitcodeParserListener to listen (within the bitstream reader),
// and make appropriate call backs to the NaClBitcodeParser.
// The listener is glued to parsers using method SetListener.
//
// TODO(kschimpf): Define an intermediate derived class of
// NaClBitcodeParser that defines callbacks based on the actual
// structure of PNaCl bitcode files. That is, it has callbacks for
// each of the types of blocks (i.e. module, types, global variables,
// function, symbol tables etc). This derivied class can then be used
// as the base class for the bitcode reader.
// ===----------------------------------------------------------------------===//
#ifndef LLVM_BITCODE_NACL_NACLBITCODEPARSER_H
#define LLVM_BITCODE_NACL_NACLBITCODEPARSER_H
#include "llvm/Bitcode/NaCl/NaClBitcodeDefs.h"
#include "llvm/Bitcode/NaCl/NaClBitstreamReader.h"
#include "llvm/Support/raw_ostream.h"
#include <vector>
namespace llvm {
class NaClBitcodeRecord;
class NaClBitcodeParser;
class NaClBitcodeParserListener;
// Defines the base class for data extracted from the input bitstream
// (i.e blocks and records).
class NaClBitcodeData {
void operator=(const NaClBitcodeData &) = delete;
public:
/// Create data element to be read from input cursor.
explicit NaClBitcodeData(NaClBitstreamCursor &Cursor)
: Cursor(Cursor), StartBit(Cursor.GetCurrentBitNo()) {}
/// Create copy of the given data element.
explicit NaClBitcodeData(const NaClBitcodeData &Data)
: Cursor(Data.Cursor), StartBit(Data.StartBit) {}
/// Returns the bitstream reader being used.
NaClBitstreamReader &GetReader() const {
return *Cursor.getBitStreamReader();
}
/// Returns the cursor position within the bitstream.
NaClBitstreamCursor &GetCursor() const { return Cursor; }
/// Returns the number of bits defined by the data.
uint64_t GetNumBits() const {
return GetCursor().GetCurrentBitNo() - StartBit;
}
/// Returns the first bit of the stream data.
uint64_t GetStartBit() const { return StartBit; }
protected:
/// Change the start bit for the data to the new value.
void SetStartBit(uint64_t NewValue) { StartBit = NewValue; }
private:
// The bitstream cursor defining location within the bitcode file.
NaClBitstreamCursor &Cursor;
// Start bit for the record.
uint64_t StartBit;
};
/// Models the block defined by a (begin) block record, through the
/// (end) block record.
class NaClBitcodeBlock : public NaClBitcodeData {
NaClBitcodeBlock(const NaClBitcodeBlock &) = delete;
void operator=(const NaClBitcodeBlock &) = delete;
public:
/// Given the found (begin) block record for block BlockID, create
/// the corresponding data associated with that block.
NaClBitcodeBlock(unsigned BlockID, const NaClBitcodeRecord &Record);
/// Create block data for block BlockID, using the input cursor.
NaClBitcodeBlock(unsigned BlockID, NaClBitstreamCursor &Cursor)
: NaClBitcodeData(Cursor), BlockID(BlockID), EnclosingBlock(0) {
LocalStartBit = GetStartBit();
}
/// Print the contents out to the given stream.
void Print(raw_ostream &os) const;
/// Returns pointer to the enclosing block.
const NaClBitcodeBlock *GetEnclosingBlock() const { return EnclosingBlock; }
/// Returns the block ID of the block.
unsigned GetBlockID() const { return BlockID; }
/// Returns the number of bits in the block associated with the
/// bitcode parser parsing this block, excluding nested blocks.
unsigned GetLocalNumBits() const {
return GetCursor().GetCurrentBitNo() - LocalStartBit;
}
protected:
// The block ID associated with this record.
unsigned BlockID;
// The enclosing block, if defined.
const NaClBitcodeBlock *EnclosingBlock;
// Start bit for the block, updated to skip nested blocks.
uint64_t LocalStartBit;
// Note: We friend class NaClBitcodeParser, so that it can
// update field LocalStartBit.
friend class NaClBitcodeParser;
};
typedef NaClBitcodeRecordVector NaClRecordVector;
class NaClBitcodeRecordData {
NaClBitcodeRecordData &operator=(const NaClBitcodeRecordData &) = delete;
public:
NaClBitcodeRecordData(unsigned Code, const NaClRecordVector &Values)
: Code(Code), Values(Values) {}
explicit NaClBitcodeRecordData(const NaClBitcodeRecordData &Record)
: Code(Record.Code), Values(Record.Values) {}
NaClBitcodeRecordData() : Code(0) {}
// The selector code associated with the record.
unsigned Code;
// The sequence of values defining the parsed record.
NaClRecordVector Values;
void Print(raw_ostream &strm) const;
};
inline raw_ostream &operator<<(raw_ostream &Strm,
const NaClBitcodeRecordData &Data) {
Data.Print(Strm);
return Strm;
}
/// Simple container class to convert the values of the corresponding
/// read record to a simpler form, only containing values.
struct NaClBitcodeValues {
public:
NaClBitcodeValues(const NaClBitcodeRecordData &Record) : Record(Record) {}
size_t size() const { return Record.Values.size() + 1; }
uint64_t operator[](size_t index) const {
return index == 0 ? Record.Code : Record.Values[index - 1];
}
private:
const NaClBitcodeRecordData &Record;
};
/// Defines the data associated with reading a block record in the
/// PNaCl bitcode stream.
class NaClBitcodeRecord : public NaClBitcodeData {
public:
/// Type for vector of values representing a record.
typedef NaClRecordVector RecordVector;
/// Creates a bitcode record, starting at the position defined
/// by cursor.
explicit NaClBitcodeRecord(const NaClBitcodeBlock &Block)
: NaClBitcodeData(Block.GetCursor()), Block(Block) {}
/// Print the contents out to the given stream.
void Print(raw_ostream &os) const;
/// The block the record appears in.
const NaClBitcodeBlock &GetBlock() const { return Block; }
/// Returns the block ID associated with the record.
unsigned GetBlockID() const { return Block.GetBlockID(); }
/// Returns the kind of entry read from the input stream.
unsigned GetEntryKind() const { return Entry.Kind; }
/// Returns the code value (i.e. selector) associated with the
/// record.
unsigned GetCode() const { return Data.Code; }
/// Returns the EntryID (e.g. abbreviation if !=
/// naclbitc::UNABBREV_RECORD) associated with the record. Note:
/// for block-enter, block-exit, and define-abbreviation, EntryID is
/// not the corresponding abbreviation.
unsigned GetEntryID() const { return Entry.ID; }
/// Returns the (value) record associated with the read record.
const RecordVector &GetValues() const { return Data.Values; }
/// Allows lower level access to data representing record.
const NaClBitcodeRecordData &GetRecordData() const { return Data; }
/// Returns true if the record was read using an abbreviation.
bool UsedAnAbbreviation() const {
return GetEntryKind() == NaClBitstreamEntry::Record &&
GetEntryID() != naclbitc::UNABBREV_RECORD;
}
/// Returns the abbrevation index used to read the record.
/// Returns naclbitc::UNABBREV_RECORD if not applicable.
unsigned GetAbbreviationIndex() const {
return UsedAnAbbreviation()
? GetEntryID()
: static_cast<unsigned>(naclbitc::UNABBREV_RECORD);
}
/// Destructively change the abbreviation ID to the given value.
void SetAbbreviationIndex(unsigned Index) { Entry.ID = Index; }
protected:
// The block associated with the record.
const NaClBitcodeBlock &Block;
// The data of the record.
NaClBitcodeRecordData Data;
// The entry (i.e. value(s) preceding the record that define what
// value comes next).
NaClBitstreamEntry Entry;
private:
// Allows class NaClBitcodeParser to read values into the
// record, thereby hiding the details of how to read values.
friend class NaClBitcodeParser;
friend class NaClBitcodeParserListener;
/// Read bitstream entry. Defines what construct appears next in the
/// bitstream.
void ReadEntry() {
SetStartBit(GetCursor().GetCurrentBitNo());
Entry =
GetCursor().advance(NaClBitstreamCursor::AF_DontAutoprocessAbbrevs, 0);
}
/// Reads in a record's values, if the entry defines a record (Must
/// be called after ReadEntry).
void ReadValues() {
Data.Values.clear();
Data.Code = GetCursor().readRecord(Entry.ID, Data.Values);
}
NaClBitcodeRecord(const NaClBitcodeRecord &Rcd) = delete;
void operator=(const NaClBitcodeRecord &Rcd) = delete;
};
inline raw_ostream &operator<<(raw_ostream &Strm,
const NaClBitcodeRecord &Record) {
Record.Print(Strm);
return Strm;
}
/// Defines a listener to handle abbreviations within a bitcode file.
/// In particular, abbreviations and the BlockInfo block are made more
/// explicit, and then sent to the parser through virtuals
/// ProcessAbbreviation and SetBID.
class NaClBitcodeParserListener : public NaClAbbrevListener {
friend class NaClBitcodeParser;
public:
// Constructs a listener for the given parser. Note: All nested
// parsers automatically inherit this listener.
NaClBitcodeParserListener(NaClBitcodeParser *Parser)
: Parser(Parser), GlobalBlockID(naclbitc::BLOCKINFO_BLOCK_ID) {}
virtual ~NaClBitcodeParserListener() {}
private:
virtual void BeginBlockInfoBlock(unsigned NumWords);
virtual void SetBID();
virtual void EndBlockInfoBlock();
virtual void ProcessAbbreviation(NaClBitCodeAbbrev *Abbrev, bool IsLocal);
/// The block parser currently being listened to.
NaClBitcodeParser *Parser;
/// The block ID to use if a global abbreviation. Note: This field is
/// updated by calls to method SetBID.
unsigned GlobalBlockID;
};
/// Parses a block in the PNaCl bitcode stream.
class NaClBitcodeParser {
// Allow listener privledges, so that it can update/call the parser
// using a clean API.
friend class NaClBitcodeParserListener;
// Implements an error handler for errors in the bitstream reader.
// Redirects bitstream reader errors to corresponding parrser error
// reporting function.
class ErrorHandler : public NaClBitstreamCursor::ErrorHandler {
NaClBitcodeParser *Parser;
public:
ErrorHandler(NaClBitcodeParser *Parser, NaClBitstreamCursor &Cursor)
: NaClBitstreamCursor::ErrorHandler(Cursor), Parser(Parser) {}
LLVM_ATTRIBUTE_NORETURN
void Fatal(const std::string &ErrorMessage) const final {
Parser->FatalAt(getCurrentBitNo(), ErrorMessage);
llvm_unreachable("GCC treats noreturn virtual functions as returning");
}
~ErrorHandler() override {}
};
public:
// Creates a parser to parse the the block at the given cursor in
// the PNaCl bitcode stream. This instance is a "dummy" instance
// that starts the parser.
explicit NaClBitcodeParser(NaClBitstreamCursor &Cursor)
: EnclosingParser(0), Block(ILLEGAL_BLOCK_ID, Cursor), Record(Block),
Listener(0), ErrStream(&errs()) {
std::unique_ptr<NaClBitstreamCursor::ErrorHandler> ErrHandler(
new ErrorHandler(this, Cursor));
Cursor.setErrorHandler(ErrHandler);
}
virtual ~NaClBitcodeParser();
/// Reads the (top-level) block associated with the given block
/// record at the stream cursor. Returns true if unable to parse.
/// Can be called multiple times to parse multiple blocks.
bool Parse();
// Called once the bitstream reader has entered the corresponding
// subblock. Argument NumWords is set to the number of words in the
// corresponding subblock.
virtual void EnterBlock(unsigned /*NumWords*/) {}
// Called when the corresponding EndBlock of the block being parsed
// is found.
virtual void ExitBlock() {}
// Called after each record (within the block) is read (into field Record).
virtual void ProcessRecord() {}
// Called if a SetBID record is encountered in the BlockInfo block,
// and the parser has a listener.
virtual void SetBID() {}
// Called to process an abbreviation if the parser has a listener.
virtual void ProcessAbbreviation(unsigned /*BlockID*/,
NaClBitCodeAbbrev * /*Abbrev*/,
bool /*IsLocal*/) {}
// Creates an instance of the NaClBitcodeParser to use to parse the
// block with the given block ID, and then call's method
// ParseThisBlock() to parse the corresponding block. Note:
// Each derived class should define it's own version of this
// method, following the pattern below.
virtual bool ParseBlock(unsigned BlockID) {
// Default implementation just builds a parser that does nothing.
NaClBitcodeParser Parser(BlockID, this);
return Parser.ParseThisBlock();
}
// Changes the stream to print errors to, and returns the old error stream.
// There are two use cases:
// 1) To change (from the default errs()) inside the constructor of the
// derived class. In this context, it will be used for all error
// messages for the derived class.
// 2) Temporarily modify it for a single error message.
raw_ostream &setErrStream(raw_ostream &Stream) {
raw_ostream &OldErrStream = *ErrStream;
ErrStream = &Stream;
return OldErrStream;
}
// Called when an error occurs. BitPosition is the bit position the
// error was found, and Message is the error to report. Always
// returns true (the error return value of Parse). Level is
// the severity of the error.
virtual bool ErrorAt(naclbitc::ErrorLevel Level, uint64_t BitPosition,
const std::string &Message);
bool ErrorAt(uint64_t BitPosition, const std::string &Message) {
return ErrorAt(naclbitc::Error, BitPosition, Message);
}
// Called when an error occurs. Message is the error to
// report. Always returns true (the error return value of Parse).
bool Error(const std::string &Message) {
return ErrorAt(Record.GetStartBit(), Message);
}
// Called when a fatal error occurs. BitPosition is the bit position
// the error was found, and Message is the error to report. Does not
// return.
LLVM_ATTRIBUTE_NORETURN
void FatalAt(uint64_t BitPosition, const std::string &Message) {
ErrorAt(naclbitc::Fatal, BitPosition, Message);
llvm_unreachable("Fatal errors should not return");
}
// Called when a fatal error occurs. Message is the error to
// report. Does not return.
LLVM_ATTRIBUTE_NORETURN
void Fatal(const std::string &Message) {
FatalAt(Record.GetStartBit(), Message);
llvm_unreachable("GCC treats noreturn virtual functions as returning");
}
// Generates fatal generic error message.
LLVM_ATTRIBUTE_NORETURN
void Fatal() { Fatal("Fatal error occurred!"); }
// Returns the number of bits in this block, including nested blocks.
unsigned GetBlockNumBits() const { return Block.GetNumBits(); }
// Returns the number of bits in this block, excluding nested blocks.
unsigned GetBlockLocalNumBits() const { return Block.GetLocalNumBits(); }
/// Returns the block ID associated with the Parser.
unsigned GetBlockID() const { return Block.GetBlockID(); }
NaClBitcodeBlock &GetBlock() { return Block; }
/// Returns the enclosing parser of this block.
NaClBitcodeParser *GetEnclosingParser() const {
// Note: The top-level parser instance is a dummy instance
// and is not considered an enclosing parser.
return EnclosingParser->EnclosingParser ? EnclosingParser : 0;
}
// Parses the block using the parser defined by
// ParseBlock(unsigned). Returns true if unable to parse the
// block. Note: Should only be called by virtual ParseBlock(unsigned).
bool ParseThisBlock() {
bool Results;
if (Listener) {
NaClBitcodeParser *CallingParser = Listener->Parser;
Listener->Parser = this;
Results = ParseThisBlockInternal();
Listener->Parser = CallingParser;
} else {
Results = ParseThisBlockInternal();
}
return Results;
}
/// Skips the current block, assuming the parser is at the beginning
/// of the block. That is, Record.GetEntryKind() equals
/// NaClBitstreamEntry::SubBlock. Returns false if
/// successful. Otherwise returns 1.
bool SkipBlock() {
if (Record.GetEntryKind() != NaClBitstreamEntry::SubBlock)
return Error("SkipBlock on non-block record");
return Record.GetCursor().SkipBlock();
}
protected:
// The containing parser.
NaClBitcodeParser *EnclosingParser;
// The block the parser is associated with.
NaClBitcodeBlock Block;
// The current record (within the block) being processed.
NaClBitcodeRecord Record;
// The listener (if any) to use.
NaClBitcodeParserListener *Listener;
// The error stream to use if non-null (uses errs() if null).
raw_ostream *ErrStream;
// Creates a block parser to parse the block associated with the bitcode entry
// that defines the beginning of a block. This instance actually parses the
// corresponding block. Inherits the bitstream cursor from the
// EnclosingParser.
NaClBitcodeParser(unsigned BlockID, NaClBitcodeParser *EnclosingParser)
: EnclosingParser(EnclosingParser),
Block(BlockID, EnclosingParser->Record), Record(Block),
Listener(EnclosingParser->Listener),
ErrStream(EnclosingParser->ErrStream) {}
// Same as above, but use the supplied bitstream cursor (instead of
// inheriting from the enclosing parser). This constructor allows
// parallel parsing of subblocks, by allowing the caller to generate
// a different Cursor for each block to be parsed in parallel.
NaClBitcodeParser(unsigned BlockID, NaClBitcodeParser *EnclosingParser,
NaClBitstreamCursor &Cursor)
: EnclosingParser(EnclosingParser), Block(BlockID, Cursor), Record(Block),
Listener(EnclosingParser->Listener),
ErrStream(EnclosingParser->ErrStream) {}
/// Defines the listener for this block, and all enclosing blocks,
/// to be the given listener. Should be set in the constructor.
void SetListener(NaClBitcodeParserListener *UseListener) {
Listener = UseListener;
}
private:
// Special constant identifying the top-level instance.
static const unsigned ILLEGAL_BLOCK_ID = UINT_MAX;
// Parses the block. Returns true if unable to parse the
// block. Note: Should only be called by virtual ParseThisBlock.
bool ParseThisBlockInternal() {
bool Results;
if (GetBlockID() == naclbitc::BLOCKINFO_BLOCK_ID) {
Results = ParseBlockInfoInternal();
} else {
Results = ParseBlockInternal();
ExitBlock();
}
return Results;
}
// Parses a BlockInfo block, where processing is handled through
// a listener in the bitstream reader.
bool ParseBlockInfoInternal();
// Parses the non-BlockInfo block. Returns true if unable to parse the
// block.
bool ParseBlockInternal();
void operator=(const NaClBitcodeParser &Parser) = delete;
NaClBitcodeParser(const NaClBitcodeParser &Parser) = delete;
};
} // namespace llvm
#endif