blob: 4c0fa8f7870606a3fc54e8c3d1766c9c3cb4bcec [file] [log] [blame]
// Copyright (C) 2019 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package parser implements a SPIR-V assembly parser.
package parser
import (
"fmt"
"io"
"log"
"strings"
"unicode"
"unicode/utf8"
"github.com/KhronosGroup/SPIRV-Tools/utils/vscode/src/schema"
)
// Type is an enumerator of token types.
type Type int
// Type enumerators
const (
Ident Type = iota // Foo
PIdent // %32, %foo
Integer
Float
String
Operator
Comment
Newline
)
func (t Type) String() string {
switch t {
case Ident:
return "Ident"
case PIdent:
return "PIdent"
case Integer:
return "Integer"
case Float:
return "Float"
case String:
return "String"
case Operator:
return "Operator"
case Comment:
return "Comment"
default:
return "<unknown>"
}
}
// Token represents a single lexed token.
type Token struct {
Type Type
Range Range
}
func (t Token) String() string { return fmt.Sprintf("{%v %v}", t.Type, t.Range) }
// Text returns the tokens text from the source.
func (t Token) Text(lines []string) string { return t.Range.Text(lines) }
// Range represents an interval in a text file.
type Range struct {
Start Position
End Position
}
func (r Range) String() string { return fmt.Sprintf("[%v %v]", r.Start, r.End) }
// Text returns the text for the given Range in the provided lines.
func (r Range) Text(lines []string) string {
sl, sc := r.Start.Line-1, r.Start.Column-1
if sl < 0 || sc < 0 || sl > len(lines) || sc > len(lines[sl]) {
return fmt.Sprintf("<invalid start position %v>", r.Start)
}
el, ec := r.End.Line-1, r.End.Column-1
if el < 0 || ec < 0 || el > len(lines) || ec > len(lines[sl]) {
return fmt.Sprintf("<invalid end position %v>", r.End)
}
sb := strings.Builder{}
if sl != el {
sb.WriteString(lines[sl][sc:])
for l := sl + 1; l < el; l++ {
sb.WriteString(lines[l])
}
sb.WriteString(lines[el][:ec])
} else {
sb.WriteString(lines[sl][sc:ec])
}
return sb.String()
}
// Contains returns true if p is in r.
func (r Range) Contains(p Position) bool {
return !(p.LessThan(r.Start) || p.GreaterThan(r.End))
}
func (r *Range) grow(o Range) {
if !r.Start.IsValid() || o.Start.LessThan(r.Start) {
r.Start = o.Start
}
if !r.End.IsValid() || o.End.GreaterThan(r.End) {
r.End = o.End
}
}
// Position holds a line and column position in a text file.
type Position struct {
Line, Column int
}
func (p Position) String() string { return fmt.Sprintf("%v:%v", p.Line, p.Column) }
// IsValid returns true if the position has a line and column greater than 1.
func (p Position) IsValid() bool { return p.Line > 0 && p.Column > 0 }
// LessThan returns true iff o is before p.
func (p Position) LessThan(o Position) bool {
switch {
case !p.IsValid() || !o.IsValid():
return false
case p.Line < o.Line:
return true
case p.Line > o.Line:
return false
case p.Column < o.Column:
return true
default:
return false
}
}
// GreaterThan returns true iff o is greater than p.
func (p Position) GreaterThan(o Position) bool {
switch {
case !p.IsValid() || !o.IsValid():
return false
case p.Line > o.Line:
return true
case p.Line < o.Line:
return false
case p.Column > o.Column:
return true
default:
return false
}
}
type lexer struct {
source string
lexerState
diags []Diagnostic
e error
}
type lexerState struct {
offset int // byte offset in source
toks []*Token // all the lexed tokens
pos Position // current position
}
// err appends an fmt.Printf style error into l.diags for the given token.
func (l *lexer) err(tok *Token, msg string, args ...interface{}) {
rng := Range{}
if tok != nil {
rng = tok.Range
}
l.diags = append(l.diags, Diagnostic{
Range: rng,
Severity: SeverityError,
Message: fmt.Sprintf(msg, args...),
})
}
// next returns the next rune, or io.EOF if the last rune has already been
// consumed.
func (l *lexer) next() rune {
if l.offset >= len(l.source) {
l.e = io.EOF
return 0
}
r, n := utf8.DecodeRuneInString(l.source[l.offset:])
l.offset += n
if n == 0 {
l.e = io.EOF
return 0
}
if r == '\n' {
l.pos.Line++
l.pos.Column = 1
} else {
l.pos.Column++
}
return r
}
// save returns the current lexerState.
func (l *lexer) save() lexerState {
return l.lexerState
}
// restore restores the current lexer state with s.
func (l *lexer) restore(s lexerState) {
l.lexerState = s
}
// pident processes the PIdent token at the current position.
// The lexer *must* know the next token is a PIdent before calling.
func (l *lexer) pident() {
tok := &Token{Type: PIdent, Range: Range{Start: l.pos, End: l.pos}}
if r := l.next(); r != '%' {
log.Fatalf("lexer expected '%%', got '%v'", r)
return
}
for l.e == nil {
s := l.save()
r := l.next()
if !isAlphaNumeric(r) && r != '_' {
l.restore(s)
break
}
}
tok.Range.End = l.pos
l.toks = append(l.toks, tok)
}
// numberOrIdent processes the Ident, Float or Integer token at the current
// position.
func (l *lexer) numberOrIdent() {
const Unknown Type = -1
tok := &Token{Type: Unknown, Range: Range{Start: l.pos, End: l.pos}}
loop:
for l.e == nil {
s := l.save()
r := l.next()
switch {
case r == '-', r == '+', isNumeric(r):
continue
case isAlpha(r), r == '_':
switch tok.Type {
case Unknown:
tok.Type = Ident
case Float, Integer:
l.err(tok, "invalid number")
return
}
case r == '.':
switch tok.Type {
case Unknown:
tok.Type = Float
default:
l.restore(s)
break loop
}
default:
if tok.Type == Unknown {
tok.Type = Integer
}
l.restore(s)
break loop
}
}
tok.Range.End = l.pos
l.toks = append(l.toks, tok)
}
// string processes the String token at the current position.
// The lexer *must* know the next token is a String before calling.
func (l *lexer) string() {
tok := &Token{Type: String, Range: Range{Start: l.pos, End: l.pos}}
if r := l.next(); r != '"' {
log.Fatalf("lexer expected '\"', got '%v'", r)
return
}
escape := false
for l.e == nil {
switch l.next() {
case '"':
if !escape {
tok.Range.End = l.pos
l.toks = append(l.toks, tok)
return
}
case '\\':
escape = !escape
default:
escape = false
}
}
}
// operator processes the Operator token at the current position.
// The lexer *must* know the next token is a Operator before calling.
func (l *lexer) operator() {
tok := &Token{Type: Operator, Range: Range{Start: l.pos, End: l.pos}}
for l.e == nil {
switch l.next() {
case '=', '|':
tok.Range.End = l.pos
l.toks = append(l.toks, tok)
return
}
}
}
// lineComment processes the Comment token at the current position.
// The lexer *must* know the next token is a Comment before calling.
func (l *lexer) lineComment() {
tok := &Token{Type: Comment, Range: Range{Start: l.pos, End: l.pos}}
if r := l.next(); r != ';' {
log.Fatalf("lexer expected ';', got '%v'", r)
return
}
for l.e == nil {
s := l.save()
switch l.next() {
case '\n':
l.restore(s)
tok.Range.End = l.pos
l.toks = append(l.toks, tok)
return
}
}
}
// newline processes the Newline token at the current position.
// The lexer *must* know the next token is a Newline before calling.
func (l *lexer) newline() {
tok := &Token{Type: Newline, Range: Range{Start: l.pos, End: l.pos}}
if r := l.next(); r != '\n' {
log.Fatalf("lexer expected '\n', got '%v'", r)
return
}
tok.Range.End = l.pos
l.toks = append(l.toks, tok)
}
// lex returns all the tokens and diagnostics after lexing source.
func lex(source string) ([]*Token, []Diagnostic, error) {
l := lexer{source: source, lexerState: lexerState{pos: Position{1, 1}}}
lastPos := Position{}
for l.e == nil {
// Integrity check that the parser is making progress
if l.pos == lastPos {
log.Panicf("Parsing stuck at %v", l.pos)
}
lastPos = l.pos
s := l.save()
r := l.next()
switch {
case r == '%':
l.restore(s)
l.pident()
case r == '+' || r == '-' || r == '_' || isAlphaNumeric(r):
l.restore(s)
l.numberOrIdent()
case r == '"':
l.restore(s)
l.string()
case r == '=', r == '|':
l.restore(s)
l.operator()
case r == ';':
l.restore(s)
l.lineComment()
case r == '\n':
l.restore(s)
l.newline()
}
}
if l.e != nil && l.e != io.EOF {
return nil, nil, l.e
}
return l.toks, l.diags, nil
}
func isNumeric(r rune) bool { return unicode.IsDigit(r) }
func isAlpha(r rune) bool { return unicode.IsLetter(r) }
func isAlphaNumeric(r rune) bool { return isAlpha(r) || isNumeric(r) }
type parser struct {
lines []string // all source lines
toks []*Token // all tokens
diags []Diagnostic // parser emitted diagnostics
idents map[string]*Identifier // identifiers by name
mappings map[*Token]interface{} // tokens to semantic map
extInstImports map[string]schema.OpcodeMap // extension imports by identifier
insts []*Instruction // all instructions
}
func (p *parser) parse() error {
for i := 0; i < len(p.toks); {
if p.newline(i) || p.comment(i) {
i++
continue
}
if n := p.instruction(i); n > 0 {
i += n
} else {
p.unexpected(i)
i++
}
}
return nil
}
// instruction parses the instruction starting at the i'th token.
func (p *parser) instruction(i int) (n int) {
inst := &Instruction{}
switch {
case p.opcode(i) != nil:
inst.Opcode = p.opcode(i)
inst.Tokens = []*Token{p.tok(i)}
p.mappings[p.tok(i)] = inst
n++
case p.opcode(i+2) != nil: // try '%id' '='
inst.Result, inst.Opcode = p.pident(i), p.opcode(i+2)
if inst.Result == nil || p.operator(i+1) != "=" {
return 0
}
n += 3
inst.Tokens = []*Token{p.tok(i), p.tok(i + 1), p.tok(i + 2)}
p.mappings[p.tok(i+2)] = inst
default:
return
}
expectsResult := len(inst.Opcode.Operands) > 0 && IsResult(inst.Opcode.Operands[0].Kind)
operands := inst.Opcode.Operands
switch {
case inst.Result != nil && !expectsResult:
p.err(inst.Result, "'%s' does not have a result", inst.Opcode.Opname)
return
case inst.Result == nil && expectsResult:
p.err(p.tok(i), "'%s' expects a result", inst.Opcode.Opname)
return
case inst.Result != nil && expectsResult:
// Check the result is of the correct type
o := inst.Opcode.Operands[0]
p.operand(o.Name, o.Kind, i, false)
operands = operands[1:]
p.addIdentDef(inst.Result.Text(p.lines), inst, p.tok(i))
}
processOperand := func(o schema.Operand) bool {
if p.newline(i + n) {
return false
}
switch o.Quantifier {
case schema.Once:
if op, c := p.operand(o.Name, o.Kind, i+n, false); op != nil {
inst.Tokens = append(inst.Tokens, op.Tokens...)
n += c
}
case schema.ZeroOrOnce:
if op, c := p.operand(o.Name, o.Kind, i+n, true); op != nil {
inst.Tokens = append(inst.Tokens, op.Tokens...)
n += c
}
case schema.ZeroOrMany:
for !p.newline(i + n) {
if op, c := p.operand(o.Name, o.Kind, i+n, true); op != nil {
inst.Tokens = append(inst.Tokens, op.Tokens...)
n += c
} else {
return false
}
}
}
return true
}
for _, o := range operands {
if !processOperand(o) {
break
}
if inst.Opcode == schema.OpExtInst && n == 4 {
extImportTok, extNameTok := p.tok(i+n), p.tok(i+n+1)
extImport := extImportTok.Text(p.lines)
if extOpcodes, ok := p.extInstImports[extImport]; ok {
extName := extNameTok.Text(p.lines)
if extOpcode, ok := extOpcodes[extName]; ok {
n += 2 // skip ext import, ext name
for _, o := range extOpcode.Operands {
if !processOperand(o) {
break
}
}
} else {
p.err(extNameTok, "Unknown extension opcode '%s'", extName)
}
} else {
p.err(extImportTok, "Expected identifier to OpExtInstImport")
}
}
}
for _, t := range inst.Tokens {
inst.Range.grow(t.Range)
}
p.insts = append(p.insts, inst)
if inst.Opcode == schema.OpExtInstImport && len(inst.Tokens) >= 4 {
// Instruction is a OpExtInstImport. Keep track of this.
extTok := inst.Tokens[3]
extName := strings.Trim(extTok.Text(p.lines), `"`)
extOpcodes, ok := schema.ExtOpcodes[extName]
if !ok {
p.err(extTok, "Unknown extension '%s'", extName)
}
extImport := inst.Result.Text(p.lines)
p.extInstImports[extImport] = extOpcodes
}
return
}
// operand parses the operand with the name n, kind k, starting at the i'th
// token.
func (p *parser) operand(n string, k *schema.OperandKind, i int, optional bool) (*Operand, int) {
tok := p.tok(i)
if tok == nil {
return nil, 0
}
op := &Operand{
Name: n,
Kind: k,
Tokens: []*Token{tok},
}
p.mappings[tok] = op
switch k.Category {
case schema.OperandCategoryBitEnum, schema.OperandCategoryValueEnum:
s := tok.Text(p.lines)
for _, e := range k.Enumerants {
if e.Enumerant == s {
count := 1
for _, param := range e.Parameters {
p, c := p.operand(param.Name, param.Kind, i+count, false)
if p != nil {
op.Tokens = append(op.Tokens, p.Tokens...)
op.Parameters = append(op.Parameters, p)
}
count += c
}
// Handle bitfield '|' chains
if p.tok(i+count).Text(p.lines) == "|" {
count++ // '|'
p, c := p.operand(n, k, i+count, false)
if p != nil {
op.Tokens = append(op.Tokens, p.Tokens...)
op.Parameters = append(op.Parameters, p)
}
count += c
}
return op, count
}
}
if !optional {
p.err(p.tok(i), "invalid operand value '%s'", s)
}
return nil, 0
case schema.OperandCategoryID:
id := p.pident(i)
if id != nil {
p.addIdentRef(p.tok(i))
return op, 1
}
if !optional {
p.err(p.tok(i), "operand requires id, got '%s'", tok.Text(p.lines))
}
return nil, 0
case schema.OperandCategoryLiteral:
switch tok.Type {
case String, Integer, Float, Ident:
return op, 1
}
if !optional {
p.err(p.tok(i), "operand requires literal, got '%s'", tok.Text(p.lines))
}
return nil, 0
case schema.OperandCategoryComposite:
n := 1
for _, b := range k.Bases {
o, c := p.operand(b.Kind, b, i+n, optional)
if o != nil {
op.Tokens = append(op.Tokens, o.Tokens...)
}
n += c
}
return op, n
default:
p.err(p.tok(i), "OperandKind '%s' has unexpected category '%s'", k.Kind, k.Category)
return nil, 0
}
}
// tok returns the i'th token, or nil if i is out of bounds.
func (p *parser) tok(i int) *Token {
if i < 0 || i >= len(p.toks) {
return nil
}
return p.toks[i]
}
// opcode returns the schema.Opcode for the i'th token, or nil if the i'th token
// does not represent an opcode.
func (p *parser) opcode(i int) *schema.Opcode {
if tok := p.ident(i); tok != nil {
name := tok.Text(p.lines)
if inst, found := schema.Opcodes[name]; found {
return inst
}
}
return nil
}
// operator returns the operator for the i'th token, or and empty string if the
// i'th token is not an operator.
func (p *parser) operator(i int) string {
if tok := p.tok(i); tok != nil && tok.Type == Operator {
return tok.Text(p.lines)
}
return ""
}
// ident returns the i'th token if it is an Ident, otherwise nil.
func (p *parser) ident(i int) *Token {
if tok := p.tok(i); tok != nil && tok.Type == Ident {
return tok
}
return nil
}
// pident returns the i'th token if it is an PIdent, otherwise nil.
func (p *parser) pident(i int) *Token {
if tok := p.tok(i); tok != nil && tok.Type == PIdent {
return tok
}
return nil
}
// comment returns true if the i'th token is a Comment, otherwise false.
func (p *parser) comment(i int) bool {
if tok := p.tok(i); tok != nil && tok.Type == Comment {
return true
}
return false
}
// newline returns true if the i'th token is a Newline, otherwise false.
func (p *parser) newline(i int) bool {
if tok := p.tok(i); tok != nil && tok.Type == Newline {
return true
}
return false
}
// unexpected emits an 'unexpected token error' for the i'th token.
func (p *parser) unexpected(i int) {
p.err(p.toks[i], "syntax error: unexpected '%s'", p.toks[i].Text(p.lines))
}
// addIdentDef records the token definition for the instruction inst with the
// given id.
func (p *parser) addIdentDef(id string, inst *Instruction, def *Token) {
i, existing := p.idents[id]
if !existing {
i = &Identifier{}
p.idents[id] = i
}
if i.Definition == nil {
i.Definition = inst
} else {
p.err(def, "id '%v' redeclared", id)
}
}
// addIdentRef adds a identifier reference for the token ref.
func (p *parser) addIdentRef(ref *Token) {
id := ref.Text(p.lines)
i, existing := p.idents[id]
if !existing {
i = &Identifier{}
p.idents[id] = i
}
i.References = append(i.References, ref)
}
// err appends an fmt.Printf style error into l.diags for the given token.
func (p *parser) err(tok *Token, msg string, args ...interface{}) {
rng := Range{}
if tok != nil {
rng = tok.Range
}
p.diags = append(p.diags, Diagnostic{
Range: rng,
Severity: SeverityError,
Message: fmt.Sprintf(msg, args...),
})
}
// Parse parses the SPIR-V assembly string source, returning the parse results.
func Parse(source string) (Results, error) {
toks, diags, err := lex(source)
if err != nil {
return Results{}, err
}
lines := strings.SplitAfter(source, "\n")
p := parser{
lines: lines,
toks: toks,
idents: map[string]*Identifier{},
mappings: map[*Token]interface{}{},
extInstImports: map[string]schema.OpcodeMap{},
}
if err := p.parse(); err != nil {
return Results{}, err
}
diags = append(diags, p.diags...)
return Results{
Lines: lines,
Tokens: toks,
Diagnostics: p.diags,
Identifiers: p.idents,
Mappings: p.mappings,
}, nil
}
// IsResult returns true if k is used to store the result of an instruction.
func IsResult(k *schema.OperandKind) bool {
switch k {
case schema.OperandKindIdResult, schema.OperandKindIdResultType:
return true
default:
return false
}
}
// Results holds the output of Parse().
type Results struct {
Lines []string
Tokens []*Token
Diagnostics []Diagnostic
Identifiers map[string]*Identifier // identifiers by name
Mappings map[*Token]interface{} // tokens to semantic map
}
// Instruction describes a single instruction instance
type Instruction struct {
Tokens []*Token // all the tokens that make up the instruction
Result *Token // the token that represents the result of the instruction, or nil
Operands []*Operand // the operands of the instruction
Range Range // the textual range of the instruction
Opcode *schema.Opcode // the opcode for the instruction
}
// Operand describes a single operand instance
type Operand struct {
Name string // name of the operand
Kind *schema.OperandKind // kind of the operand
Tokens []*Token // all the tokens that make up the operand
Parameters []*Operand // all the parameters for the operand
}
// Identifier describes a single, unique SPIR-V identifier (i.e. %32)
type Identifier struct {
Definition *Instruction // where the identifier was defined
References []*Token // all the places the identifier was referenced
}
// Severity is an enumerator of diagnostic severities
type Severity int
// Severity levels
const (
SeverityError Severity = iota
SeverityWarning
SeverityInformation
SeverityHint
)
// Diagnostic holds a single diagnostic message that was generated while
// parsing.
type Diagnostic struct {
Range Range
Severity Severity
Message string
}