| // Copyright (C) 2019 Google Inc. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| // Package parser implements a SPIR-V assembly parser. |
| package parser |
| |
| import ( |
| "fmt" |
| "io" |
| "log" |
| "strings" |
| "unicode" |
| "unicode/utf8" |
| |
| "github.com/KhronosGroup/SPIRV-Tools/utils/vscode/src/schema" |
| ) |
| |
| // Type is an enumerator of token types. |
| type Type int |
| |
| // Type enumerators |
| const ( |
| Ident Type = iota // Foo |
| PIdent // %32, %foo |
| Integer |
| Float |
| String |
| Operator |
| Comment |
| Newline |
| ) |
| |
| func (t Type) String() string { |
| switch t { |
| case Ident: |
| return "Ident" |
| case PIdent: |
| return "PIdent" |
| case Integer: |
| return "Integer" |
| case Float: |
| return "Float" |
| case String: |
| return "String" |
| case Operator: |
| return "Operator" |
| case Comment: |
| return "Comment" |
| default: |
| return "<unknown>" |
| } |
| } |
| |
| // Token represents a single lexed token. |
| type Token struct { |
| Type Type |
| Range Range |
| } |
| |
| func (t Token) String() string { return fmt.Sprintf("{%v %v}", t.Type, t.Range) } |
| |
| // Text returns the tokens text from the source. |
| func (t Token) Text(lines []string) string { return t.Range.Text(lines) } |
| |
| // Range represents an interval in a text file. |
| type Range struct { |
| Start Position |
| End Position |
| } |
| |
| func (r Range) String() string { return fmt.Sprintf("[%v %v]", r.Start, r.End) } |
| |
| // Text returns the text for the given Range in the provided lines. |
| func (r Range) Text(lines []string) string { |
| sl, sc := r.Start.Line-1, r.Start.Column-1 |
| if sl < 0 || sc < 0 || sl > len(lines) || sc > len(lines[sl]) { |
| return fmt.Sprintf("<invalid start position %v>", r.Start) |
| } |
| el, ec := r.End.Line-1, r.End.Column-1 |
| if el < 0 || ec < 0 || el > len(lines) || ec > len(lines[sl]) { |
| return fmt.Sprintf("<invalid end position %v>", r.End) |
| } |
| |
| sb := strings.Builder{} |
| if sl != el { |
| sb.WriteString(lines[sl][sc:]) |
| for l := sl + 1; l < el; l++ { |
| sb.WriteString(lines[l]) |
| } |
| sb.WriteString(lines[el][:ec]) |
| } else { |
| sb.WriteString(lines[sl][sc:ec]) |
| } |
| return sb.String() |
| } |
| |
| // Contains returns true if p is in r. |
| func (r Range) Contains(p Position) bool { |
| return !(p.LessThan(r.Start) || p.GreaterThan(r.End)) |
| } |
| |
| func (r *Range) grow(o Range) { |
| if !r.Start.IsValid() || o.Start.LessThan(r.Start) { |
| r.Start = o.Start |
| } |
| if !r.End.IsValid() || o.End.GreaterThan(r.End) { |
| r.End = o.End |
| } |
| } |
| |
| // Position holds a line and column position in a text file. |
| type Position struct { |
| Line, Column int |
| } |
| |
| func (p Position) String() string { return fmt.Sprintf("%v:%v", p.Line, p.Column) } |
| |
| // IsValid returns true if the position has a line and column greater than 1. |
| func (p Position) IsValid() bool { return p.Line > 0 && p.Column > 0 } |
| |
| // LessThan returns true iff o is before p. |
| func (p Position) LessThan(o Position) bool { |
| switch { |
| case !p.IsValid() || !o.IsValid(): |
| return false |
| case p.Line < o.Line: |
| return true |
| case p.Line > o.Line: |
| return false |
| case p.Column < o.Column: |
| return true |
| default: |
| return false |
| } |
| } |
| |
| // GreaterThan returns true iff o is greater than p. |
| func (p Position) GreaterThan(o Position) bool { |
| switch { |
| case !p.IsValid() || !o.IsValid(): |
| return false |
| case p.Line > o.Line: |
| return true |
| case p.Line < o.Line: |
| return false |
| case p.Column > o.Column: |
| return true |
| default: |
| return false |
| } |
| } |
| |
| type lexer struct { |
| source string |
| lexerState |
| diags []Diagnostic |
| e error |
| } |
| |
| type lexerState struct { |
| offset int // byte offset in source |
| toks []*Token // all the lexed tokens |
| pos Position // current position |
| } |
| |
| // err appends an fmt.Printf style error into l.diags for the given token. |
| func (l *lexer) err(tok *Token, msg string, args ...interface{}) { |
| rng := Range{} |
| if tok != nil { |
| rng = tok.Range |
| } |
| l.diags = append(l.diags, Diagnostic{ |
| Range: rng, |
| Severity: SeverityError, |
| Message: fmt.Sprintf(msg, args...), |
| }) |
| } |
| |
| // next returns the next rune, or io.EOF if the last rune has already been |
| // consumed. |
| func (l *lexer) next() rune { |
| if l.offset >= len(l.source) { |
| l.e = io.EOF |
| return 0 |
| } |
| r, n := utf8.DecodeRuneInString(l.source[l.offset:]) |
| l.offset += n |
| if n == 0 { |
| l.e = io.EOF |
| return 0 |
| } |
| if r == '\n' { |
| l.pos.Line++ |
| l.pos.Column = 1 |
| } else { |
| l.pos.Column++ |
| } |
| return r |
| } |
| |
| // save returns the current lexerState. |
| func (l *lexer) save() lexerState { |
| return l.lexerState |
| } |
| |
| // restore restores the current lexer state with s. |
| func (l *lexer) restore(s lexerState) { |
| l.lexerState = s |
| } |
| |
| // pident processes the PIdent token at the current position. |
| // The lexer *must* know the next token is a PIdent before calling. |
| func (l *lexer) pident() { |
| tok := &Token{Type: PIdent, Range: Range{Start: l.pos, End: l.pos}} |
| if r := l.next(); r != '%' { |
| log.Fatalf("lexer expected '%%', got '%v'", r) |
| return |
| } |
| for l.e == nil { |
| s := l.save() |
| r := l.next() |
| if !isAlphaNumeric(r) && r != '_' { |
| l.restore(s) |
| break |
| } |
| } |
| tok.Range.End = l.pos |
| l.toks = append(l.toks, tok) |
| } |
| |
| // numberOrIdent processes the Ident, Float or Integer token at the current |
| // position. |
| func (l *lexer) numberOrIdent() { |
| const Unknown Type = -1 |
| tok := &Token{Type: Unknown, Range: Range{Start: l.pos, End: l.pos}} |
| loop: |
| for l.e == nil { |
| s := l.save() |
| r := l.next() |
| switch { |
| case r == '-', r == '+', isNumeric(r): |
| continue |
| case isAlpha(r), r == '_': |
| switch tok.Type { |
| case Unknown: |
| tok.Type = Ident |
| case Float, Integer: |
| l.err(tok, "invalid number") |
| return |
| } |
| case r == '.': |
| switch tok.Type { |
| case Unknown: |
| tok.Type = Float |
| default: |
| l.restore(s) |
| break loop |
| } |
| default: |
| if tok.Type == Unknown { |
| tok.Type = Integer |
| } |
| l.restore(s) |
| break loop |
| } |
| } |
| tok.Range.End = l.pos |
| l.toks = append(l.toks, tok) |
| } |
| |
| // string processes the String token at the current position. |
| // The lexer *must* know the next token is a String before calling. |
| func (l *lexer) string() { |
| tok := &Token{Type: String, Range: Range{Start: l.pos, End: l.pos}} |
| if r := l.next(); r != '"' { |
| log.Fatalf("lexer expected '\"', got '%v'", r) |
| return |
| } |
| escape := false |
| for l.e == nil { |
| switch l.next() { |
| case '"': |
| if !escape { |
| tok.Range.End = l.pos |
| l.toks = append(l.toks, tok) |
| return |
| } |
| case '\\': |
| escape = !escape |
| default: |
| escape = false |
| } |
| } |
| } |
| |
| // operator processes the Operator token at the current position. |
| // The lexer *must* know the next token is a Operator before calling. |
| func (l *lexer) operator() { |
| tok := &Token{Type: Operator, Range: Range{Start: l.pos, End: l.pos}} |
| for l.e == nil { |
| switch l.next() { |
| case '=', '|': |
| tok.Range.End = l.pos |
| l.toks = append(l.toks, tok) |
| return |
| } |
| } |
| } |
| |
| // lineComment processes the Comment token at the current position. |
| // The lexer *must* know the next token is a Comment before calling. |
| func (l *lexer) lineComment() { |
| tok := &Token{Type: Comment, Range: Range{Start: l.pos, End: l.pos}} |
| if r := l.next(); r != ';' { |
| log.Fatalf("lexer expected ';', got '%v'", r) |
| return |
| } |
| for l.e == nil { |
| s := l.save() |
| switch l.next() { |
| case '\n': |
| l.restore(s) |
| tok.Range.End = l.pos |
| l.toks = append(l.toks, tok) |
| return |
| } |
| } |
| } |
| |
| // newline processes the Newline token at the current position. |
| // The lexer *must* know the next token is a Newline before calling. |
| func (l *lexer) newline() { |
| tok := &Token{Type: Newline, Range: Range{Start: l.pos, End: l.pos}} |
| if r := l.next(); r != '\n' { |
| log.Fatalf("lexer expected '\n', got '%v'", r) |
| return |
| } |
| tok.Range.End = l.pos |
| l.toks = append(l.toks, tok) |
| } |
| |
| // lex returns all the tokens and diagnostics after lexing source. |
| func lex(source string) ([]*Token, []Diagnostic, error) { |
| l := lexer{source: source, lexerState: lexerState{pos: Position{1, 1}}} |
| |
| lastPos := Position{} |
| for l.e == nil { |
| // Integrity check that the parser is making progress |
| if l.pos == lastPos { |
| log.Panicf("Parsing stuck at %v", l.pos) |
| } |
| lastPos = l.pos |
| |
| s := l.save() |
| r := l.next() |
| switch { |
| case r == '%': |
| l.restore(s) |
| l.pident() |
| case r == '+' || r == '-' || r == '_' || isAlphaNumeric(r): |
| l.restore(s) |
| l.numberOrIdent() |
| case r == '"': |
| l.restore(s) |
| l.string() |
| case r == '=', r == '|': |
| l.restore(s) |
| l.operator() |
| case r == ';': |
| l.restore(s) |
| l.lineComment() |
| case r == '\n': |
| l.restore(s) |
| l.newline() |
| } |
| } |
| if l.e != nil && l.e != io.EOF { |
| return nil, nil, l.e |
| } |
| return l.toks, l.diags, nil |
| } |
| |
| func isNumeric(r rune) bool { return unicode.IsDigit(r) } |
| func isAlpha(r rune) bool { return unicode.IsLetter(r) } |
| func isAlphaNumeric(r rune) bool { return isAlpha(r) || isNumeric(r) } |
| |
| type parser struct { |
| lines []string // all source lines |
| toks []*Token // all tokens |
| diags []Diagnostic // parser emitted diagnostics |
| idents map[string]*Identifier // identifiers by name |
| mappings map[*Token]interface{} // tokens to semantic map |
| extInstImports map[string]schema.OpcodeMap // extension imports by identifier |
| insts []*Instruction // all instructions |
| } |
| |
| func (p *parser) parse() error { |
| for i := 0; i < len(p.toks); { |
| if p.newline(i) || p.comment(i) { |
| i++ |
| continue |
| } |
| if n := p.instruction(i); n > 0 { |
| i += n |
| } else { |
| p.unexpected(i) |
| i++ |
| } |
| } |
| return nil |
| } |
| |
| // instruction parses the instruction starting at the i'th token. |
| func (p *parser) instruction(i int) (n int) { |
| inst := &Instruction{} |
| |
| switch { |
| case p.opcode(i) != nil: |
| inst.Opcode = p.opcode(i) |
| inst.Tokens = []*Token{p.tok(i)} |
| p.mappings[p.tok(i)] = inst |
| n++ |
| case p.opcode(i+2) != nil: // try '%id' '=' |
| inst.Result, inst.Opcode = p.pident(i), p.opcode(i+2) |
| if inst.Result == nil || p.operator(i+1) != "=" { |
| return 0 |
| } |
| n += 3 |
| inst.Tokens = []*Token{p.tok(i), p.tok(i + 1), p.tok(i + 2)} |
| p.mappings[p.tok(i+2)] = inst |
| default: |
| return |
| } |
| |
| expectsResult := len(inst.Opcode.Operands) > 0 && IsResult(inst.Opcode.Operands[0].Kind) |
| operands := inst.Opcode.Operands |
| switch { |
| case inst.Result != nil && !expectsResult: |
| p.err(inst.Result, "'%s' does not have a result", inst.Opcode.Opname) |
| return |
| case inst.Result == nil && expectsResult: |
| p.err(p.tok(i), "'%s' expects a result", inst.Opcode.Opname) |
| return |
| case inst.Result != nil && expectsResult: |
| // Check the result is of the correct type |
| o := inst.Opcode.Operands[0] |
| p.operand(o.Name, o.Kind, i, false) |
| operands = operands[1:] |
| p.addIdentDef(inst.Result.Text(p.lines), inst, p.tok(i)) |
| } |
| |
| processOperand := func(o schema.Operand) bool { |
| if p.newline(i + n) { |
| return false |
| } |
| |
| switch o.Quantifier { |
| case schema.Once: |
| if op, c := p.operand(o.Name, o.Kind, i+n, false); op != nil { |
| inst.Tokens = append(inst.Tokens, op.Tokens...) |
| n += c |
| } |
| case schema.ZeroOrOnce: |
| if op, c := p.operand(o.Name, o.Kind, i+n, true); op != nil { |
| inst.Tokens = append(inst.Tokens, op.Tokens...) |
| n += c |
| } |
| case schema.ZeroOrMany: |
| for !p.newline(i + n) { |
| if op, c := p.operand(o.Name, o.Kind, i+n, true); op != nil { |
| inst.Tokens = append(inst.Tokens, op.Tokens...) |
| n += c |
| } else { |
| return false |
| } |
| } |
| } |
| return true |
| } |
| |
| for _, o := range operands { |
| if !processOperand(o) { |
| break |
| } |
| |
| if inst.Opcode == schema.OpExtInst && n == 4 { |
| extImportTok, extNameTok := p.tok(i+n), p.tok(i+n+1) |
| extImport := extImportTok.Text(p.lines) |
| if extOpcodes, ok := p.extInstImports[extImport]; ok { |
| extName := extNameTok.Text(p.lines) |
| if extOpcode, ok := extOpcodes[extName]; ok { |
| n += 2 // skip ext import, ext name |
| for _, o := range extOpcode.Operands { |
| if !processOperand(o) { |
| break |
| } |
| } |
| } else { |
| p.err(extNameTok, "Unknown extension opcode '%s'", extName) |
| } |
| } else { |
| p.err(extImportTok, "Expected identifier to OpExtInstImport") |
| } |
| } |
| } |
| |
| for _, t := range inst.Tokens { |
| inst.Range.grow(t.Range) |
| } |
| |
| p.insts = append(p.insts, inst) |
| |
| if inst.Opcode == schema.OpExtInstImport && len(inst.Tokens) >= 4 { |
| // Instruction is a OpExtInstImport. Keep track of this. |
| extTok := inst.Tokens[3] |
| extName := strings.Trim(extTok.Text(p.lines), `"`) |
| extOpcodes, ok := schema.ExtOpcodes[extName] |
| if !ok { |
| p.err(extTok, "Unknown extension '%s'", extName) |
| } |
| extImport := inst.Result.Text(p.lines) |
| p.extInstImports[extImport] = extOpcodes |
| } |
| |
| return |
| } |
| |
| // operand parses the operand with the name n, kind k, starting at the i'th |
| // token. |
| func (p *parser) operand(n string, k *schema.OperandKind, i int, optional bool) (*Operand, int) { |
| tok := p.tok(i) |
| if tok == nil { |
| return nil, 0 |
| } |
| |
| op := &Operand{ |
| Name: n, |
| Kind: k, |
| Tokens: []*Token{tok}, |
| } |
| p.mappings[tok] = op |
| |
| switch k.Category { |
| case schema.OperandCategoryBitEnum, schema.OperandCategoryValueEnum: |
| s := tok.Text(p.lines) |
| for _, e := range k.Enumerants { |
| if e.Enumerant == s { |
| count := 1 |
| for _, param := range e.Parameters { |
| p, c := p.operand(param.Name, param.Kind, i+count, false) |
| if p != nil { |
| op.Tokens = append(op.Tokens, p.Tokens...) |
| op.Parameters = append(op.Parameters, p) |
| } |
| count += c |
| } |
| |
| // Handle bitfield '|' chains |
| if p.tok(i+count).Text(p.lines) == "|" { |
| count++ // '|' |
| p, c := p.operand(n, k, i+count, false) |
| if p != nil { |
| op.Tokens = append(op.Tokens, p.Tokens...) |
| op.Parameters = append(op.Parameters, p) |
| } |
| count += c |
| } |
| |
| return op, count |
| } |
| } |
| if !optional { |
| p.err(p.tok(i), "invalid operand value '%s'", s) |
| } |
| |
| return nil, 0 |
| |
| case schema.OperandCategoryID: |
| id := p.pident(i) |
| if id != nil { |
| p.addIdentRef(p.tok(i)) |
| return op, 1 |
| } |
| if !optional { |
| p.err(p.tok(i), "operand requires id, got '%s'", tok.Text(p.lines)) |
| } |
| return nil, 0 |
| |
| case schema.OperandCategoryLiteral: |
| switch tok.Type { |
| case String, Integer, Float, Ident: |
| return op, 1 |
| } |
| if !optional { |
| p.err(p.tok(i), "operand requires literal, got '%s'", tok.Text(p.lines)) |
| } |
| return nil, 0 |
| |
| case schema.OperandCategoryComposite: |
| n := 1 |
| for _, b := range k.Bases { |
| o, c := p.operand(b.Kind, b, i+n, optional) |
| if o != nil { |
| op.Tokens = append(op.Tokens, o.Tokens...) |
| } |
| n += c |
| } |
| return op, n |
| |
| default: |
| p.err(p.tok(i), "OperandKind '%s' has unexpected category '%s'", k.Kind, k.Category) |
| return nil, 0 |
| } |
| } |
| |
| // tok returns the i'th token, or nil if i is out of bounds. |
| func (p *parser) tok(i int) *Token { |
| if i < 0 || i >= len(p.toks) { |
| return nil |
| } |
| return p.toks[i] |
| } |
| |
| // opcode returns the schema.Opcode for the i'th token, or nil if the i'th token |
| // does not represent an opcode. |
| func (p *parser) opcode(i int) *schema.Opcode { |
| if tok := p.ident(i); tok != nil { |
| name := tok.Text(p.lines) |
| if inst, found := schema.Opcodes[name]; found { |
| return inst |
| } |
| } |
| return nil |
| } |
| |
| // operator returns the operator for the i'th token, or and empty string if the |
| // i'th token is not an operator. |
| func (p *parser) operator(i int) string { |
| if tok := p.tok(i); tok != nil && tok.Type == Operator { |
| return tok.Text(p.lines) |
| } |
| return "" |
| } |
| |
| // ident returns the i'th token if it is an Ident, otherwise nil. |
| func (p *parser) ident(i int) *Token { |
| if tok := p.tok(i); tok != nil && tok.Type == Ident { |
| return tok |
| } |
| return nil |
| } |
| |
| // pident returns the i'th token if it is an PIdent, otherwise nil. |
| func (p *parser) pident(i int) *Token { |
| if tok := p.tok(i); tok != nil && tok.Type == PIdent { |
| return tok |
| } |
| return nil |
| } |
| |
| // comment returns true if the i'th token is a Comment, otherwise false. |
| func (p *parser) comment(i int) bool { |
| if tok := p.tok(i); tok != nil && tok.Type == Comment { |
| return true |
| } |
| return false |
| } |
| |
| // newline returns true if the i'th token is a Newline, otherwise false. |
| func (p *parser) newline(i int) bool { |
| if tok := p.tok(i); tok != nil && tok.Type == Newline { |
| return true |
| } |
| return false |
| } |
| |
| // unexpected emits an 'unexpected token error' for the i'th token. |
| func (p *parser) unexpected(i int) { |
| p.err(p.toks[i], "syntax error: unexpected '%s'", p.toks[i].Text(p.lines)) |
| } |
| |
| // addIdentDef records the token definition for the instruction inst with the |
| // given id. |
| func (p *parser) addIdentDef(id string, inst *Instruction, def *Token) { |
| i, existing := p.idents[id] |
| if !existing { |
| i = &Identifier{} |
| p.idents[id] = i |
| } |
| if i.Definition == nil { |
| i.Definition = inst |
| } else { |
| p.err(def, "id '%v' redeclared", id) |
| } |
| } |
| |
| // addIdentRef adds a identifier reference for the token ref. |
| func (p *parser) addIdentRef(ref *Token) { |
| id := ref.Text(p.lines) |
| i, existing := p.idents[id] |
| if !existing { |
| i = &Identifier{} |
| p.idents[id] = i |
| } |
| i.References = append(i.References, ref) |
| } |
| |
| // err appends an fmt.Printf style error into l.diags for the given token. |
| func (p *parser) err(tok *Token, msg string, args ...interface{}) { |
| rng := Range{} |
| if tok != nil { |
| rng = tok.Range |
| } |
| p.diags = append(p.diags, Diagnostic{ |
| Range: rng, |
| Severity: SeverityError, |
| Message: fmt.Sprintf(msg, args...), |
| }) |
| } |
| |
| // Parse parses the SPIR-V assembly string source, returning the parse results. |
| func Parse(source string) (Results, error) { |
| toks, diags, err := lex(source) |
| if err != nil { |
| return Results{}, err |
| } |
| lines := strings.SplitAfter(source, "\n") |
| p := parser{ |
| lines: lines, |
| toks: toks, |
| idents: map[string]*Identifier{}, |
| mappings: map[*Token]interface{}{}, |
| extInstImports: map[string]schema.OpcodeMap{}, |
| } |
| if err := p.parse(); err != nil { |
| return Results{}, err |
| } |
| diags = append(diags, p.diags...) |
| return Results{ |
| Lines: lines, |
| Tokens: toks, |
| Diagnostics: p.diags, |
| Identifiers: p.idents, |
| Mappings: p.mappings, |
| }, nil |
| } |
| |
| // IsResult returns true if k is used to store the result of an instruction. |
| func IsResult(k *schema.OperandKind) bool { |
| switch k { |
| case schema.OperandKindIdResult, schema.OperandKindIdResultType: |
| return true |
| default: |
| return false |
| } |
| } |
| |
| // Results holds the output of Parse(). |
| type Results struct { |
| Lines []string |
| Tokens []*Token |
| Diagnostics []Diagnostic |
| Identifiers map[string]*Identifier // identifiers by name |
| Mappings map[*Token]interface{} // tokens to semantic map |
| } |
| |
| // Instruction describes a single instruction instance |
| type Instruction struct { |
| Tokens []*Token // all the tokens that make up the instruction |
| Result *Token // the token that represents the result of the instruction, or nil |
| Operands []*Operand // the operands of the instruction |
| Range Range // the textual range of the instruction |
| Opcode *schema.Opcode // the opcode for the instruction |
| } |
| |
| // Operand describes a single operand instance |
| type Operand struct { |
| Name string // name of the operand |
| Kind *schema.OperandKind // kind of the operand |
| Tokens []*Token // all the tokens that make up the operand |
| Parameters []*Operand // all the parameters for the operand |
| } |
| |
| // Identifier describes a single, unique SPIR-V identifier (i.e. %32) |
| type Identifier struct { |
| Definition *Instruction // where the identifier was defined |
| References []*Token // all the places the identifier was referenced |
| } |
| |
| // Severity is an enumerator of diagnositc seeverities |
| type Severity int |
| |
| // Severity levels |
| const ( |
| SeverityError Severity = iota |
| SeverityWarning |
| SeverityInformation |
| SeverityHint |
| ) |
| |
| // Diagnostic holds a single diagnostic message that was generated while |
| // parsing. |
| type Diagnostic struct { |
| Range Range |
| Severity Severity |
| Message string |
| } |