github.com/maruel/nin@v0.0.0-20220112143044-f35891e3ce7e/lexer.in.go (about)

     1  // Copyright 2011 Google Inc. All Rights Reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  //go:build neverbuild
    16  // +build neverbuild
    17  
    18  package nin
    19  
    20  import (
    21  	"errors"
    22  	"fmt"
    23  	"strings"
    24  )
    25  
    26  type Token int32
    27  
    28  const (
    29  	ERROR Token = iota
    30  	BUILD
    31  	COLON
    32  	DEFAULT
    33  	EQUALS
    34  	IDENT
    35  	INCLUDE
    36  	INDENT
    37  	NEWLINE
    38  	PIPE
    39  	PIPE2
    40  	PIPEAT
    41  	POOL
    42  	RULE
    43  	SUBNINJA
    44  	TEOF
    45  )
    46  
    47  // String() returns a human-readable form of a token, used in error messages.
    48  func (t Token) String() string {
    49  	switch t {
    50  	case ERROR:
    51  		return "lexing error"
    52  	case BUILD:
    53  		return "'build'"
    54  	case COLON:
    55  		return "':'"
    56  	case DEFAULT:
    57  		return "'default'"
    58  	case EQUALS:
    59  		return "'='"
    60  	case IDENT:
    61  		return "identifier"
    62  	case INCLUDE:
    63  		return "'include'"
    64  	case INDENT:
    65  		return "indent"
    66  	case NEWLINE:
    67  		return "newline"
    68  	case PIPE2:
    69  		return "'||'"
    70  	case PIPE:
    71  		return "'|'"
    72  	case PIPEAT:
    73  		return "'|@'"
    74  	case POOL:
    75  		return "'pool'"
    76  	case RULE:
    77  		return "'rule'"
    78  	case SUBNINJA:
    79  		return "'subninja'"
    80  	case TEOF:
    81  		return "eof"
    82  	}
    83  	return "" // not reached
    84  }
    85  
    86  // errorHint returns a human-readable token hint, used in error messages.
    87  func (t Token) errorHint() string {
    88  	if t == COLON {
    89  		return " ($ also escapes ':')"
    90  	}
    91  	return ""
    92  }
    93  
// lexerOffset permits quickly toggling between int64 and int32 to measure
// performance impact.
type lexerOffset = int

// lexerState is the offset of processing a token.
//
// It is meant to be saved when an error message may be printed after the
// parsing continued.
type lexerState struct {
	// In the original C++ code, these two are char pointers and are used to do
	// pointer arithmetics. Go doesn't allow pointer arithmetics so they are
	// indexes. ofs starts at 0. lastToken is initially -1 to mark that it is
	// not yet set.

	// ofs is the offset of the next byte the scanner will look at.
	ofs       lexerOffset
	// lastToken is the offset of the first byte of the most recently read
	// token, used for error reporting and UnreadToken; -1 until set.
	lastToken lexerOffset
}
   110  
   111  // error constructs an error message with context.
   112  func (l *lexerState) error(message, filename string, input []byte) error {
   113  	// Compute line/column.
   114  	line := lexerOffset(1)
   115  	lineStart := lexerOffset(0)
   116  	for p := lexerOffset(0); p < l.lastToken; p++ {
   117  		if input[p] == '\n' {
   118  			line++
   119  			lineStart = p + 1
   120  		}
   121  	}
   122  	col := lexerOffset(0)
   123  	if l.lastToken != -1 {
   124  		col = l.lastToken - lineStart
   125  	}
   126  
   127  	// Add some context to the message.
   128  	c := ""
   129  	const truncateColumn = 72
   130  	if col > 0 && col < truncateColumn {
   131  		truncated := true
   132  		length := lexerOffset(0)
   133  		for ; length < truncateColumn; length++ {
   134  			if input[lineStart+length] == 0 || input[lineStart+length] == '\n' {
   135  				truncated = false
   136  				break
   137  			}
   138  		}
   139  		c = unsafeString(input[lineStart : lineStart+length])
   140  		if truncated {
   141  			c += "..."
   142  		}
   143  		c += "\n"
   144  		c += strings.Repeat(" ", int(col))
   145  		c += "^ near here"
   146  	}
   147  	// TODO(maruel): There's a problem where the error is wrapped, thus the alignment doesn't work.
   148  	return fmt.Errorf("%s:%d: %s\n%s", filename, line, message, c)
   149  }
   150  
// lexer tokenizes a single ninja manifest held entirely in memory.
//
// The input must end with a trailing 0 byte; see Start.
type lexer struct {
	// Immutable.

	// filename is only used when formatting error messages.
	filename string
	// input is the whole manifest, including the trailing 0 byte that the
	// generated scanner uses as the EOF sentinel.
	input    []byte

	// Mutable.
	lexerState
}
   159  
   160  // Error constructs an error message with context.
   161  func (l *lexer) Error(message string) error {
   162  	return l.lexerState.error(message, l.filename, l.input)
   163  }
   164  
   165  // Start parsing some input.
   166  func (l *lexer) Start(filename string, input []byte) error {
   167  	l.filename = filename
   168  	if input[len(input)-1] != 0 {
   169  		panic("Requires hack with a trailing 0 byte")
   170  	}
   171  	if len(input) > 0x7fffffff {
   172  		return errors.New("input larger than 2gb is not supported")
   173  	}
   174  	l.input = input
   175  	l.ofs = 0
   176  	l.lastToken = -1
   177  	return nil
   178  }
   179  
   180  // If the last token read was an ERROR token, provide more info
   181  // or the empty string.
   182  func (l *lexer) DescribeLastError() string {
   183  	if l.lastToken != -1 {
   184  		switch l.input[l.lastToken] {
   185  		case '\t':
   186  			return "tabs are not allowed, use spaces"
   187  		}
   188  	}
   189  	return "lexing error"
   190  }
   191  
// Rewind to the last read Token.
//
// Only one token of lookback is supported: this resets the read offset to
// the start of the most recent token, so calling it twice in a row without
// an intervening read is a no-op.
func (l *lexer) UnreadToken() {
	l.ofs = l.lastToken
}
   196  
// ReadToken scans and returns the next token, advancing the read offset
// past it.
//
// The /*!re2c*/ comment below is the scanner specification compiled by re2c
// into the shipped lexer; it is not dead text and must not be edited
// casually. Comment lines are skipped inside the loop (the `continue`
// action); every other pattern produces a token and breaks out.
func (l *lexer) ReadToken() Token {
	p := l.ofs
	// q is the re2c backtracking marker (YYMARKER); only the generated code
	// reads and writes it.
	q := lexerOffset(0)
	start := lexerOffset(0)
	var token Token
	for {
		// start marks the first byte of the current candidate token; it is
		// recorded as lastToken below so errors can point at it.
		start = p
		/*!re2c
		    re2c:define:YYCTYPE = "byte";
		    re2c:define:YYCURSOR = "l.input[p]";
				re2c:define:YYSKIP = "p++";
		    re2c:define:YYMARKER = q;
		    re2c:yyfill:enable = 0;
				re2c:flags:nested-ifs = 0;
		    re2c:define:YYPEEK = "l.input[p]";
				re2c:define:YYBACKUP = "q = p";
				re2c:define:YYRESTORE = "p = q";

		    nul = "\000";
		    simpleVarname = [a-zA-Z0-9_-]+;
		    varname = [a-zA-Z0-9_.-]+;

		    [ ]*"#"[^\000\n]*"\n" { continue; }
		    [ ]*"\r\n" { token = NEWLINE;  break; }
		    [ ]*"\n"   { token = NEWLINE;  break; }
		    [ ]+       { token = INDENT;   break; }
		    "build"    { token = BUILD;    break; }
		    "pool"     { token = POOL;     break; }
		    "rule"     { token = RULE;     break; }
		    "default"  { token = DEFAULT;  break; }
		    "="        { token = EQUALS;   break; }
		    ":"        { token = COLON;    break; }
				"|@"       { token = PIPEAT;   break; }
		    "||"       { token = PIPE2;    break; }
		    "|"        { token = PIPE;     break; }
		    "include"  { token = INCLUDE;  break; }
		    "subninja" { token = SUBNINJA; break; }
		    varname    { token = IDENT;    break; }
		    nul        { token = TEOF;     break; }
		    [^]        { token = ERROR;    break; }
		*/
	}

	// Remember where the token began so UnreadToken/Error can refer to it.
	l.lastToken = start
	l.ofs = p
	// Consume trailing spaces so the next read starts on the next element;
	// NEWLINE and TEOF delimit on their own.
	if token != NEWLINE && token != TEOF {
		l.eatWhitespace()
	}
	return token
}
   247  
   248  // If the next token is \a token, read it and return true.
   249  func (l *lexer) PeekToken(token Token) bool {
   250  	t := l.ReadToken()
   251  	if t == token {
   252  		return true
   253  	}
   254  	l.UnreadToken()
   255  	return false
   256  }
   257  
// Skip past whitespace (called after each read token/ident/etc.).
//
// Spaces and $-escaped line continuations ("$\n", "$\r\n") are consumed; the
// offset is committed before every scan step so it ends up on the first
// non-whitespace byte. The /*!re2c*/ comment is the scanner spec compiled by
// re2c; do not edit it casually.
func (l *lexer) eatWhitespace() {
	p := l.ofs
	// q is the re2c backtracking marker (YYMARKER); only generated code
	// touches it.
	q := lexerOffset(0)
	for {
		l.ofs = p
		/*!re2c
		  [ ]+    { continue; }
		  "$\r\n" { continue; }
		  "$\n"   { continue; }
		  nul     { break; }
		  [^]     { break; }
		*/
	}
}
   273  
// Read a simple identifier (a rule or variable name).
// Returns the empty string if a name can't be read.
//
// On success the offset advances past the identifier and trailing
// whitespace; on failure only lastToken is updated, for error reporting.
// The /*!re2c*/ comment is the scanner spec compiled by re2c.
func (l *lexer) readIdent() string {
	out := ""
	p := l.ofs
	start := lexerOffset(0)
	for {
		start = p
		/*!re2c
		  varname {
				out = unsafeString(l.input[start:p])
		    break
		  }
		  [^] {
		    l.lastToken = start
		    return ""
		  }
		*/
	}
	l.lastToken = start
	l.ofs = p
	l.eatWhitespace()
	return out
}
   298  
// readEvalString reads a $-escaped string.
//
// If path is true, read a path (complete with $escapes).
//
// If path is false, read the value side of a var = value line (complete with
// $escapes).
//
// Returned path may be empty if a delimiter (space, newline) is hit.
//
// Literal runs and $-escapes become EvalStringToken{..., false}; "${name}"
// and "$name" references become EvalStringToken{..., true}. The /*!re2c*/
// comment below is the scanner spec compiled by re2c; do not edit it
// casually.
func (l *lexer) readEvalString(path bool) (EvalString, error) {
	eval := EvalString{}
	p := l.ofs
	// q is the re2c backtracking marker (YYMARKER); only generated code
	// touches it.
	q := lexerOffset(0)
	start := lexerOffset(0)
	for {
		start = p
		/*!re2c
		  [^$ :\r\n|\000]+ {
				eval.Parsed = append(eval.Parsed, EvalStringToken{unsafeString(l.input[start: p]), false})
		    continue
		  }
		  "\r\n" {
		    if path {
		      p = start
		    }
		    break
		  }
		  [ :|\n] {
		    if path {
		      p = start
		      break
		    } else {
		      if l.input[start] == '\n' {
		        break
		      }
					eval.Parsed = append(eval.Parsed, EvalStringToken{unsafeString(l.input[start:start+1]), false})
		      continue
		    }
		  }
		  "$$" {
				eval.Parsed = append(eval.Parsed, EvalStringToken{"$", false})
		    continue
		  }
		  "$ " {
				eval.Parsed = append(eval.Parsed, EvalStringToken{" ", false})
		    continue
		  }
		  "$\r\n"[ ]* {
		    continue
		  }
		  "$\n"[ ]* {
		    continue
		  }
		  "${"varname"}" {
				eval.Parsed = append(eval.Parsed, EvalStringToken{unsafeString(l.input[start + 2: p - 1]), true})
		    continue
		  }
		  "$"simpleVarname {
				eval.Parsed = append(eval.Parsed, EvalStringToken{unsafeString(l.input[start + 1: p]), true})
		    continue
		  }
		  "$:" {
				eval.Parsed = append(eval.Parsed, EvalStringToken{":", false})
		    continue
		  }
		  "$". {
		    l.lastToken = start
		    return eval, l.Error("bad $-escape (literal $ must be written as $$)")
		  }
		  nul {
		    l.lastToken = start
		    return eval, l.Error("unexpected EOF")
		  }
		  [^] {
		    l.lastToken = start
		    return eval, l.Error(l.DescribeLastError())
		  }
		*/
	}
	// Record where the final (delimiter) token began, for error reporting.
	l.lastToken = start
	l.ofs = p
	if path {
		l.eatWhitespace()
	}
	// Non-path strings end in newlines, so there's no whitespace to eat.
	return eval, nil
}