github.com/maruel/nin@v0.0.0-20220112143044-f35891e3ce7e/lexer.in.go (about) 1 // Copyright 2011 Google Inc. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 //go:build neverbuild 16 // +build neverbuild 17 18 package nin 19 20 import ( 21 "errors" 22 "fmt" 23 "strings" 24 ) 25 26 type Token int32 27 28 const ( 29 ERROR Token = iota 30 BUILD 31 COLON 32 DEFAULT 33 EQUALS 34 IDENT 35 INCLUDE 36 INDENT 37 NEWLINE 38 PIPE 39 PIPE2 40 PIPEAT 41 POOL 42 RULE 43 SUBNINJA 44 TEOF 45 ) 46 47 // String() returns a human-readable form of a token, used in error messages. 48 func (t Token) String() string { 49 switch t { 50 case ERROR: 51 return "lexing error" 52 case BUILD: 53 return "'build'" 54 case COLON: 55 return "':'" 56 case DEFAULT: 57 return "'default'" 58 case EQUALS: 59 return "'='" 60 case IDENT: 61 return "identifier" 62 case INCLUDE: 63 return "'include'" 64 case INDENT: 65 return "indent" 66 case NEWLINE: 67 return "newline" 68 case PIPE2: 69 return "'||'" 70 case PIPE: 71 return "'|'" 72 case PIPEAT: 73 return "'|@'" 74 case POOL: 75 return "'pool'" 76 case RULE: 77 return "'rule'" 78 case SUBNINJA: 79 return "'subninja'" 80 case TEOF: 81 return "eof" 82 } 83 return "" // not reached 84 } 85 86 // errorHint returns a human-readable token hint, used in error messages. 
87 func (t Token) errorHint() string { 88 if t == COLON { 89 return " ($ also escapes ':')" 90 } 91 return "" 92 } 93 94 // lexerOffset permits quickly toggling between int64 and int32 to measure 95 // performance impact. 96 type lexerOffset = int 97 98 // lexerState is the offset of processing a token. 99 // 100 // It is meant to be saved when an error message may be printed after the 101 // parsing continued. 102 type lexerState struct { 103 // In the original C++ code, these two are char pointers and are used to do 104 // pointer arithmetics. Go doesn't allow pointer arithmetics so they are 105 // indexes. ofs starts at 0. lastToken is initially -1 to mark that it is 106 // not yet set. 107 ofs lexerOffset 108 lastToken lexerOffset 109 } 110 111 // error constructs an error message with context. 112 func (l *lexerState) error(message, filename string, input []byte) error { 113 // Compute line/column. 114 line := lexerOffset(1) 115 lineStart := lexerOffset(0) 116 for p := lexerOffset(0); p < l.lastToken; p++ { 117 if input[p] == '\n' { 118 line++ 119 lineStart = p + 1 120 } 121 } 122 col := lexerOffset(0) 123 if l.lastToken != -1 { 124 col = l.lastToken - lineStart 125 } 126 127 // Add some context to the message. 128 c := "" 129 const truncateColumn = 72 130 if col > 0 && col < truncateColumn { 131 truncated := true 132 length := lexerOffset(0) 133 for ; length < truncateColumn; length++ { 134 if input[lineStart+length] == 0 || input[lineStart+length] == '\n' { 135 truncated = false 136 break 137 } 138 } 139 c = unsafeString(input[lineStart : lineStart+length]) 140 if truncated { 141 c += "..." 142 } 143 c += "\n" 144 c += strings.Repeat(" ", int(col)) 145 c += "^ near here" 146 } 147 // TODO(maruel): There's a problem where the error is wrapped, thus the alignment doesn't work. 148 return fmt.Errorf("%s:%d: %s\n%s", filename, line, message, c) 149 } 150 151 type lexer struct { 152 // Immutable. 153 filename string 154 input []byte 155 156 // Mutable. 
157 lexerState 158 } 159 160 // Error constructs an error message with context. 161 func (l *lexer) Error(message string) error { 162 return l.lexerState.error(message, l.filename, l.input) 163 } 164 165 // Start parsing some input. 166 func (l *lexer) Start(filename string, input []byte) error { 167 l.filename = filename 168 if input[len(input)-1] != 0 { 169 panic("Requires hack with a trailing 0 byte") 170 } 171 if len(input) > 0x7fffffff { 172 return errors.New("input larger than 2gb is not supported") 173 } 174 l.input = input 175 l.ofs = 0 176 l.lastToken = -1 177 return nil 178 } 179 180 // If the last token read was an ERROR token, provide more info 181 // or the empty string. 182 func (l *lexer) DescribeLastError() string { 183 if l.lastToken != -1 { 184 switch l.input[l.lastToken] { 185 case '\t': 186 return "tabs are not allowed, use spaces" 187 } 188 } 189 return "lexing error" 190 } 191 192 // Rewind to the last read Token. 193 func (l *lexer) UnreadToken() { 194 l.ofs = l.lastToken 195 } 196 197 func (l *lexer) ReadToken() Token { 198 p := l.ofs 199 q := lexerOffset(0) 200 start := lexerOffset(0) 201 var token Token 202 for { 203 start = p 204 /*!re2c 205 re2c:define:YYCTYPE = "byte"; 206 re2c:define:YYCURSOR = "l.input[p]"; 207 re2c:define:YYSKIP = "p++"; 208 re2c:define:YYMARKER = q; 209 re2c:yyfill:enable = 0; 210 re2c:flags:nested-ifs = 0; 211 re2c:define:YYPEEK = "l.input[p]"; 212 re2c:define:YYBACKUP = "q = p"; 213 re2c:define:YYRESTORE = "p = q"; 214 215 nul = "\000"; 216 simpleVarname = [a-zA-Z0-9_-]+; 217 varname = [a-zA-Z0-9_.-]+; 218 219 [ ]*"#"[^\000\n]*"\n" { continue; } 220 [ ]*"\r\n" { token = NEWLINE; break; } 221 [ ]*"\n" { token = NEWLINE; break; } 222 [ ]+ { token = INDENT; break; } 223 "build" { token = BUILD; break; } 224 "pool" { token = POOL; break; } 225 "rule" { token = RULE; break; } 226 "default" { token = DEFAULT; break; } 227 "=" { token = EQUALS; break; } 228 ":" { token = COLON; break; } 229 "|@" { token = PIPEAT; break; 
} 230 "||" { token = PIPE2; break; } 231 "|" { token = PIPE; break; } 232 "include" { token = INCLUDE; break; } 233 "subninja" { token = SUBNINJA; break; } 234 varname { token = IDENT; break; } 235 nul { token = TEOF; break; } 236 [^] { token = ERROR; break; } 237 */ 238 } 239 240 l.lastToken = start 241 l.ofs = p 242 if token != NEWLINE && token != TEOF { 243 l.eatWhitespace() 244 } 245 return token 246 } 247 248 // If the next token is \a token, read it and return true. 249 func (l *lexer) PeekToken(token Token) bool { 250 t := l.ReadToken() 251 if t == token { 252 return true 253 } 254 l.UnreadToken() 255 return false 256 } 257 258 // Skip past whitespace (called after each read token/ident/etc.). 259 func (l *lexer) eatWhitespace() { 260 p := l.ofs 261 q := lexerOffset(0) 262 for { 263 l.ofs = p 264 /*!re2c 265 [ ]+ { continue; } 266 "$\r\n" { continue; } 267 "$\n" { continue; } 268 nul { break; } 269 [^] { break; } 270 */ 271 } 272 } 273 274 // Read a simple identifier (a rule or variable name). 275 // Returns false if a name can't be read. 276 func (l *lexer) readIdent() string { 277 out := "" 278 p := l.ofs 279 start := lexerOffset(0) 280 for { 281 start = p 282 /*!re2c 283 varname { 284 out = unsafeString(l.input[start:p]) 285 break 286 } 287 [^] { 288 l.lastToken = start 289 return "" 290 } 291 */ 292 } 293 l.lastToken = start 294 l.ofs = p 295 l.eatWhitespace() 296 return out 297 } 298 299 // readEvalString reads a $-escaped string. 300 // 301 // If path is true, read a path (complete with $escapes). 302 // 303 // If path is false, read the value side of a var = value line (complete with 304 // $escapes). 305 // 306 // Returned path may be empty if a delimiter (space, newline) is hit. 
func (l *lexer) readEvalString(path bool) (EvalString, error) {
	// Accumulates literal and variable-reference tokens as they are lexed.
	eval := EvalString{}
	p := l.ofs
	// q is the re2c YYMARKER backtracking point; it is only referenced by the
	// code re2c generates from the block below.
	q := lexerOffset(0)
	start := lexerOffset(0)
	for {
		// start marks the beginning of the current sub-token so actions can
		// slice l.input[start:p] and error paths can point at it.
		start = p
		/*!re2c
		[^$ :\r\n|\000]+ {
			eval.Parsed = append(eval.Parsed, EvalStringToken{unsafeString(l.input[start: p]), false})
			continue
		}
		"\r\n" {
			if path {
				p = start
			}
			break
		}
		[ :|\n] {
			if path {
				p = start
				break
			} else {
				if l.input[start] == '\n' {
					break
				}
				eval.Parsed = append(eval.Parsed, EvalStringToken{unsafeString(l.input[start:start+1]), false})
				continue
			}
		}
		"$$" {
			eval.Parsed = append(eval.Parsed, EvalStringToken{"$", false})
			continue
		}
		"$ " {
			eval.Parsed = append(eval.Parsed, EvalStringToken{" ", false})
			continue
		}
		"$\r\n"[ ]* {
			continue
		}
		"$\n"[ ]* {
			continue
		}
		"${"varname"}" {
			eval.Parsed = append(eval.Parsed, EvalStringToken{unsafeString(l.input[start + 2: p - 1]), true})
			continue
		}
		"$"simpleVarname {
			eval.Parsed = append(eval.Parsed, EvalStringToken{unsafeString(l.input[start + 1: p]), true})
			continue
		}
		"$:" {
			eval.Parsed = append(eval.Parsed, EvalStringToken{":", false})
			continue
		}
		"$". {
			l.lastToken = start
			return eval, l.Error("bad $-escape (literal $ must be written as $$)")
		}
		nul {
			l.lastToken = start
			return eval, l.Error("unexpected EOF")
		}
		[^] {
			l.lastToken = start
			return eval, l.Error(l.DescribeLastError())
		}
		*/
	}
	// Record where the last sub-token started so later error messages can
	// point at it, then advance past everything consumed.
	l.lastToken = start
	l.ofs = p
	if path {
		l.eatWhitespace()
	}
	// Non-path strings end in newlines, so there's no whitespace to eat.
	return eval, nil
}