github.com/lovishpuri/go-40569/src@v0.0.0-20230519171745-f8623e7c56cf/go/build/read.go (about) 1 // Copyright 2012 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package build 6 7 import ( 8 "bufio" 9 "bytes" 10 "errors" 11 "fmt" 12 "go/ast" 13 "go/parser" 14 "go/token" 15 "io" 16 "strconv" 17 "strings" 18 "unicode" 19 "unicode/utf8" 20 ) 21 22 type importReader struct { 23 b *bufio.Reader 24 buf []byte 25 peek byte 26 err error 27 eof bool 28 nerr int 29 pos token.Position 30 } 31 32 var bom = []byte{0xef, 0xbb, 0xbf} 33 34 func newImportReader(name string, r io.Reader) *importReader { 35 b := bufio.NewReader(r) 36 // Remove leading UTF-8 BOM. 37 // Per https://golang.org/ref/spec#Source_code_representation: 38 // a compiler may ignore a UTF-8-encoded byte order mark (U+FEFF) 39 // if it is the first Unicode code point in the source text. 40 if leadingBytes, err := b.Peek(3); err == nil && bytes.Equal(leadingBytes, bom) { 41 b.Discard(3) 42 } 43 return &importReader{ 44 b: b, 45 pos: token.Position{ 46 Filename: name, 47 Line: 1, 48 Column: 1, 49 }, 50 } 51 } 52 53 func isIdent(c byte) bool { 54 return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9' || c == '_' || c >= utf8.RuneSelf 55 } 56 57 var ( 58 errSyntax = errors.New("syntax error") 59 errNUL = errors.New("unexpected NUL in input") 60 ) 61 62 // syntaxError records a syntax error, but only if an I/O error has not already been recorded. 63 func (r *importReader) syntaxError() { 64 if r.err == nil { 65 r.err = errSyntax 66 } 67 } 68 69 // readByte reads the next byte from the input, saves it in buf, and returns it. 70 // If an error occurs, readByte records the error in r.err and returns 0. 
func (r *importReader) readByte() byte {
	c, err := r.b.ReadByte()
	if err == nil {
		// Every byte that was read successfully is kept in r.buf,
		// including a NUL, which is then reported as an error.
		r.buf = append(r.buf, c)
		if c == 0 {
			err = errNUL
		}
	}
	if err != nil {
		// End of input is tracked in r.eof rather than r.err;
		// for any other error only the first one is kept.
		if err == io.EOF {
			r.eof = true
		} else if r.err == nil {
			r.err = err
		}
		c = 0
	}
	return c
}

// readByteNoBuf is like readByte but doesn't buffer the byte.
// It exhausts r.buf before reading from r.b.
// It also advances r.pos past every byte it returns.
func (r *importReader) readByteNoBuf() byte {
	var c byte
	var err error
	if len(r.buf) > 0 {
		// Replay a byte that readByte saved earlier.
		c = r.buf[0]
		r.buf = r.buf[1:]
	} else {
		c, err = r.b.ReadByte()
		if err == nil && c == 0 {
			err = errNUL
		}
	}

	if err != nil {
		if err == io.EOF {
			r.eof = true
		} else if r.err == nil {
			r.err = err
		}
		// The position is left untouched when no byte is returned.
		return 0
	}
	r.pos.Offset++
	if c == '\n' {
		r.pos.Line++
		r.pos.Column = 1
	} else {
		r.pos.Column++
	}
	return c
}

// peekByte returns the next byte from the input reader but does not advance beyond it.
// If skipSpace is set, peekByte skips leading spaces and comments.
// Once an error has been recorded it returns 0 without reading, and it panics
// if it is called more than 10000 times in that state.
func (r *importReader) peekByte(skipSpace bool) byte {
	if r.err != nil {
		// Guard against a caller that keeps polling after an error.
		if r.nerr++; r.nerr > 10000 {
			panic("go/build: import reader looping")
		}
		return 0
	}

	// Use r.peek as first input byte.
	// Don't just return r.peek here: it might have been left by peekByte(false)
	// and this might be peekByte(true).
	c := r.peek
	if c == 0 {
		c = r.readByte()
	}
	for r.err == nil && !r.eof {
		if skipSpace {
			// For the purposes of this reader, semicolons are never necessary to
			// understand the input and are treated as spaces.
			switch c {
			case ' ', '\f', '\t', '\r', '\n', ';':
				c = r.readByte()
				continue

			case '/':
				c = r.readByte()
				if c == '/' {
					// Line comment: skip through the end of the line.
					for c != '\n' && r.err == nil && !r.eof {
						c = r.readByte()
					}
				} else if c == '*' {
					// Block comment: slide a two-byte window (c, c1) over the
					// input until it holds "*/". Reaching EOF first is a syntax error.
					var c1 byte
					for (c != '*' || c1 != '/') && r.err == nil {
						if r.eof {
							r.syntaxError()
						}
						c, c1 = c1, r.readByte()
					}
				} else {
					// A lone '/' cannot appear where space is being skipped.
					r.syntaxError()
				}
				// Read the byte that follows the comment and keep skipping.
				c = r.readByte()
				continue
			}
		}
		break
	}
	r.peek = c
	return r.peek
}

// nextByte is like peekByte but advances beyond the returned byte.
func (r *importReader) nextByte(skipSpace bool) byte {
	c := r.peekByte(skipSpace)
	// Clearing the lookahead makes the next peekByte read a fresh byte.
	r.peek = 0
	return c
}

// goEmbed is the text that must directly follow "//" in a //go:embed comment.
var goEmbed = []byte("go:embed")

// findEmbed advances the input reader to the next //go:embed comment.
// It reports whether it found a comment.
// (Otherwise it found an error or EOF.)
// On success the reader is positioned just after the space or tab that
// follows "//go:embed". The argument first must be true on the first call only.
func (r *importReader) findEmbed(first bool) bool {
	// The import block scan stopped after a non-space character,
	// so the reader is not at the start of a line on the first call.
	// After that, each //go:embed extraction leaves the reader
	// at the end of a line.
	startLine := !first
	var c byte
	for r.err == nil && !r.eof {
		c = r.readByteNoBuf()
		// Reswitch dispatches again on a byte that has already been read
		// (for example the byte after a closing quote) without consuming another.
	Reswitch:
		switch c {
		default:
			startLine = false

		case '\n':
			startLine = true

		case ' ', '\t':
			// leave startLine alone

		case '"':
			// Interpreted string literal: skip to the closing quote,
			// stepping over the byte after each backslash.
			startLine = false
			for r.err == nil {
				if r.eof {
					r.syntaxError()
				}
				c = r.readByteNoBuf()
				if c == '\\' {
					r.readByteNoBuf()
					if r.err != nil {
						r.syntaxError()
						return false
					}
					continue
				}
				if c == '"' {
					c = r.readByteNoBuf()
					goto Reswitch
				}
			}
			goto Reswitch

		case '`':
			// Raw string literal: no escapes, ends at the next backquote.
			startLine = false
			for r.err == nil {
				if r.eof {
					r.syntaxError()
				}
				c = r.readByteNoBuf()
				if c == '`' {
					c = r.readByteNoBuf()
					goto Reswitch
				}
			}

		case '\'':
			// Rune literal: handled like an interpreted string with ' as the delimiter.
			startLine = false
			for r.err == nil {
				if r.eof {
					r.syntaxError()
				}
				c = r.readByteNoBuf()
				if c == '\\' {
					r.readByteNoBuf()
					if r.err != nil {
						r.syntaxError()
						return false
					}
					continue
				}
				if c == '\'' {
					c = r.readByteNoBuf()
					goto Reswitch
				}
			}

		case '/':
			c = r.readByteNoBuf()
			switch c {
			default:
				// Not a comment: reprocess the byte that followed the '/'.
				startLine = false
				goto Reswitch

			case '*':
				// Block comment: slide a two-byte window until it holds "*/".
				var c1 byte
				for (c != '*' || c1 != '/') && r.err == nil {
					if r.eof {
						r.syntaxError()
					}
					c, c1 = c1, r.readByteNoBuf()
				}
				startLine = false

			case '/':
				if startLine {
					// Try to read this as a //go:embed comment.
					for i := range goEmbed {
						c = r.readByteNoBuf()
						if c != goEmbed[i] {
							goto SkipSlashSlash
						}
					}
					c = r.readByteNoBuf()
					if c == ' ' || c == '\t' {
						// Found one!
						return true
					}
				}
				// Any other line comment is skipped through its newline,
				// which leaves the reader at the start of a line.
			SkipSlashSlash:
				for c != '\n' && r.err == nil && !r.eof {
					c = r.readByteNoBuf()
				}
				startLine = true
			}
		}
	}
	return false
}

// readKeyword reads the given keyword from the input.
// If the keyword is not present, readKeyword records a syntax error.
func (r *importReader) readKeyword(kw string) {
	// Skip leading space and comments; the first keyword byte stays in r.peek.
	r.peekByte(true)
	for i := 0; i < len(kw); i++ {
		if r.nextByte(false) != kw[i] {
			r.syntaxError()
			return
		}
	}
	// The keyword must not run straight into further identifier bytes.
	if isIdent(r.peekByte(false)) {
		r.syntaxError()
	}
}

// readIdent reads an identifier from the input.
// If an identifier is not present, readIdent records a syntax error.
func (r *importReader) readIdent() {
	c := r.peekByte(true)
	if !isIdent(c) {
		r.syntaxError()
		return
	}
	// Clearing r.peek consumes the byte that peekByte just returned.
	for isIdent(r.peekByte(false)) {
		r.peek = 0
	}
}

// readString reads a quoted string literal from the input.
// If a string literal is not present, readString records a syntax error.
func (r *importReader) readString() {
	switch r.nextByte(true) {
	case '`':
		// Raw string: runs to the next backquote; EOF first is a syntax error.
		for r.err == nil {
			if r.nextByte(false) == '`' {
				break
			}
			if r.eof {
				r.syntaxError()
			}
		}
	case '"':
		// Interpreted string: may not contain a newline; the byte after
		// a backslash is consumed without being examined.
		for r.err == nil {
			c := r.nextByte(false)
			if c == '"' {
				break
			}
			if r.eof || c == '\n' {
				r.syntaxError()
			}
			if c == '\\' {
				r.nextByte(false)
			}
		}
	default:
		r.syntaxError()
	}
}

// readImport reads an import clause - optional identifier followed by quoted string -
// from the input.
func (r *importReader) readImport() {
	c := r.peekByte(true)
	if c == '.' {
		// A '.' is accepted in place of the identifier; consume it.
		r.peek = 0
	} else if isIdent(c) {
		r.readIdent()
	}
	r.readString()
}

// readComments is like io.ReadAll, except that it only reads the leading
// block of comments in the file.
func readComments(f io.Reader) ([]byte, error) {
	r := newImportReader("", f)
	// peekByte(true) consumes leading space and comments, saving them in r.buf.
	r.peekByte(true)
	if r.err == nil && !r.eof {
		// Didn't reach EOF, so must have found a non-space byte. Remove it.
		r.buf = r.buf[:len(r.buf)-1]
	}
	return r.buf, r.err
}

// readGoInfo expects a Go file as input and reads the file up to and including the import section.
// It records what it learned in *info.
// If info.fset is non-nil, readGoInfo parses the file and sets info.parsed, info.parseErr,
// info.imports and info.embeds.
//
// It only returns an error if there are problems reading the file,
// not for syntax errors in the file itself.
func readGoInfo(f io.Reader, info *fileInfo) error {
	r := newImportReader(info.name, f)

	r.readKeyword("package")
	r.readIdent()
	// Every import declaration starts with 'i'; stop at the first other byte.
	for r.peekByte(true) == 'i' {
		r.readKeyword("import")
		if r.peekByte(true) == '(' {
			// Parenthesized group: read import clauses up to the closing ')'.
			r.nextByte(false)
			for r.peekByte(true) != ')' && r.err == nil {
				r.readImport()
			}
			r.nextByte(false)
		} else {
			r.readImport()
		}
	}

	info.header = r.buf

	// If we stopped successfully before EOF, we read a byte that told us we were done.
	// Return all but that last byte, which would cause a syntax error if we let it through.
	if r.err == nil && !r.eof {
		info.header = r.buf[:len(r.buf)-1]
	}

	// If we stopped for a syntax error, consume the whole file so that
	// we are sure we don't change the errors that go/parser returns.
	if r.err == errSyntax {
		r.err = nil
		for r.err == nil && !r.eof {
			r.readByte()
		}
		info.header = r.buf
	}
	// Anything still recorded here is a read error (or a NUL byte), not a syntax error.
	if r.err != nil {
		return r.err
	}

	if info.fset == nil {
		return nil
	}

	// Parse file header & record imports.
	info.parsed, info.parseErr = parser.ParseFile(info.fset, info.name, info.header, parser.ImportsOnly|parser.ParseComments)
	if info.parseErr != nil {
		// The parse error is reported through info.parseErr, not the return value.
		return nil
	}

	hasEmbed := false
	for _, decl := range info.parsed.Decls {
		d, ok := decl.(*ast.GenDecl)
		if !ok {
			continue
		}
		for _, dspec := range d.Specs {
			spec, ok := dspec.(*ast.ImportSpec)
			if !ok {
				continue
			}
			quoted := spec.Path.Value
			path, err := strconv.Unquote(quoted)
			if err != nil {
				return fmt.Errorf("parser returned invalid quoted string: <%s>", quoted)
			}
			if path == "embed" {
				hasEmbed = true
			}

			// For a declaration holding a single spec, fall back to the
			// declaration's own doc comment when the spec has none.
			doc := spec.Doc
			if doc == nil && len(d.Specs) == 1 {
				doc = d.Doc
			}
			info.imports = append(info.imports, fileImport{path, spec.Pos(), doc})
		}
	}

	// Extract directives.
	// Only "//go:" comments positioned before the package clause are collected.
	for _, group := range info.parsed.Comments {
		if group.Pos() >= info.parsed.Package {
			break
		}
		for _, c := range group.List {
			if strings.HasPrefix(c.Text, "//go:") {
				info.directives = append(info.directives, Directive{c.Text, info.fset.Position(c.Slash)})
			}
		}
	}

	// If the file imports "embed",
	// we have to look for //go:embed comments
	// in the remainder of the file.
	// The compiler will enforce the mapping of comments to
	// declared variables. We just need to know the patterns.
	// If there were //go:embed comments earlier in the file
	// (near the package statement or imports), the compiler
	// will reject them. They can be (and have already been) ignored.
	if hasEmbed {
		var line []byte
		for first := true; r.findEmbed(first); first = false {
			line = line[:0]
			// pos is the position of the first byte after "//go:embed" and its separator.
			pos := r.pos
			// Collect the rest of the comment line, without the newline.
			for {
				c := r.readByteNoBuf()
				if c == '\n' || r.err != nil || r.eof {
					break
				}
				line = append(line, c)
			}
			// Add args if line is well-formed.
			// Ignore badly-formed lines - the compiler will report them when it finds them,
			// and we can pretend they are not there to help go list succeed with what it knows.
			embs, err := parseGoEmbed(string(line), pos)
			if err == nil {
				info.embeds = append(info.embeds, embs...)
			}
		}
	}

	return nil
}

// parseGoEmbed parses the text following "//go:embed" to extract the glob patterns.
// It accepts unquoted space-separated patterns as well as double-quoted and back-quoted Go strings.
// This is based on a similar function in cmd/compile/internal/gc/noder.go;
// this version calculates position information as well.
//
// pos is the position of the first byte of args. On a malformed argument
// parseGoEmbed returns a nil list and an error.
func parseGoEmbed(args string, pos token.Position) ([]fileEmbed, error) {
	// trimBytes drops the first n bytes of args and advances pos past them:
	// Offset by the byte count, Column by the rune count.
	trimBytes := func(n int) {
		pos.Offset += n
		pos.Column += utf8.RuneCountInString(args[:n])
		args = args[n:]
	}
	// trimSpace drops leading Unicode white space from args.
	trimSpace := func() {
		trim := strings.TrimLeftFunc(args, unicode.IsSpace)
		trimBytes(len(args) - len(trim))
	}

	var list []fileEmbed
	for trimSpace(); args != ""; trimSpace() {
		var path string
		// Remember where this pattern starts before it is trimmed away.
		pathPos := pos
	Switch:
		switch args[0] {
		default:
			// Unquoted pattern: runs up to the next white space or the end of args.
			i := len(args)
			for j, c := range args {
				if unicode.IsSpace(c) {
					i = j
					break
				}
			}
			path = args[:i]
			trimBytes(i)

		case '`':
			// Back-quoted pattern: taken literally up to the closing backquote.
			var ok bool
			path, _, ok = strings.Cut(args[1:], "`")
			if !ok {
				return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args)
			}
			// Account for both backquotes as well as the pattern itself.
			trimBytes(1 + len(path) + 1)

		case '"':
			// Double-quoted pattern: find the closing quote, skipping the byte
			// after each backslash, then let strconv.Unquote decode it.
			i := 1
			for ; i < len(args); i++ {
				if args[i] == '\\' {
					i++
					continue
				}
				if args[i] == '"' {
					q, err := strconv.Unquote(args[:i+1])
					if err != nil {
						return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args[:i+1])
					}
					path = q
					trimBytes(i + 1)
					break Switch
				}
			}
			// The loop ran off the end of args: the closing quote is missing.
			if i >= len(args) {
				return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args)
			}
		}

		// A pattern must be followed by white space or the end of the line.
		if args != "" {
			r, _ := utf8.DecodeRuneInString(args)
			if !unicode.IsSpace(r) {
				return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args)
			}
		}
		list = append(list, fileEmbed{path, pathPos})
	}
	return list, nil
}