github.com/bir3/gocompiler@v0.9.2202/src/go/build/read.go (about) 1 // Copyright 2012 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package build 6 7 import ( 8 "bufio" 9 "bytes" 10 "errors" 11 "fmt" 12 "github.com/bir3/gocompiler/src/go/ast" 13 "github.com/bir3/gocompiler/src/go/parser" 14 "github.com/bir3/gocompiler/src/go/scanner" 15 "github.com/bir3/gocompiler/src/go/token" 16 "io" 17 "strconv" 18 "strings" 19 "unicode" 20 "unicode/utf8" 21 ) 22 23 type importReader struct { 24 b *bufio.Reader 25 buf []byte 26 peek byte 27 err error 28 eof bool 29 nerr int 30 pos token.Position 31 } 32 33 var bom = []byte{0xef, 0xbb, 0xbf} 34 35 func newImportReader(name string, r io.Reader) *importReader { 36 b := bufio.NewReader(r) 37 // Remove leading UTF-8 BOM. 38 // Per https://golang.org/ref/spec#Source_code_representation: 39 // a compiler may ignore a UTF-8-encoded byte order mark (U+FEFF) 40 // if it is the first Unicode code point in the source text. 41 if leadingBytes, err := b.Peek(3); err == nil && bytes.Equal(leadingBytes, bom) { 42 b.Discard(3) 43 } 44 return &importReader{ 45 b: b, 46 pos: token.Position{ 47 Filename: name, 48 Line: 1, 49 Column: 1, 50 }, 51 } 52 } 53 54 func isIdent(c byte) bool { 55 return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9' || c == '_' || c >= utf8.RuneSelf 56 } 57 58 var ( 59 errSyntax = errors.New("syntax error") 60 errNUL = errors.New("unexpected NUL in input") 61 ) 62 63 // syntaxError records a syntax error, but only if an I/O error has not already been recorded. 64 func (r *importReader) syntaxError() { 65 if r.err == nil { 66 r.err = errSyntax 67 } 68 } 69 70 // readByte reads the next byte from the input, saves it in buf, and returns it. 71 // If an error occurs, readByte records the error in r.err and returns 0. 
func (r *importReader) readByte() byte {
	c, err := r.b.ReadByte()
	if err == nil {
		// The byte is buffered even when it is a NUL, so buf always
		// reflects exactly what was consumed from r.b.
		r.buf = append(r.buf, c)
		if c == 0 {
			err = errNUL
		}
	}
	if err != nil {
		if err == io.EOF {
			// End of input is tracked separately from r.err.
			r.eof = true
		} else if r.err == nil {
			// Keep only the first error.
			r.err = err
		}
		c = 0
	}
	return c
}

// readByteNoBuf is like readByte but doesn't buffer the byte.
// It exhausts r.buf before reading from r.b.
//
// Every byte it returns successfully advances r.pos: Offset and Column
// grow by one per byte, and a newline moves to column 1 of the next line.
func (r *importReader) readByteNoBuf() byte {
	var c byte
	var err error
	if len(r.buf) > 0 {
		// Replay a byte that readByte consumed earlier.
		c = r.buf[0]
		r.buf = r.buf[1:]
	} else {
		c, err = r.b.ReadByte()
		if err == nil && c == 0 {
			err = errNUL
		}
	}

	if err != nil {
		if err == io.EOF {
			r.eof = true
		} else if r.err == nil {
			r.err = err
		}
		// The position is left untouched when no byte was delivered.
		return 0
	}
	r.pos.Offset++
	if c == '\n' {
		r.pos.Line++
		r.pos.Column = 1
	} else {
		r.pos.Column++
	}
	return c
}

// peekByte returns the next byte from the input reader but does not advance beyond it.
// If skipSpace is set, peekByte skips leading spaces and comments.
//
// Once r.err is set, peekByte returns 0 without reading. It counts such
// calls in r.nerr and panics after more than 10000 of them, which catches
// a caller that keeps looping on a failed reader.
func (r *importReader) peekByte(skipSpace bool) byte {
	if r.err != nil {
		if r.nerr++; r.nerr > 10000 {
			panic("go/build: import reader looping")
		}
		return 0
	}

	// Use r.peek as first input byte.
	// Don't just return r.peek here: it might have been left by peekByte(false)
	// and this might be peekByte(true).
	c := r.peek
	if c == 0 {
		c = r.readByte()
	}
	for r.err == nil && !r.eof {
		if skipSpace {
			// For the purposes of this reader, semicolons are never necessary to
			// understand the input and are treated as spaces.
			switch c {
			case ' ', '\f', '\t', '\r', '\n', ';':
				c = r.readByte()
				continue

			case '/':
				c = r.readByte()
				if c == '/' {
					// Line comment: consume through the newline (or until
					// an error or end of input).
					for c != '\n' && r.err == nil && !r.eof {
						c = r.readByte()
					}
				} else if c == '*' {
					// Block comment: slide a two-byte window (c, c1) until it
					// holds "*/". Reaching end of input first is a syntax
					// error, which also terminates the loop via r.err.
					var c1 byte
					for (c != '*' || c1 != '/') && r.err == nil {
						if r.eof {
							r.syntaxError()
						}
						c, c1 = c1, r.readByte()
					}
				} else {
					// A lone '/' is not valid where space is being skipped.
					r.syntaxError()
				}
				// Read the byte after the comment and re-examine it.
				c = r.readByte()
				continue
			}
		}
		break
	}
	// Remember the byte so the next peekByte/nextByte call sees it again.
	r.peek = c
	return r.peek
}

// nextByte is like peekByte but advances beyond the returned byte.
func (r *importReader) nextByte(skipSpace bool) byte {
	c := r.peekByte(skipSpace)
	// Clearing peek is what consumes the byte.
	r.peek = 0
	return c
}

// goEmbed is the text that must directly follow "//" for a comment to be
// treated as a //go:embed directive by findEmbed.
var goEmbed = []byte("go:embed")

// findEmbed advances the input reader to the next //go:embed comment.
// It reports whether it found a comment.
// (Otherwise it found an error or EOF.)
//
// On a true result the reader has consumed "//go:embed" plus the single
// space or tab that follows it. A directive is recognized only when, since
// the last newline, nothing but spaces and tabs preceded the "//".
// Bytes are read with readByteNoBuf, so r.pos is advanced as the scan proceeds.
func (r *importReader) findEmbed(first bool) bool {
	// The import block scan stopped after a non-space character,
	// so the reader is not at the start of a line on the first call.
	// After that, each //go:embed extraction leaves the reader
	// at the end of a line.
	//
	// NOTE(review): readByteNoBuf drains r.buf before touching r.b, so any
	// bytes still held in r.buf are replayed through this scan first.
	startLine := !first
	var c byte
	for r.err == nil && !r.eof {
		c = r.readByteNoBuf()
	Reswitch:
		// Reswitch re-dispatches on a byte that was read while finishing
		// the previous token, without reading a new one.
		switch c {
		default:
			startLine = false

		case '\n':
			startLine = true

		case ' ', '\t':
			// leave startLine alone

		case '"':
			// Skip an interpreted string literal so its contents are not
			// scanned for comments. A backslash escapes the next byte.
			startLine = false
			for r.err == nil {
				if r.eof {
					r.syntaxError()
				}
				c = r.readByteNoBuf()
				if c == '\\' {
					r.readByteNoBuf()
					if r.err != nil {
						r.syntaxError()
						return false
					}
					continue
				}
				if c == '"' {
					c = r.readByteNoBuf()
					goto Reswitch
				}
			}
			goto Reswitch

		case '`':
			// Skip a raw string literal; it has no escapes.
			startLine = false
			for r.err == nil {
				if r.eof {
					r.syntaxError()
				}
				c = r.readByteNoBuf()
				if c == '`' {
					c = r.readByteNoBuf()
					goto Reswitch
				}
			}

		case '\'':
			// Skip a rune literal, honoring backslash escapes.
			startLine = false
			for r.err == nil {
				if r.eof {
					r.syntaxError()
				}
				c = r.readByteNoBuf()
				if c == '\\' {
					r.readByteNoBuf()
					if r.err != nil {
						r.syntaxError()
						return false
					}
					continue
				}
				if c == '\'' {
					c = r.readByteNoBuf()
					goto Reswitch
				}
			}

		case '/':
			c = r.readByteNoBuf()
			switch c {
			default:
				// A lone '/': dispatch on the byte that followed it.
				startLine = false
				goto Reswitch

			case '*':
				// Block comment: slide a two-byte window until it holds "*/".
				var c1 byte
				for (c != '*' || c1 != '/') && r.err == nil {
					if r.eof {
						r.syntaxError()
					}
					c, c1 = c1, r.readByteNoBuf()
				}
				startLine = false

			case '/':
				if startLine {
					// Try to read this as a //go:embed comment.
					for i := range goEmbed {
						c = r.readByteNoBuf()
						if c != goEmbed[i] {
							goto SkipSlashSlash
						}
					}
					c = r.readByteNoBuf()
					if c == ' ' || c == '\t' {
						// Found one!
						return true
					}
				}
			SkipSlashSlash:
				// Ordinary line comment (or a near miss): discard the rest
				// of the line.
				for c != '\n' && r.err == nil && !r.eof {
					c = r.readByteNoBuf()
				}
				startLine = true
			}
		}
	}
	return false
}

// readKeyword reads the given keyword from the input.
// If the keyword is not present, readKeyword records a syntax error.
// A syntax error is also recorded when the keyword is immediately followed
// by another identifier byte.
func (r *importReader) readKeyword(kw string) {
	// Skip leading space and comments; the first keyword byte stays in r.peek.
	r.peekByte(true)
	for i := 0; i < len(kw); i++ {
		if r.nextByte(false) != kw[i] {
			r.syntaxError()
			return
		}
	}
	if isIdent(r.peekByte(false)) {
		r.syntaxError()
	}
}

// readIdent reads an identifier from the input.
// If an identifier is not present, readIdent records a syntax error.
func (r *importReader) readIdent() {
	c := r.peekByte(true)
	if !isIdent(c) {
		r.syntaxError()
		return
	}
	// Consume identifier bytes one at a time: clearing r.peek discards the
	// byte just examined so the next peekByte reads a fresh one.
	for isIdent(r.peekByte(false)) {
		r.peek = 0
	}
}

// readString reads a quoted string literal from the input.
// If a string literal is not present, readString records a syntax error.
func (r *importReader) readString() {
	switch r.nextByte(true) {
	case '`':
		// Raw string: runs to the next backquote; end of input first is
		// a syntax error.
		for r.err == nil {
			if r.nextByte(false) == '`' {
				break
			}
			if r.eof {
				r.syntaxError()
			}
		}
	case '"':
		// Interpreted string: must close before a newline or end of input.
		for r.err == nil {
			c := r.nextByte(false)
			if c == '"' {
				break
			}
			if r.eof || c == '\n' {
				r.syntaxError()
			}
			if c == '\\' {
				// Skip the escaped byte so an escaped quote does not
				// terminate the literal.
				r.nextByte(false)
			}
		}
	default:
		r.syntaxError()
	}
}

// readImport reads an import clause - optional identifier followed by quoted string -
// from the input.
// A single '.' is accepted in place of the identifier.
func (r *importReader) readImport() {
	c := r.peekByte(true)
	if c == '.' {
		// Consume the dot.
		r.peek = 0
	} else if isIdent(c) {
		r.readIdent()
	}
	r.readString()
}

// readComments is like io.ReadAll, except that it only reads the leading
// block of comments in the file.
// It returns the bytes consumed together with any error recorded by the reader.
func readComments(f io.Reader) ([]byte, error) {
	r := newImportReader("", f)
	// Skipping space and comments consumes the leading comment block and
	// stops on the first byte that belongs to neither.
	r.peekByte(true)
	if r.err == nil && !r.eof {
		// Didn't reach EOF, so must have found a non-space byte. Remove it.
		r.buf = r.buf[:len(r.buf)-1]
	}
	return r.buf, r.err
}

// readGoInfo expects a Go file as input and reads the file up to and including the import section.
// It records what it learned in *info.
// If info.fset is non-nil, readGoInfo parses the file and sets info.parsed, info.parseErr,
// info.imports and info.embeds.
//
// It only returns an error if there are problems reading the file,
// not for syntax errors in the file itself.
// A NUL byte in the input counts as a read problem and is returned as an error,
// as is an import path from the parser that strconv.Unquote rejects.
func readGoInfo(f io.Reader, info *fileInfo) error {
	r := newImportReader(info.name, f)

	r.readKeyword("package")
	r.readIdent()
	// Every import declaration starts with 'i'; anything else ends the header.
	for r.peekByte(true) == 'i' {
		r.readKeyword("import")
		if r.peekByte(true) == '(' {
			// Parenthesized group: consume '(', the import specs, then ')'.
			r.nextByte(false)
			for r.peekByte(true) != ')' && r.err == nil {
				r.readImport()
			}
			r.nextByte(false)
		} else {
			r.readImport()
		}
	}

	info.header = r.buf

	// If we stopped successfully before EOF, we read a byte that told us we were done.
	// Return all but that last byte, which would cause a syntax error if we let it through.
	if r.err == nil && !r.eof {
		info.header = r.buf[:len(r.buf)-1]
	}

	// If we stopped for a syntax error, consume the whole file so that
	// we are sure we don't change the errors that go/parser returns.
	if r.err == errSyntax {
		r.err = nil
		for r.err == nil && !r.eof {
			r.readByte()
		}
		info.header = r.buf
	}
	// Whatever is left in r.err now is a read failure (or NUL byte), not a
	// syntax error.
	if r.err != nil {
		return r.err
	}

	// Without a file set the caller only wants info.header.
	if info.fset == nil {
		return nil
	}

	// Parse file header & record imports.
	info.parsed, info.parseErr = parser.ParseFile(info.fset, info.name, info.header, parser.ImportsOnly|parser.ParseComments)
	if info.parseErr != nil {
		// Parse errors are reported through info.parseErr, not the return value.
		return nil
	}

	hasEmbed := false
	for _, decl := range info.parsed.Decls {
		d, ok := decl.(*ast.GenDecl)
		if !ok {
			continue
		}
		for _, dspec := range d.Specs {
			spec, ok := dspec.(*ast.ImportSpec)
			if !ok {
				continue
			}
			quoted := spec.Path.Value
			path, err := strconv.Unquote(quoted)
			if err != nil {
				return fmt.Errorf("parser returned invalid quoted string: <%s>", quoted)
			}
			if !isValidImport(path) {
				// The parser used to return a parse error for invalid import paths, but
				// no longer does, so check for and create the error here instead.
				info.parseErr = scanner.Error{Pos: info.fset.Position(spec.Pos()), Msg: "invalid import path: " + path}
				info.imports = nil
				return nil
			}
			if path == "embed" {
				hasEmbed = true
			}

			// A lone spec in a declaration has no doc of its own when the
			// comment sits on the declaration, so fall back to that.
			doc := spec.Doc
			if doc == nil && len(d.Specs) == 1 {
				doc = d.Doc
			}
			info.imports = append(info.imports, fileImport{path, spec.Pos(), doc})
		}
	}

	// Extract directives.
	// Only "//go:" comments positioned before the package clause are recorded.
	for _, group := range info.parsed.Comments {
		if group.Pos() >= info.parsed.Package {
			break
		}
		for _, c := range group.List {
			if strings.HasPrefix(c.Text, "//go:") {
				info.directives = append(info.directives, Directive{c.Text, info.fset.Position(c.Slash)})
			}
		}
	}

	// If the file imports "embed",
	// we have to look for //go:embed comments
	// in the remainder of the file.
	// The compiler will enforce the mapping of comments to
	// declared variables. We just need to know the patterns.
	// If there were //go:embed comments earlier in the file
	// (near the package statement or imports), the compiler
	// will reject them. They can be (and have already been) ignored.
	if hasEmbed {
		var line []byte
		for first := true; r.findEmbed(first); first = false {
			line = line[:0]
			// findEmbed stops just past "//go:embed" and one space or tab,
			// so pos is where the directive's arguments begin.
			pos := r.pos
			for {
				c := r.readByteNoBuf()
				if c == '\n' || r.err != nil || r.eof {
					break
				}
				line = append(line, c)
			}
			// Add args if line is well-formed.
			// Ignore badly-formed lines - the compiler will report them when it finds them,
			// and we can pretend they are not there to help go list succeed with what it knows.
			embs, err := parseGoEmbed(string(line), pos)
			if err == nil {
				info.embeds = append(info.embeds, embs...)
			}
		}
	}

	return nil
}

// isValidImport checks if the import is a valid import using the more strict
// checks allowed by the implementation restriction in https://go.dev/ref/spec#Import_declarations.
// It was ported from the function of the same name that was removed from the
// parser in CL 424855, when the parser stopped doing these checks.
//
// A path is rejected when it is empty or contains a rune that is not
// graphic, is a space, or is one of the characters in illegalChars.
func isValidImport(s string) bool {
	const illegalChars = `!"#$%&'()*,:;<=>?[\]^{|}` + "`\uFFFD"
	for _, r := range s {
		if !unicode.IsGraphic(r) || unicode.IsSpace(r) || strings.ContainsRune(illegalChars, r) {
			return false
		}
	}
	return s != ""
}

// parseGoEmbed parses the text following "//go:embed" to extract the glob patterns.
// It accepts unquoted space-separated patterns as well as double-quoted and back-quoted Go strings.
// This is based on a similar function in cmd/compile/internal/gc/noder.go;
// this version calculates position information as well.
545 func parseGoEmbed(args string, pos token.Position) ([]fileEmbed, error) { 546 trimBytes := func(n int) { 547 pos.Offset += n 548 pos.Column += utf8.RuneCountInString(args[:n]) 549 args = args[n:] 550 } 551 trimSpace := func() { 552 trim := strings.TrimLeftFunc(args, unicode.IsSpace) 553 trimBytes(len(args) - len(trim)) 554 } 555 556 var list []fileEmbed 557 for trimSpace(); args != ""; trimSpace() { 558 var path string 559 pathPos := pos 560 Switch: 561 switch args[0] { 562 default: 563 i := len(args) 564 for j, c := range args { 565 if unicode.IsSpace(c) { 566 i = j 567 break 568 } 569 } 570 path = args[:i] 571 trimBytes(i) 572 573 case '`': 574 var ok bool 575 path, _, ok = strings.Cut(args[1:], "`") 576 if !ok { 577 return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args) 578 } 579 trimBytes(1 + len(path) + 1) 580 581 case '"': 582 i := 1 583 for ; i < len(args); i++ { 584 if args[i] == '\\' { 585 i++ 586 continue 587 } 588 if args[i] == '"' { 589 q, err := strconv.Unquote(args[:i+1]) 590 if err != nil { 591 return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args[:i+1]) 592 } 593 path = q 594 trimBytes(i + 1) 595 break Switch 596 } 597 } 598 if i >= len(args) { 599 return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args) 600 } 601 } 602 603 if args != "" { 604 r, _ := utf8.DecodeRuneInString(args) 605 if !unicode.IsSpace(r) { 606 return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args) 607 } 608 } 609 list = append(list, fileEmbed{path, pathPos}) 610 } 611 return list, nil 612 }