github.com/go-asm/go@v1.21.1-0.20240213172139-40c5ead50c48/cmd/go/modindex/build_read.go (about) 1 // Copyright 2012 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // This file is a lightly modified copy go/build/read.go with unused parts 6 // removed. 7 8 package modindex 9 10 import ( 11 "bufio" 12 "bytes" 13 "errors" 14 "fmt" 15 "go/ast" 16 "go/build" 17 "go/parser" 18 "go/token" 19 "io" 20 "strconv" 21 "strings" 22 "unicode" 23 "unicode/utf8" 24 ) 25 26 type importReader struct { 27 b *bufio.Reader 28 buf []byte 29 peek byte 30 err error 31 eof bool 32 nerr int 33 pos token.Position 34 } 35 36 var bom = []byte{0xef, 0xbb, 0xbf} 37 38 func newImportReader(name string, r io.Reader) *importReader { 39 b := bufio.NewReader(r) 40 // Remove leading UTF-8 BOM. 41 // Per https://golang.org/ref/spec#Source_code_representation: 42 // a compiler may ignore a UTF-8-encoded byte order mark (U+FEFF) 43 // if it is the first Unicode code point in the source text. 44 if leadingBytes, err := b.Peek(3); err == nil && bytes.Equal(leadingBytes, bom) { 45 b.Discard(3) 46 } 47 return &importReader{ 48 b: b, 49 pos: token.Position{ 50 Filename: name, 51 Line: 1, 52 Column: 1, 53 }, 54 } 55 } 56 57 func isIdent(c byte) bool { 58 return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9' || c == '_' || c >= utf8.RuneSelf 59 } 60 61 var ( 62 errSyntax = errors.New("syntax error") 63 errNUL = errors.New("unexpected NUL in input") 64 ) 65 66 // syntaxError records a syntax error, but only if an I/O error has not already been recorded. 67 func (r *importReader) syntaxError() { 68 if r.err == nil { 69 r.err = errSyntax 70 } 71 } 72 73 // readByte reads the next byte from the input, saves it in buf, and returns it. 74 // If an error occurs, readByte records the error in r.err and returns 0. 
func (r *importReader) readByte() byte {
	c, err := r.b.ReadByte()
	if err == nil {
		// The byte is saved even when it is a NUL that is about to be
		// reported as an error.
		r.buf = append(r.buf, c)
		if c == 0 {
			err = errNUL
		}
	}
	if err != nil {
		// EOF is tracked separately in r.eof and never stored in r.err;
		// any other error is kept only if it is the first one.
		if err == io.EOF {
			r.eof = true
		} else if r.err == nil {
			r.err = err
		}
		c = 0
	}
	return c
}

// readByteNoBuf is like readByte but doesn't buffer the byte.
// It exhausts r.buf before reading from r.b.
// Each byte it returns successfully advances r.pos.
func (r *importReader) readByteNoBuf() byte {
	var c byte
	var err error
	if len(r.buf) > 0 {
		// Replay a byte previously saved by readByte.
		c = r.buf[0]
		r.buf = r.buf[1:]
	} else {
		c, err = r.b.ReadByte()
		if err == nil && c == 0 {
			err = errNUL
		}
	}

	if err != nil {
		if err == io.EOF {
			r.eof = true
		} else if r.err == nil {
			r.err = err
		}
		// The position is not advanced for a failed read.
		return 0
	}
	r.pos.Offset++
	if c == '\n' {
		r.pos.Line++
		r.pos.Column = 1
	} else {
		r.pos.Column++
	}
	return c
}

// peekByte returns the next byte from the input reader but does not advance beyond it.
// If skipSpace is set, peekByte skips leading spaces and comments.
// Once r.err is set it returns 0 without reading, and panics if it is
// called more than 10000 times in that state.
func (r *importReader) peekByte(skipSpace bool) byte {
	if r.err != nil {
		// Callers are expected to stop once an error is recorded;
		// count the calls so that a caller that keeps looping is caught.
		if r.nerr++; r.nerr > 10000 {
			panic("go/build: import reader looping")
		}
		return 0
	}

	// Use r.peek as first input byte.
	// Don't just return r.peek here: it might have been left by peekByte(false)
	// and this might be peekByte(true).
	c := r.peek
	if c == 0 {
		// No byte is pending, so read a fresh one.
		c = r.readByte()
	}
	for r.err == nil && !r.eof {
		if skipSpace {
			// For the purposes of this reader, semicolons are never necessary to
			// understand the input and are treated as spaces.
			switch c {
			case ' ', '\f', '\t', '\r', '\n', ';':
				c = r.readByte()
				continue

			case '/':
				c = r.readByte()
				if c == '/' {
					// Line comment: skip through the end of the line.
					for c != '\n' && r.err == nil && !r.eof {
						c = r.readByte()
					}
				} else if c == '*' {
					// Block comment: c and c1 hold the two most recently
					// read bytes; stop once they are '*' followed by '/'.
					var c1 byte
					for (c != '*' || c1 != '/') && r.err == nil {
						if r.eof {
							// Unterminated block comment.
							r.syntaxError()
						}
						c, c1 = c1, r.readByte()
					}
				} else {
					// A lone '/' is not valid here.
					r.syntaxError()
				}
				c = r.readByte()
				continue
			}
		}
		break
	}
	r.peek = c
	return r.peek
}

// nextByte is like peekByte but advances beyond the returned byte.
func (r *importReader) nextByte(skipSpace bool) byte {
	c := r.peekByte(skipSpace)
	// Clearing r.peek marks the byte as consumed.
	r.peek = 0
	return c
}

// goEmbed is the text that must directly follow "//" for a comment to be
// treated as a //go:embed directive by findEmbed.
var goEmbed = []byte("go:embed")

// findEmbed advances the input reader to the next //go:embed comment.
// It reports whether it found a comment.
// (Otherwise it found an error or EOF.)
// On success the reader is positioned just after the space or tab that
// follows "//go:embed".
func (r *importReader) findEmbed(first bool) bool {
	// The import block scan stopped after a non-space character,
	// so the reader is not at the start of a line on the first call.
	// After that, each //go:embed extraction leaves the reader
	// at the end of a line.
	startLine := !first
	var c byte
	for r.err == nil && !r.eof {
		c = r.readByteNoBuf()
	Reswitch:
		// Reswitch is the re-entry point used when a byte has already
		// been read into c and still needs to be classified.
		switch c {
		default:
			startLine = false

		case '\n':
			startLine = true

		case ' ', '\t':
			// leave startLine alone

		case '"':
			// Interpreted string literal: skip to the closing quote,
			// stepping over backslash escapes.
			startLine = false
			for r.err == nil {
				if r.eof {
					r.syntaxError()
				}
				c = r.readByteNoBuf()
				if c == '\\' {
					r.readByteNoBuf()
					if r.err != nil {
						r.syntaxError()
						return false
					}
					continue
				}
				if c == '"' {
					c = r.readByteNoBuf()
					goto Reswitch
				}
			}
			goto Reswitch

		case '`':
			// Raw string literal: skip to the closing back quote.
			startLine = false
			for r.err == nil {
				if r.eof {
					r.syntaxError()
				}
				c = r.readByteNoBuf()
				if c == '`' {
					c = r.readByteNoBuf()
					goto Reswitch
				}
			}

		case '\'':
			// Rune literal: skip to the closing quote,
			// stepping over backslash escapes.
			startLine = false
			for r.err == nil {
				if r.eof {
					r.syntaxError()
				}
				c = r.readByteNoBuf()
				if c == '\\' {
					r.readByteNoBuf()
					if r.err != nil {
						r.syntaxError()
						return false
					}
					continue
				}
				if c == '\'' {
					c = r.readByteNoBuf()
					goto Reswitch
				}
			}

		case '/':
			c = r.readByteNoBuf()
			switch c {
			default:
				// Not a comment; classify the byte after the slash.
				startLine = false
				goto Reswitch

			case '*':
				// Block comment: c and c1 hold the two most recently
				// read bytes; stop once they are '*' followed by '/'.
				var c1 byte
				for (c != '*' || c1 != '/') && r.err == nil {
					if r.eof {
						r.syntaxError()
					}
					c, c1 = c1, r.readByteNoBuf()
				}
				startLine = false

			case '/':
				if startLine {
					// Try to read this as a //go:embed comment.
					for i := range goEmbed {
						c = r.readByteNoBuf()
						if c != goEmbed[i] {
							goto SkipSlashSlash
						}
					}
					c = r.readByteNoBuf()
					if c == ' ' || c == '\t' {
						// Found one!
						return true
					}
				}
			SkipSlashSlash:
				// Ordinary line comment (or a near miss): discard the
				// rest of the line.
				for c != '\n' && r.err == nil && !r.eof {
					c = r.readByteNoBuf()
				}
				startLine = true
			}
		}
	}
	return false
}

// readKeyword reads the given keyword from the input.
312 // If the keyword is not present, readKeyword records a syntax error. 313 func (r *importReader) readKeyword(kw string) { 314 r.peekByte(true) 315 for i := 0; i < len(kw); i++ { 316 if r.nextByte(false) != kw[i] { 317 r.syntaxError() 318 return 319 } 320 } 321 if isIdent(r.peekByte(false)) { 322 r.syntaxError() 323 } 324 } 325 326 // readIdent reads an identifier from the input. 327 // If an identifier is not present, readIdent records a syntax error. 328 func (r *importReader) readIdent() { 329 c := r.peekByte(true) 330 if !isIdent(c) { 331 r.syntaxError() 332 return 333 } 334 for isIdent(r.peekByte(false)) { 335 r.peek = 0 336 } 337 } 338 339 // readString reads a quoted string literal from the input. 340 // If an identifier is not present, readString records a syntax error. 341 func (r *importReader) readString() { 342 switch r.nextByte(true) { 343 case '`': 344 for r.err == nil { 345 if r.nextByte(false) == '`' { 346 break 347 } 348 if r.eof { 349 r.syntaxError() 350 } 351 } 352 case '"': 353 for r.err == nil { 354 c := r.nextByte(false) 355 if c == '"' { 356 break 357 } 358 if r.eof || c == '\n' { 359 r.syntaxError() 360 } 361 if c == '\\' { 362 r.nextByte(false) 363 } 364 } 365 default: 366 r.syntaxError() 367 } 368 } 369 370 // readImport reads an import clause - optional identifier followed by quoted string - 371 // from the input. 372 func (r *importReader) readImport() { 373 c := r.peekByte(true) 374 if c == '.' { 375 r.peek = 0 376 } else if isIdent(c) { 377 r.readIdent() 378 } 379 r.readString() 380 } 381 382 // readComments is like io.ReadAll, except that it only reads the leading 383 // block of comments in the file. 384 func readComments(f io.Reader) ([]byte, error) { 385 r := newImportReader("", f) 386 r.peekByte(true) 387 if r.err == nil && !r.eof { 388 // Didn't reach EOF, so must have found a non-space byte. Remove it. 
389 r.buf = r.buf[:len(r.buf)-1] 390 } 391 return r.buf, r.err 392 } 393 394 // readGoInfo expects a Go file as input and reads the file up to and including the import section. 395 // It records what it learned in *info. 396 // If info.fset is non-nil, readGoInfo parses the file and sets info.parsed, info.parseErr, 397 // info.imports and info.embeds. 398 // 399 // It only returns an error if there are problems reading the file, 400 // not for syntax errors in the file itself. 401 func readGoInfo(f io.Reader, info *fileInfo) error { 402 r := newImportReader(info.name, f) 403 404 r.readKeyword("package") 405 r.readIdent() 406 for r.peekByte(true) == 'i' { 407 r.readKeyword("import") 408 if r.peekByte(true) == '(' { 409 r.nextByte(false) 410 for r.peekByte(true) != ')' && r.err == nil { 411 r.readImport() 412 } 413 r.nextByte(false) 414 } else { 415 r.readImport() 416 } 417 } 418 419 info.header = r.buf 420 421 // If we stopped successfully before EOF, we read a byte that told us we were done. 422 // Return all but that last byte, which would cause a syntax error if we let it through. 423 if r.err == nil && !r.eof { 424 info.header = r.buf[:len(r.buf)-1] 425 } 426 427 // If we stopped for a syntax error, consume the whole file so that 428 // we are sure we don't change the errors that go/parser returns. 429 if r.err == errSyntax { 430 r.err = nil 431 for r.err == nil && !r.eof { 432 r.readByte() 433 } 434 info.header = r.buf 435 } 436 if r.err != nil { 437 return r.err 438 } 439 440 if info.fset == nil { 441 return nil 442 } 443 444 // Parse file header & record imports. 
445 info.parsed, info.parseErr = parser.ParseFile(info.fset, info.name, info.header, parser.ImportsOnly|parser.ParseComments) 446 if info.parseErr != nil { 447 return nil 448 } 449 450 hasEmbed := false 451 for _, decl := range info.parsed.Decls { 452 d, ok := decl.(*ast.GenDecl) 453 if !ok { 454 continue 455 } 456 for _, dspec := range d.Specs { 457 spec, ok := dspec.(*ast.ImportSpec) 458 if !ok { 459 continue 460 } 461 quoted := spec.Path.Value 462 path, err := strconv.Unquote(quoted) 463 if err != nil { 464 return fmt.Errorf("parser returned invalid quoted string: <%s>", quoted) 465 } 466 if path == "embed" { 467 hasEmbed = true 468 } 469 470 doc := spec.Doc 471 if doc == nil && len(d.Specs) == 1 { 472 doc = d.Doc 473 } 474 info.imports = append(info.imports, fileImport{path, spec.Pos(), doc}) 475 } 476 } 477 478 // Extract directives. 479 for _, group := range info.parsed.Comments { 480 if group.Pos() >= info.parsed.Package { 481 break 482 } 483 for _, c := range group.List { 484 if strings.HasPrefix(c.Text, "//go:") { 485 info.directives = append(info.directives, build.Directive{Text: c.Text, Pos: info.fset.Position(c.Slash)}) 486 } 487 } 488 } 489 490 // If the file imports "embed", 491 // we have to look for //go:embed comments 492 // in the remainder of the file. 493 // The compiler will enforce the mapping of comments to 494 // declared variables. We just need to know the patterns. 495 // If there were //go:embed comments earlier in the file 496 // (near the package statement or imports), the compiler 497 // will reject them. They can be (and have already been) ignored. 498 if hasEmbed { 499 var line []byte 500 for first := true; r.findEmbed(first); first = false { 501 line = line[:0] 502 pos := r.pos 503 for { 504 c := r.readByteNoBuf() 505 if c == '\n' || r.err != nil || r.eof { 506 break 507 } 508 line = append(line, c) 509 } 510 // Add args if line is well-formed. 
511 // Ignore badly-formed lines - the compiler will report them when it finds them, 512 // and we can pretend they are not there to help go list succeed with what it knows. 513 embs, err := parseGoEmbed(string(line), pos) 514 if err == nil { 515 info.embeds = append(info.embeds, embs...) 516 } 517 } 518 } 519 520 return nil 521 } 522 523 // parseGoEmbed parses the text following "//go:embed" to extract the glob patterns. 524 // It accepts unquoted space-separated patterns as well as double-quoted and back-quoted Go strings. 525 // This is based on a similar function in github.com/go-asm/go/cmd/compile/gc/noder.go; 526 // this version calculates position information as well. 527 func parseGoEmbed(args string, pos token.Position) ([]fileEmbed, error) { 528 trimBytes := func(n int) { 529 pos.Offset += n 530 pos.Column += utf8.RuneCountInString(args[:n]) 531 args = args[n:] 532 } 533 trimSpace := func() { 534 trim := strings.TrimLeftFunc(args, unicode.IsSpace) 535 trimBytes(len(args) - len(trim)) 536 } 537 538 var list []fileEmbed 539 for trimSpace(); args != ""; trimSpace() { 540 var path string 541 pathPos := pos 542 Switch: 543 switch args[0] { 544 default: 545 i := len(args) 546 for j, c := range args { 547 if unicode.IsSpace(c) { 548 i = j 549 break 550 } 551 } 552 path = args[:i] 553 trimBytes(i) 554 555 case '`': 556 var ok bool 557 path, _, ok = strings.Cut(args[1:], "`") 558 if !ok { 559 return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args) 560 } 561 trimBytes(1 + len(path) + 1) 562 563 case '"': 564 i := 1 565 for ; i < len(args); i++ { 566 if args[i] == '\\' { 567 i++ 568 continue 569 } 570 if args[i] == '"' { 571 q, err := strconv.Unquote(args[:i+1]) 572 if err != nil { 573 return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args[:i+1]) 574 } 575 path = q 576 trimBytes(i + 1) 577 break Switch 578 } 579 } 580 if i >= len(args) { 581 return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args) 582 } 583 } 584 585 if 
args != "" { 586 r, _ := utf8.DecodeRuneInString(args) 587 if !unicode.IsSpace(r) { 588 return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args) 589 } 590 } 591 list = append(list, fileEmbed{path, pathPos}) 592 } 593 return list, nil 594 }