github.com/bazelbuild/bazel-gazelle@v0.36.1-0.20240520142334-61b277ba6fed/language/go/fileinfo.go (about) 1 /* Copyright 2018 The Bazel Authors. All rights reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 */ 15 16 package golang 17 18 import ( 19 "bytes" 20 "errors" 21 "fmt" 22 "go/ast" 23 "go/parser" 24 "go/token" 25 "log" 26 "path" 27 "path/filepath" 28 "strconv" 29 "strings" 30 "unicode" 31 "unicode/utf8" 32 33 "github.com/bazelbuild/bazel-gazelle/config" 34 "github.com/bazelbuild/bazel-gazelle/internal/version" 35 "github.com/bazelbuild/bazel-gazelle/rule" 36 ) 37 38 // fileInfo holds information used to decide how to build a file. This 39 // information comes from the file's name, from package and import declarations 40 // (in .go files), and from +build and cgo comments. 41 type fileInfo struct { 42 path string 43 name string 44 45 // ext is the type of file, based on extension. 46 ext ext 47 48 // packageName is the Go package name of a .go file, without the 49 // "_test" suffix if it was present. It is empty for non-Go files. 50 packageName string 51 52 // hasMainFunction is true when packageName is "main" and a main function was 53 // found in the file 54 hasMainFunction bool 55 56 // isTest is true if the file stem (the part before the extension) 57 // ends with "_test.go". This is never true for non-Go files. 58 isTest bool 59 60 // isExternalTest is true when the file isTest and the original package 61 // name ends with "_test" 62 isExternalTest bool 63 64 // imports is a list of packages imported by a file. It does not include 65 // "C" or anything from the standard library. 66 imports []string 67 68 // embeds is a list of //go:embed patterns and their positions. 69 embeds []fileEmbed 70 71 // isCgo is true for .go files that import "C". 72 isCgo bool 73 74 // goos and goarch contain the OS and architecture suffixes in the filename, 75 // if they were present. 76 goos, goarch string 77 78 // tags is a list of build tag lines. Each entry is the trimmed text of 79 // a line after a "+build" prefix. 80 tags *buildTags 81 82 // cppopts, copts, cxxopts and clinkopts contain flags that are part 83 // of CPPFLAGS, CFLAGS, CXXFLAGS, and LDFLAGS directives in cgo comments. 84 cppopts, copts, cxxopts, clinkopts []*cgoTagsAndOpts 85 86 // hasServices indicates whether a .proto file has service definitions. 87 hasServices bool 88 } 89 90 // fileEmbed represents an individual go:embed pattern. 91 // A go:embed directive may contain multiple patterns. A pattern may match 92 // multiple files. 93 type fileEmbed struct { 94 path string 95 pos token.Position 96 } 97 98 // optSeparator is a special character inserted between options that appeared 99 // together in a #cgo directive. This allows options to be split, modified, 100 // and escaped by other packages. 101 // 102 // It's important to keep options grouped together in the same string. For 103 // example, if we have "-framework IOKit" together in a #cgo directive, 104 // "-framework" shouldn't be treated as a separate string for the purposes of 105 // sorting and de-duplicating. 106 const optSeparator = "\x1D" 107 108 // ext indicates how a file should be treated, based on extension. 109 type ext int 110 111 const ( 112 // unknownExt is applied files that aren't buildable with Go. 113 unknownExt ext = iota 114 115 // goExt is applied to .go files. 116 goExt 117 118 // cExt is applied to C and C++ files. 119 cExt 120 121 // hExt is applied to header files. If cgo code is present, these may be 122 // C or C++ headers. If not, they are treated as Go assembly headers. 123 hExt 124 125 // sExt is applied to Go assembly files, ending with .s. 126 sExt 127 128 // csExt is applied to other assembly files, ending with .S. These are built 129 // with the C compiler if cgo code is present. 130 csExt 131 132 // protoExt is applied to .proto files. 133 protoExt 134 ) 135 136 // fileNameInfo returns information that can be inferred from the name of 137 // a file. It does not read data from the file. 138 func fileNameInfo(path_ string) fileInfo { 139 name := filepath.Base(path_) 140 var ext ext 141 switch path.Ext(name) { 142 case ".go": 143 ext = goExt 144 case ".c", ".cc", ".cpp", ".cxx", ".m", ".mm": 145 ext = cExt 146 case ".h", ".hh", ".hpp", ".hxx": 147 ext = hExt 148 case ".s": 149 ext = sExt 150 case ".S": 151 ext = csExt 152 case ".proto": 153 ext = protoExt 154 default: 155 ext = unknownExt 156 } 157 if strings.HasPrefix(name, ".") || strings.HasPrefix(name, "_") { 158 ext = unknownExt 159 } 160 161 // Determine test, goos, and goarch. This is intended to match the logic 162 // in goodOSArchFile in go/build. 163 var isTest bool 164 var goos, goarch string 165 l := strings.Split(name[:len(name)-len(path.Ext(name))], "_") 166 if len(l) >= 2 && l[len(l)-1] == "test" { 167 isTest = ext == goExt 168 l = l[:len(l)-1] 169 } 170 switch { 171 case len(l) >= 3 && rule.KnownOSSet[l[len(l)-2]] && rule.KnownArchSet[l[len(l)-1]]: 172 goos = l[len(l)-2] 173 goarch = l[len(l)-1] 174 case len(l) >= 2 && rule.KnownOSSet[l[len(l)-1]]: 175 goos = l[len(l)-1] 176 case len(l) >= 2 && rule.KnownArchSet[l[len(l)-1]]: 177 goarch = l[len(l)-1] 178 } 179 180 return fileInfo{ 181 path: path_, 182 name: name, 183 ext: ext, 184 isTest: isTest, 185 goos: goos, 186 goarch: goarch, 187 } 188 } 189 190 // otherFileInfo returns information about a non-.go file. It will parse 191 // part of the file to determine build tags. If the file can't be read, an 192 // error will be logged, and partial information will be returned. 193 func otherFileInfo(path string) fileInfo { 194 info := fileNameInfo(path) 195 if info.ext == unknownExt { 196 return info 197 } 198 199 tags, err := readTags(info.path) 200 if err != nil { 201 log.Printf("%s: error reading file: %v", info.path, err) 202 return info 203 } 204 info.tags = tags 205 return info 206 } 207 208 // goFileInfo returns information about a .go file. It will parse part of the 209 // file to determine the package name, imports, and build constraints. 210 // If the file can't be read, an error will be logged, and partial information 211 // will be returned. 212 // This function is intended to match go/build.Context.Import. 213 // TODD(#53): extract canonical import path 214 func goFileInfo(path, rel string) fileInfo { 215 info := fileNameInfo(path) 216 fset := token.NewFileSet() 217 pf, err := parser.ParseFile(fset, info.path, nil, parser.ImportsOnly|parser.ParseComments) 218 if err != nil { 219 log.Printf("%s: error reading go file: %v", info.path, err) 220 return info 221 } 222 223 info.packageName = pf.Name.Name 224 if info.isTest && strings.HasSuffix(info.packageName, "_test") { 225 info.packageName = info.packageName[:len(info.packageName)-len("_test")] 226 info.isExternalTest = true 227 } 228 229 importsEmbed := false 230 for _, decl := range pf.Decls { 231 d, ok := decl.(*ast.GenDecl) 232 if !ok { 233 continue 234 } 235 for _, dspec := range d.Specs { 236 spec, ok := dspec.(*ast.ImportSpec) 237 if !ok { 238 continue 239 } 240 quoted := spec.Path.Value 241 path, err := strconv.Unquote(quoted) 242 if err != nil { 243 log.Printf("%s: error reading go file: %v", info.path, err) 244 continue 245 } 246 247 if path == "C" { 248 if info.isTest { 249 log.Printf("%s: warning: use of cgo in test not supported", info.path) 250 } 251 info.isCgo = true 252 cg := spec.Doc 253 if cg == nil && len(d.Specs) == 1 { 254 cg = d.Doc 255 } 256 if cg != nil { 257 if err := saveCgo(&info, rel, cg); err != nil { 258 log.Printf("%s: error reading go file: %v", info.path, err) 259 } 260 } 261 continue 262 } 263 if path == "embed" { 264 importsEmbed = true 265 } 266 info.imports = append(info.imports, path) 267 } 268 } 269 270 tags, err := readTags(info.path) 271 if err != nil { 272 log.Printf("%s: error reading go file: %v", info.path, err) 273 return info 274 } 275 info.tags = tags 276 277 if importsEmbed || info.packageName == "main" { 278 pf, err = parser.ParseFile(fset, info.path, nil, parser.ParseComments) 279 if err != nil { 280 log.Printf("%s: error reading go file: %v", info.path, err) 281 return info 282 } 283 for _, cg := range pf.Comments { 284 for _, c := range cg.List { 285 if !strings.HasPrefix(c.Text, "//go:embed") { 286 continue 287 } 288 args := c.Text[len("//go:embed"):] 289 p := c.Pos() 290 for len(args) > 0 && (args[0] == ' ' || args[0] == '\t') { 291 args = args[1:] 292 p++ 293 } 294 args = strings.TrimSpace(args) // trim right side 295 pos := fset.Position(p) 296 embeds, err := parseGoEmbed(args, pos) 297 if err != nil { 298 log.Printf("%v: parsing //go:embed directive: %v", pos, err) 299 continue 300 } 301 info.embeds = append(info.embeds, embeds...) 302 } 303 } 304 for _, decl := range pf.Decls { 305 if fdecl, ok := decl.(*ast.FuncDecl); ok { 306 if fdecl.Name.Name == "main" { 307 info.hasMainFunction = true 308 break 309 } 310 } 311 } 312 } 313 314 return info 315 } 316 317 // saveCgo extracts CFLAGS, CPPFLAGS, CXXFLAGS, and LDFLAGS directives 318 // from a comment above a "C" import. This is intended to match logic in 319 // go/build.Context.saveCgo. 320 func saveCgo(info *fileInfo, rel string, cg *ast.CommentGroup) error { 321 text := cg.Text() 322 for _, line := range strings.Split(text, "\n") { 323 orig := line 324 325 // Line is 326 // #cgo [GOOS/GOARCH...] LDFLAGS: stuff 327 // 328 line = strings.TrimSpace(line) 329 if len(line) < 5 || line[:4] != "#cgo" || (line[4] != ' ' && line[4] != '\t') { 330 continue 331 } 332 333 // Split at colon. 334 line, argstr, ok := strings.Cut(strings.TrimSpace(line[4:]), ":") 335 if !ok { 336 return fmt.Errorf("%s: invalid #cgo line: %s", info.path, orig) 337 } 338 339 // Parse tags and verb. 340 f := strings.Fields(line) 341 if len(f) < 1 { 342 return fmt.Errorf("%s: invalid #cgo line: %s", info.path, orig) 343 } 344 345 cond, verb := f[:len(f)-1], f[len(f)-1] 346 tags, err := matchAuto(cond) 347 if err != nil { 348 return err 349 } 350 351 // Parse options. 352 opts, err := splitQuoted(argstr) 353 if err != nil { 354 return fmt.Errorf("%s: invalid #cgo line: %s", info.path, orig) 355 } 356 357 for i, opt := range opts { 358 if opt, ok = expandSrcDir(opt, rel); !ok { 359 return fmt.Errorf("%s: malformed #cgo argument: %s", info.path, orig) 360 } 361 opts[i] = opt 362 } 363 joinedStr := strings.Join(opts, optSeparator) 364 365 // Add tags to appropriate list. 366 switch verb { 367 case "CPPFLAGS": 368 info.cppopts = append(info.cppopts, &cgoTagsAndOpts{tags, joinedStr}) 369 case "CFLAGS": 370 info.copts = append(info.copts, &cgoTagsAndOpts{tags, joinedStr}) 371 case "CXXFLAGS": 372 info.cxxopts = append(info.cxxopts, &cgoTagsAndOpts{tags, joinedStr}) 373 case "FFLAGS": 374 // TODO: Add support 375 return fmt.Errorf("%s: unsupported #cgo verb: %s", verb, info.path) 376 case "LDFLAGS": 377 info.clinkopts = append(info.clinkopts, &cgoTagsAndOpts{tags, joinedStr}) 378 case "pkg-config": 379 return fmt.Errorf("%s: pkg-config not supported: %s", info.path, orig) 380 default: 381 return fmt.Errorf("%s: invalid #cgo verb: %s", info.path, orig) 382 } 383 } 384 return nil 385 } 386 387 // splitQuoted splits the string s around each instance of one or more consecutive 388 // white space characters while taking into account quotes and escaping, and 389 // returns an array of substrings of s or an empty list if s contains only white space. 390 // Single quotes and double quotes are recognized to prevent splitting within the 391 // quoted region, and are removed from the resulting substrings. If a quote in s 392 // isn't closed err will be set and r will have the unclosed argument as the 393 // last element. The backslash is used for escaping. 394 // 395 // For example, the following string: 396 // 397 // a b:"c d" 'e''f' "g\"" 398 // 399 // Would be parsed as: 400 // 401 // []string{"a", "b:c d", "ef", `g"`} 402 // 403 // Copied from go/build.splitQuoted 404 func splitQuoted(s string) (r []string, err error) { 405 var args []string 406 arg := make([]rune, len(s)) 407 escaped := false 408 quoted := false 409 quote := '\x00' 410 i := 0 411 for _, rune := range s { 412 switch { 413 case escaped: 414 escaped = false 415 case rune == '\\': 416 escaped = true 417 continue 418 case quote != '\x00': 419 if rune == quote { 420 quote = '\x00' 421 continue 422 } 423 case rune == '"' || rune == '\'': 424 quoted = true 425 quote = rune 426 continue 427 case unicode.IsSpace(rune): 428 if quoted || i > 0 { 429 quoted = false 430 args = append(args, string(arg[:i])) 431 i = 0 432 } 433 continue 434 } 435 arg[i] = rune 436 i++ 437 } 438 if quoted || i > 0 { 439 args = append(args, string(arg[:i])) 440 } 441 if quote != 0 { 442 err = errors.New("unclosed quote") 443 } else if escaped { 444 err = errors.New("unfinished escaping") 445 } 446 return args, err 447 } 448 449 // expandSrcDir expands any occurrence of ${SRCDIR}, making sure 450 // the result is safe for the shell. 451 // 452 // Copied from go/build.expandSrcDir 453 func expandSrcDir(str string, srcdir string) (string, bool) { 454 // "\" delimited paths cause safeCgoName to fail 455 // so convert native paths with a different delimiter 456 // to "/" before starting (eg: on windows). 457 srcdir = filepath.ToSlash(srcdir) 458 if srcdir == "" { 459 srcdir = "." 460 } 461 462 // Spaces are tolerated in ${SRCDIR}, but not anywhere else. 463 chunks := strings.Split(str, "${SRCDIR}") 464 if len(chunks) < 2 { 465 return str, safeCgoName(str, false) 466 } 467 ok := true 468 for _, chunk := range chunks { 469 ok = ok && (chunk == "" || safeCgoName(chunk, false)) 470 } 471 ok = ok && (srcdir == "" || safeCgoName(srcdir, true)) 472 res := strings.Join(chunks, srcdir) 473 return res, ok && res != "" 474 } 475 476 // NOTE: $ is not safe for the shell, but it is allowed here because of linker options like -Wl,$ORIGIN. 477 // We never pass these arguments to a shell (just to programs we construct argv for), so this should be okay. 478 // See golang.org/issue/6038. 479 // The @ is for OS X. See golang.org/issue/13720. 480 // The % is for Jenkins. See golang.org/issue/16959. 481 const ( 482 safeString = "+-.,/0123456789=ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz:$@%" 483 safeSpaces = " " 484 ) 485 486 var safeBytes = []byte(safeSpaces + safeString) 487 488 // Copied from go/build.safeCgoName 489 func safeCgoName(s string, spaces bool) bool { 490 if s == "" { 491 return false 492 } 493 safe := safeBytes 494 if !spaces { 495 safe = safe[len(safeSpaces):] 496 } 497 for i := 0; i < len(s); i++ { 498 if c := s[i]; c < utf8.RuneSelf && bytes.IndexByte(safe, c) < 0 { 499 return false 500 } 501 } 502 return true 503 } 504 505 func isOSArchSpecific(info fileInfo, cgoTags *cgoTagsAndOpts) (osSpecific, archSpecific bool) { 506 if info.goos != "" { 507 osSpecific = true 508 } 509 if info.goarch != "" { 510 archSpecific = true 511 } 512 513 checkTags := func(tags []string) { 514 for _, tag := range tags { 515 _, osOk := rule.KnownOSSet[tag] 516 if osOk || tag == "unix" { 517 osSpecific = true 518 } 519 _, archOk := rule.KnownArchSet[tag] 520 if archOk { 521 archSpecific = true 522 } 523 } 524 } 525 checkTags(info.tags.tags()) 526 if osSpecific && archSpecific { 527 return 528 } 529 530 checkTags(cgoTags.tags()) 531 return 532 } 533 534 // matchesOS checks if a value is equal to either an OS value or to any of its 535 // aliases. 536 func matchesOS(os, value string) bool { 537 if os == value { 538 return true 539 } 540 if value == "unix" { 541 return rule.UnixOS[os] 542 } 543 for _, alias := range rule.OSAliases[os] { 544 if alias == value { 545 return true 546 } 547 } 548 return false 549 } 550 551 // checkConstraints determines whether build constraints are satisfied on 552 // a given platform. 553 // 554 // The first few arguments describe the platform. genericTags is the set 555 // of build tags that are true on all platformConstraints. os and arch are the platform 556 // GOOS and GOARCH strings. If os or arch is empty, checkConstraints will 557 // return false in the presence of OS and architecture constraints, even 558 // if they are negated. 559 // 560 // The remaining arguments describe the file being tested. All of these may 561 // be empty or nil. osSuffix and archSuffix are filename suffixes. tags 562 // is the parsed build tags found near the top of the file. cgoTags 563 // is an extra set of tags in a #cgo directive. 564 func checkConstraints(c *config.Config, os, arch, osSuffix, archSuffix string, tags *buildTags, cgoTags *cgoTagsAndOpts) bool { 565 if osSuffix != "" && !matchesOS(os, osSuffix) || archSuffix != "" && archSuffix != arch { 566 return false 567 } 568 569 goConf := getGoConfig(c) 570 checker := func(tag string) bool { 571 if isIgnoredTag(tag) { 572 return true 573 } 574 if _, ok := rule.KnownOSSet[tag]; ok || tag == "unix" { 575 if os == "" { 576 return false 577 } 578 return matchesOS(os, tag) 579 } 580 581 if _, ok := rule.KnownArchSet[tag]; ok { 582 if arch == "" { 583 return false 584 } 585 return arch == tag 586 587 } 588 589 return goConf.genericTags[tag] 590 } 591 592 return tags.eval(checker) && cgoTags.eval(checker) 593 } 594 595 // rulesGoSupportsOS returns whether the os tag is recognized by the version of 596 // rules_go being used. This avoids incompatibility between new versions of 597 // Gazelle and old versions of rules_go. 598 func rulesGoSupportsOS(v version.Version, os string) bool { 599 if len(v) == 0 { 600 return true 601 } 602 if v.Compare(version.Version{0, 23, 0}) < 0 && 603 (os == "aix" || os == "illumos") { 604 return false 605 } 606 return true 607 } 608 609 // rulesGoSupportsArch returns whether the arch tag is recognized by the version 610 // of rules_go being used. This avoids incompatibility between new versions of 611 // Gazelle and old versions of rules_go. 612 func rulesGoSupportsArch(v version.Version, arch string) bool { 613 if len(v) == 0 { 614 return true 615 } 616 if v.Compare(version.Version{0, 23, 0}) < 0 && 617 arch == "riscv64" { 618 return false 619 } 620 return true 621 } 622 623 // rulesGoSupportsPlatform returns whether the os and arch tag combination is 624 // recognized by the version of rules_go being used. This avoids incompatibility 625 // between new versions of Gazelle and old versions of rules_go. 626 func rulesGoSupportsPlatform(v version.Version, p rule.Platform) bool { 627 if len(v) == 0 { 628 return true 629 } 630 if v.Compare(version.Version{0, 23, 0}) < 0 && 631 (p.OS == "aix" && p.Arch == "ppc64" || 632 p.OS == "freebsd" && p.Arch == "arm64" || 633 p.OS == "illumos" && p.Arch == "amd64" || 634 p.OS == "linux" && p.Arch == "riscv64" || 635 p.OS == "netbsd" && p.Arch == "arm64" || 636 p.OS == "openbsd" && p.Arch == "arm64" || 637 p.OS == "windows" && p.Arch == "arm" || 638 p.OS == "windows" && p.Arch == "arm64") { 639 return false 640 } 641 return true 642 } 643 644 // parseGoEmbed parses the text following "//go:embed" to extract the glob patterns. 645 // It accepts unquoted space-separated patterns as well as double-quoted and back-quoted Go strings. 646 // This is based on a similar function in cmd/compile/internal/gc/noder.go; 647 // this version calculates position information as well. 648 // 649 // Copied from go/build.parseGoEmbed. 650 func parseGoEmbed(args string, pos token.Position) ([]fileEmbed, error) { 651 trimBytes := func(n int) { 652 pos.Offset += n 653 pos.Column += utf8.RuneCountInString(args[:n]) 654 args = args[n:] 655 } 656 trimSpace := func() { 657 trim := strings.TrimLeftFunc(args, unicode.IsSpace) 658 trimBytes(len(args) - len(trim)) 659 } 660 661 var list []fileEmbed 662 for trimSpace(); args != ""; trimSpace() { 663 var path string 664 pathPos := pos 665 Switch: 666 switch args[0] { 667 default: 668 i := len(args) 669 for j, c := range args { 670 if unicode.IsSpace(c) { 671 i = j 672 break 673 } 674 } 675 path = args[:i] 676 trimBytes(i) 677 678 case '`': 679 i := strings.Index(args[1:], "`") 680 if i < 0 { 681 return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args) 682 } 683 path = args[1 : 1+i] 684 trimBytes(1 + i + 1) 685 686 case '"': 687 i := 1 688 for ; i < len(args); i++ { 689 if args[i] == '\\' { 690 i++ 691 continue 692 } 693 if args[i] == '"' { 694 q, err := strconv.Unquote(args[:i+1]) 695 if err != nil { 696 return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args[:i+1]) 697 } 698 path = q 699 trimBytes(i + 1) 700 break Switch 701 } 702 } 703 if i >= len(args) { 704 return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args) 705 } 706 } 707 708 if args != "" { 709 r, _ := utf8.DecodeRuneInString(args) 710 if !unicode.IsSpace(r) { 711 return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args) 712 } 713 } 714 list = append(list, fileEmbed{path, pathPos}) 715 } 716 return list, nil 717 }