github.com/wolfd/bazel-gazelle@v0.14.0/internal/language/go/fileinfo.go (about) 1 /* Copyright 2018 The Bazel Authors. All rights reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 */ 15 16 package golang 17 18 import ( 19 "bufio" 20 "bytes" 21 "errors" 22 "fmt" 23 "go/ast" 24 "go/parser" 25 "go/token" 26 "log" 27 "os" 28 "path" 29 "path/filepath" 30 "strconv" 31 "strings" 32 "unicode" 33 "unicode/utf8" 34 35 "github.com/bazelbuild/bazel-gazelle/internal/config" 36 "github.com/bazelbuild/bazel-gazelle/internal/language/proto" 37 "github.com/bazelbuild/bazel-gazelle/internal/rule" 38 ) 39 40 // fileInfo holds information used to decide how to build a file. This 41 // information comes from the file's name, from package and import declarations 42 // (in .go files), and from +build and cgo comments. 43 type fileInfo struct { 44 path string 45 name string 46 47 // ext is the type of file, based on extension. 48 ext ext 49 50 // packageName is the Go package name of a .go file, without the 51 // "_test" suffix if it was present. It is empty for non-Go files. 52 packageName string 53 54 // importPath is the canonical import path for this file's package. 55 // This may be read from a package comment (in Go) or a go_package 56 // option (in proto). This field is empty for files that don't specify 57 // an import path. 58 importPath string 59 60 // isTest is true if the file stem (the part before the extension) 61 // ends with "_test.go". This is never true for non-Go files. 62 isTest bool 63 64 // imports is a list of packages imported by a file. It does not include 65 // "C" or anything from the standard library. 66 imports []string 67 68 // isCgo is true for .go files that import "C". 69 isCgo bool 70 71 // goos and goarch contain the OS and architecture suffixes in the filename, 72 // if they were present. 73 goos, goarch string 74 75 // tags is a list of build tag lines. Each entry is the trimmed text of 76 // a line after a "+build" prefix. 77 tags []tagLine 78 79 // copts and clinkopts contain flags that are part of CFLAGS, CPPFLAGS, 80 // CXXFLAGS, and LDFLAGS directives in cgo comments. 81 copts, clinkopts []taggedOpts 82 83 // hasServices indicates whether a .proto file has service definitions. 84 hasServices bool 85 } 86 87 // tagLine represents the space-separated disjunction of build tag groups 88 // in a line comment. 89 type tagLine []tagGroup 90 91 // check returns true if at least one of the tag groups is satisfied. 92 func (l tagLine) check(c *config.Config, os, arch string) bool { 93 if len(l) == 0 { 94 return false 95 } 96 for _, g := range l { 97 if g.check(c, os, arch) { 98 return true 99 } 100 } 101 return false 102 } 103 104 // tagGroup represents a comma-separated conjuction of build tags. 105 type tagGroup []string 106 107 // check returns true if all of the tags are true. Tags that start with 108 // "!" are negated (but "!!") is not allowed. Go release tags (e.g., "go1.8") 109 // are ignored. If the group contains an os or arch tag, but the os or arch 110 // parameters are empty, check returns false even if the tag is negated. 111 func (g tagGroup) check(c *config.Config, os, arch string) bool { 112 goConf := getGoConfig(c) 113 for _, t := range g { 114 if strings.HasPrefix(t, "!!") { // bad syntax, reject always 115 return false 116 } 117 not := strings.HasPrefix(t, "!") 118 if not { 119 t = t[1:] 120 } 121 if isIgnoredTag(t) { 122 // Release tags are treated as "unknown" and are considered true, 123 // whether or not they are negated. 124 continue 125 } 126 var match bool 127 if _, ok := rule.KnownOSSet[t]; ok { 128 if os == "" { 129 return false 130 } 131 match = os == t 132 } else if _, ok := rule.KnownArchSet[t]; ok { 133 if arch == "" { 134 return false 135 } 136 match = arch == t 137 } else { 138 match = goConf.genericTags[t] 139 } 140 if not { 141 match = !match 142 } 143 if !match { 144 return false 145 } 146 } 147 return true 148 } 149 150 // taggedOpts a list of compile or link options which should only be applied 151 // if the given set of build tags are satisfied. These options have already 152 // been tokenized using the same algorithm that "go build" uses, then joined 153 // with OptSeparator. 154 type taggedOpts struct { 155 tags tagLine 156 opts string 157 } 158 159 // optSeparator is a special character inserted between options that appeared 160 // together in a #cgo directive. This allows options to be split, modified, 161 // and escaped by other packages. 162 // 163 // It's important to keep options grouped together in the same string. For 164 // example, if we have "-framework IOKit" together in a #cgo directive, 165 // "-framework" shouldn't be treated as a separate string for the purposes of 166 // sorting and de-duplicating. 167 const optSeparator = "\x1D" 168 169 // ext indicates how a file should be treated, based on extension. 170 type ext int 171 172 const ( 173 // unknownExt is applied files that aren't buildable with Go. 174 unknownExt ext = iota 175 176 // goExt is applied to .go files. 177 goExt 178 179 // cExt is applied to C and C++ files. 180 cExt 181 182 // hExt is applied to header files. If cgo code is present, these may be 183 // C or C++ headers. If not, they are treated as Go assembly headers. 184 hExt 185 186 // sExt is applied to Go assembly files, ending with .s. 187 sExt 188 189 // csExt is applied to other assembly files, ending with .S. These are built 190 // with the C compiler if cgo code is present. 191 csExt 192 193 // protoExt is applied to .proto files. 194 protoExt 195 ) 196 197 // fileNameInfo returns information that can be inferred from the name of 198 // a file. It does not read data from the file. 199 func fileNameInfo(path_ string) fileInfo { 200 name := filepath.Base(path_) 201 var ext ext 202 switch path.Ext(name) { 203 case ".go": 204 ext = goExt 205 case ".c", ".cc", ".cpp", ".cxx", ".m", ".mm": 206 ext = cExt 207 case ".h", ".hh", ".hpp", ".hxx": 208 ext = hExt 209 case ".s": 210 ext = sExt 211 case ".S": 212 ext = csExt 213 case ".proto": 214 ext = protoExt 215 default: 216 ext = unknownExt 217 } 218 219 // Determine test, goos, and goarch. This is intended to match the logic 220 // in goodOSArchFile in go/build. 221 var isTest bool 222 var goos, goarch string 223 l := strings.Split(name[:len(name)-len(path.Ext(name))], "_") 224 if len(l) >= 2 && l[len(l)-1] == "test" { 225 isTest = ext == goExt 226 l = l[:len(l)-1] 227 } 228 switch { 229 case len(l) >= 3 && rule.KnownOSSet[l[len(l)-2]] && rule.KnownArchSet[l[len(l)-1]]: 230 goos = l[len(l)-2] 231 goarch = l[len(l)-1] 232 case len(l) >= 2 && rule.KnownOSSet[l[len(l)-1]]: 233 goos = l[len(l)-1] 234 case len(l) >= 2 && rule.KnownArchSet[l[len(l)-1]]: 235 goarch = l[len(l)-1] 236 } 237 238 return fileInfo{ 239 path: path_, 240 name: name, 241 ext: ext, 242 isTest: isTest, 243 goos: goos, 244 goarch: goarch, 245 } 246 } 247 248 // otherFileInfo returns information about a non-.go file. It will parse 249 // part of the file to determine build tags. If the file can't be read, an 250 // error will be logged, and partial information will be returned. 251 func otherFileInfo(path string) fileInfo { 252 info := fileNameInfo(path) 253 if info.ext == unknownExt { 254 return info 255 } 256 257 tags, err := readTags(info.path) 258 if err != nil { 259 log.Printf("%s: error reading file: %v", info.path, err) 260 return info 261 } 262 info.tags = tags 263 return info 264 } 265 266 // goFileInfo returns information about a .go file. It will parse part of the 267 // file to determine the package name, imports, and build constraints. 268 // If the file can't be read, an error will be logged, and partial information 269 // will be returned. 270 // This function is intended to match go/build.Context.Import. 271 // TODD(#53): extract canonical import path 272 func goFileInfo(path, rel string) fileInfo { 273 info := fileNameInfo(path) 274 fset := token.NewFileSet() 275 pf, err := parser.ParseFile(fset, info.path, nil, parser.ImportsOnly|parser.ParseComments) 276 if err != nil { 277 log.Printf("%s: error reading go file: %v", info.path, err) 278 return info 279 } 280 281 info.packageName = pf.Name.Name 282 if info.isTest && strings.HasSuffix(info.packageName, "_test") { 283 info.packageName = info.packageName[:len(info.packageName)-len("_test")] 284 } 285 286 for _, decl := range pf.Decls { 287 d, ok := decl.(*ast.GenDecl) 288 if !ok { 289 continue 290 } 291 for _, dspec := range d.Specs { 292 spec, ok := dspec.(*ast.ImportSpec) 293 if !ok { 294 continue 295 } 296 quoted := spec.Path.Value 297 path, err := strconv.Unquote(quoted) 298 if err != nil { 299 log.Printf("%s: error reading go file: %v", info.path, err) 300 continue 301 } 302 303 if path == "C" { 304 if info.isTest { 305 log.Printf("%s: warning: use of cgo in test not supported", info.path) 306 } 307 info.isCgo = true 308 cg := spec.Doc 309 if cg == nil && len(d.Specs) == 1 { 310 cg = d.Doc 311 } 312 if cg != nil { 313 if err := saveCgo(&info, rel, cg); err != nil { 314 log.Printf("%s: error reading go file: %v", info.path, err) 315 } 316 } 317 continue 318 } 319 info.imports = append(info.imports, path) 320 } 321 } 322 323 tags, err := readTags(info.path) 324 if err != nil { 325 log.Printf("%s: error reading go file: %v", info.path, err) 326 return info 327 } 328 info.tags = tags 329 330 return info 331 } 332 333 // saveCgo extracts CFLAGS, CPPFLAGS, CXXFLAGS, and LDFLAGS directives 334 // from a comment above a "C" import. This is intended to match logic in 335 // go/build.Context.saveCgo. 336 func saveCgo(info *fileInfo, rel string, cg *ast.CommentGroup) error { 337 text := cg.Text() 338 for _, line := range strings.Split(text, "\n") { 339 orig := line 340 341 // Line is 342 // #cgo [GOOS/GOARCH...] LDFLAGS: stuff 343 // 344 line = strings.TrimSpace(line) 345 if len(line) < 5 || line[:4] != "#cgo" || (line[4] != ' ' && line[4] != '\t') { 346 continue 347 } 348 349 // Split at colon. 350 line = strings.TrimSpace(line[4:]) 351 i := strings.Index(line, ":") 352 if i < 0 { 353 return fmt.Errorf("%s: invalid #cgo line: %s", info.path, orig) 354 } 355 line, optstr := strings.TrimSpace(line[:i]), strings.TrimSpace(line[i+1:]) 356 357 // Parse tags and verb. 358 f := strings.Fields(line) 359 if len(f) < 1 { 360 return fmt.Errorf("%s: invalid #cgo line: %s", info.path, orig) 361 } 362 verb := f[len(f)-1] 363 tags := parseTagsInGroups(f[:len(f)-1]) 364 365 // Parse options. 366 opts, err := splitQuoted(optstr) 367 if err != nil { 368 return fmt.Errorf("%s: invalid #cgo line: %s", info.path, orig) 369 } 370 var ok bool 371 for i, opt := range opts { 372 if opt, ok = expandSrcDir(opt, rel); !ok { 373 return fmt.Errorf("%s: malformed #cgo argument: %s", info.path, orig) 374 } 375 opts[i] = opt 376 } 377 joinedStr := strings.Join(opts, optSeparator) 378 379 // Add tags to appropriate list. 380 switch verb { 381 case "CFLAGS", "CPPFLAGS", "CXXFLAGS": 382 info.copts = append(info.copts, taggedOpts{tags, joinedStr}) 383 case "LDFLAGS": 384 info.clinkopts = append(info.clinkopts, taggedOpts{tags, joinedStr}) 385 case "pkg-config": 386 return fmt.Errorf("%s: pkg-config not supported: %s", info.path, orig) 387 default: 388 return fmt.Errorf("%s: invalid #cgo verb: %s", info.path, orig) 389 } 390 } 391 return nil 392 } 393 394 // splitQuoted splits the string s around each instance of one or more consecutive 395 // white space characters while taking into account quotes and escaping, and 396 // returns an array of substrings of s or an empty list if s contains only white space. 397 // Single quotes and double quotes are recognized to prevent splitting within the 398 // quoted region, and are removed from the resulting substrings. If a quote in s 399 // isn't closed err will be set and r will have the unclosed argument as the 400 // last element. The backslash is used for escaping. 401 // 402 // For example, the following string: 403 // 404 // a b:"c d" 'e''f' "g\"" 405 // 406 // Would be parsed as: 407 // 408 // []string{"a", "b:c d", "ef", `g"`} 409 // 410 // Copied from go/build.splitQuoted 411 func splitQuoted(s string) (r []string, err error) { 412 var args []string 413 arg := make([]rune, len(s)) 414 escaped := false 415 quoted := false 416 quote := '\x00' 417 i := 0 418 for _, rune := range s { 419 switch { 420 case escaped: 421 escaped = false 422 case rune == '\\': 423 escaped = true 424 continue 425 case quote != '\x00': 426 if rune == quote { 427 quote = '\x00' 428 continue 429 } 430 case rune == '"' || rune == '\'': 431 quoted = true 432 quote = rune 433 continue 434 case unicode.IsSpace(rune): 435 if quoted || i > 0 { 436 quoted = false 437 args = append(args, string(arg[:i])) 438 i = 0 439 } 440 continue 441 } 442 arg[i] = rune 443 i++ 444 } 445 if quoted || i > 0 { 446 args = append(args, string(arg[:i])) 447 } 448 if quote != 0 { 449 err = errors.New("unclosed quote") 450 } else if escaped { 451 err = errors.New("unfinished escaping") 452 } 453 return args, err 454 } 455 456 // expandSrcDir expands any occurrence of ${SRCDIR}, making sure 457 // the result is safe for the shell. 458 // 459 // Copied from go/build.expandSrcDir 460 func expandSrcDir(str string, srcdir string) (string, bool) { 461 // "\" delimited paths cause safeCgoName to fail 462 // so convert native paths with a different delimiter 463 // to "/" before starting (eg: on windows). 464 srcdir = filepath.ToSlash(srcdir) 465 466 // Spaces are tolerated in ${SRCDIR}, but not anywhere else. 467 chunks := strings.Split(str, "${SRCDIR}") 468 if len(chunks) < 2 { 469 return str, safeCgoName(str, false) 470 } 471 ok := true 472 for _, chunk := range chunks { 473 ok = ok && (chunk == "" || safeCgoName(chunk, false)) 474 } 475 ok = ok && (srcdir == "" || safeCgoName(srcdir, true)) 476 res := strings.Join(chunks, srcdir) 477 return res, ok && res != "" 478 } 479 480 // NOTE: $ is not safe for the shell, but it is allowed here because of linker options like -Wl,$ORIGIN. 481 // We never pass these arguments to a shell (just to programs we construct argv for), so this should be okay. 482 // See golang.org/issue/6038. 483 // The @ is for OS X. See golang.org/issue/13720. 484 // The % is for Jenkins. See golang.org/issue/16959. 485 const safeString = "+-.,/0123456789=ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz:$@%" 486 const safeSpaces = " " 487 488 var safeBytes = []byte(safeSpaces + safeString) 489 490 // Copied from go/build.safeCgoName 491 func safeCgoName(s string, spaces bool) bool { 492 if s == "" { 493 return false 494 } 495 safe := safeBytes 496 if !spaces { 497 safe = safe[len(safeSpaces):] 498 } 499 for i := 0; i < len(s); i++ { 500 if c := s[i]; c < utf8.RuneSelf && bytes.IndexByte(safe, c) < 0 { 501 return false 502 } 503 } 504 return true 505 } 506 507 // readTags reads and extracts build tags from the block of comments 508 // and blank lines at the start of a file which is separated from the 509 // rest of the file by a blank line. Each string in the returned slice 510 // is the trimmed text of a line after a "+build" prefix. 511 // Based on go/build.Context.shouldBuild. 512 func readTags(path string) ([]tagLine, error) { 513 f, err := os.Open(path) 514 if err != nil { 515 return nil, err 516 } 517 defer f.Close() 518 scanner := bufio.NewScanner(f) 519 520 // Pass 1: Identify leading run of // comments and blank lines, 521 // which must be followed by a blank line. 522 var lines []string 523 end := 0 524 for scanner.Scan() { 525 line := strings.TrimSpace(scanner.Text()) 526 if line == "" { 527 end = len(lines) 528 continue 529 } 530 if strings.HasPrefix(line, "//") { 531 lines = append(lines, line[len("//"):]) 532 continue 533 } 534 break 535 } 536 if err := scanner.Err(); err != nil { 537 return nil, err 538 } 539 lines = lines[:end] 540 541 // Pass 2: Process each line in the run. 542 var tagLines []tagLine 543 for _, line := range lines { 544 fields := strings.Fields(line) 545 if len(fields) > 0 && fields[0] == "+build" { 546 tagLines = append(tagLines, parseTagsInGroups(fields[1:])) 547 } 548 } 549 return tagLines, nil 550 } 551 552 func parseTagsInGroups(groups []string) tagLine { 553 var l tagLine 554 for _, g := range groups { 555 l = append(l, tagGroup(strings.Split(g, ","))) 556 } 557 return l 558 } 559 560 func isOSArchSpecific(info fileInfo, cgoTags tagLine) (osSpecific, archSpecific bool) { 561 if info.goos != "" { 562 osSpecific = true 563 } 564 if info.goarch != "" { 565 archSpecific = true 566 } 567 lines := info.tags 568 if len(cgoTags) > 0 { 569 lines = append(lines, cgoTags) 570 } 571 for _, line := range lines { 572 for _, group := range line { 573 for _, tag := range group { 574 if strings.HasPrefix(tag, "!") { 575 tag = tag[1:] 576 } 577 _, osOk := rule.KnownOSSet[tag] 578 if osOk { 579 osSpecific = true 580 } 581 _, archOk := rule.KnownArchSet[tag] 582 if archOk { 583 archSpecific = true 584 } 585 } 586 } 587 } 588 return osSpecific, archSpecific 589 } 590 591 // checkConstraints determines whether build constraints are satisfied on 592 // a given platform. 593 // 594 // The first few arguments describe the platform. genericTags is the set 595 // of build tags that are true on all platforms. os and arch are the platform 596 // GOOS and GOARCH strings. If os or arch is empty, checkConstraints will 597 // return false in the presence of OS and architecture constraints, even 598 // if they are negated. 599 // 600 // The remaining arguments describe the file being tested. All of these may 601 // be empty or nil. osSuffix and archSuffix are filename suffixes. fileTags 602 // is a list tags from +build comments found near the top of the file. cgoTags 603 // is an extra set of tags in a #cgo directive. 604 func checkConstraints(c *config.Config, os, arch, osSuffix, archSuffix string, fileTags []tagLine, cgoTags tagLine) bool { 605 if osSuffix != "" && osSuffix != os || archSuffix != "" && archSuffix != arch { 606 return false 607 } 608 for _, l := range fileTags { 609 if !l.check(c, os, arch) { 610 return false 611 } 612 } 613 if len(cgoTags) > 0 && !cgoTags.check(c, os, arch) { 614 return false 615 } 616 return true 617 } 618 619 // isIgnoredTag returns whether the tag is "cgo" or is a release tag. 620 // Release tags match the pattern "go[0-9]\.[0-9]+". 621 // Gazelle won't consider whether an ignored tag is satisfied when evaluating 622 // build constraints for a file. 623 func isIgnoredTag(tag string) bool { 624 if tag == "cgo" || tag == "race" || tag == "msan" { 625 return true 626 } 627 if len(tag) < 5 || !strings.HasPrefix(tag, "go") { 628 return false 629 } 630 if tag[2] < '0' || tag[2] > '9' || tag[3] != '.' { 631 return false 632 } 633 for _, c := range tag[4:] { 634 if c < '0' || c > '9' { 635 return false 636 } 637 } 638 return true 639 } 640 641 // protoFileInfo extracts metadata from a proto file. The proto extension 642 // already "parses" these and stores metadata in proto.FileInfo, so this is 643 // just processing relevant options. 644 func protoFileInfo(path_ string, protoInfo proto.FileInfo) fileInfo { 645 info := fileNameInfo(path_) 646 647 // Look for "option go_package". If there's no / in the package option, then 648 // it's just a simple package name, not a full import path. 649 for _, opt := range protoInfo.Options { 650 if opt.Key != "go_package" { 651 continue 652 } 653 if strings.LastIndexByte(opt.Value, '/') == -1 { 654 info.packageName = opt.Value 655 } else { 656 if i := strings.LastIndexByte(opt.Value, ';'); i != -1 { 657 info.importPath = opt.Value[:i] 658 info.packageName = opt.Value[i+1:] 659 } else { 660 info.importPath = opt.Value 661 info.packageName = path.Base(opt.Value) 662 } 663 } 664 } 665 666 // Set the Go package name from the proto package name if there was no 667 // option go_package. 668 if info.packageName == "" && protoInfo.PackageName != "" { 669 info.packageName = strings.Replace(protoInfo.PackageName, ".", "_", -1) 670 } 671 672 info.imports = protoInfo.Imports 673 info.hasServices = protoInfo.HasServices 674 return info 675 }