github.com/kdevb0x/go@v0.0.0-20180115030120-39687051e9e7/src/cmd/go/internal/work/buildid.go (about) 1 // Copyright 2017 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package work 6 7 import ( 8 "bytes" 9 "fmt" 10 "io/ioutil" 11 "os" 12 "os/exec" 13 "strings" 14 15 "cmd/go/internal/base" 16 "cmd/go/internal/cache" 17 "cmd/go/internal/cfg" 18 "cmd/go/internal/load" 19 "cmd/go/internal/str" 20 "cmd/internal/buildid" 21 ) 22 23 // Build IDs 24 // 25 // Go packages and binaries are stamped with build IDs that record both 26 // the action ID, which is a hash of the inputs to the action that produced 27 // the packages or binary, and the content ID, which is a hash of the action 28 // output, namely the archive or binary itself. The hash is the same one 29 // used by the build artifact cache (see cmd/go/internal/cache), but 30 // truncated when stored in packages and binaries, as the full length is not 31 // needed and is a bit unwieldy. The precise form is 32 // 33 // actionID/[.../]contentID 34 // 35 // where the actionID and contentID are prepared by hashToString below. 36 // and are found by looking for the first or last slash. 37 // Usually the buildID is simply actionID/contentID, but see below for an 38 // exception. 39 // 40 // The build ID serves two primary purposes. 41 // 42 // 1. The action ID half allows installed packages and binaries to serve as 43 // one-element cache entries. If we intend to build math.a with a given 44 // set of inputs summarized in the action ID, and the installed math.a already 45 // has that action ID, we can reuse the installed math.a instead of rebuilding it. 46 // 47 // 2. The content ID half allows the easy preparation of action IDs for steps 48 // that consume a particular package or binary. The content hash of every 49 // input file for a given action must be included in the action ID hash. 
50 // Storing the content ID in the build ID lets us read it from the file with 51 // minimal I/O, instead of reading and hashing the entire file. 52 // This is especially effective since packages and binaries are typically 53 // the largest inputs to an action. 54 // 55 // Separating action ID from content ID is important for reproducible builds. 56 // The compiler is compiled with itself. If an output were represented by its 57 // own action ID (instead of content ID) when computing the action ID of 58 // the next step in the build process, then the compiler could never have its 59 // own input action ID as its output action ID (short of a miraculous hash collision). 60 // Instead we use the content IDs to compute the next action ID, and because 61 // the content IDs converge, so too do the action IDs and therefore the 62 // build IDs and the overall compiler binary. See cmd/dist's cmdbootstrap 63 // for the actual convergence sequence. 64 // 65 // The “one-element cache” purpose is a bit more complex for installed 66 // binaries. For a binary, like cmd/gofmt, there are two steps: compile 67 // cmd/gofmt/*.go into main.a, and then link main.a into the gofmt binary. 68 // We do not install gofmt's main.a, only the gofmt binary. Being able to 69 // decide that the gofmt binary is up-to-date means computing the action ID 70 // for the final link of the gofmt binary and comparing it against the 71 // already-installed gofmt binary. But computing the action ID for the link 72 // means knowing the content ID of main.a, which we did not keep. 
73 // To sidestep this problem, each binary actually stores an expanded build ID: 74 // 75 // actionID(binary)/actionID(main.a)/contentID(main.a)/contentID(binary) 76 // 77 // (Note that this can be viewed equivalently as: 78 // 79 // actionID(binary)/buildID(main.a)/contentID(binary) 80 // 81 // Storing the buildID(main.a) in the middle lets the computations that care 82 // about the prefix or suffix halves ignore the middle and preserves the 83 // original build ID as a contiguous string.) 84 // 85 // During the build, when it's time to build main.a, the gofmt binary has the 86 // information needed to decide whether the eventual link would produce 87 // the same binary: if the action ID for main.a's inputs matches and then 88 // the action ID for the link step matches when assuming the given main.a 89 // content ID, then the binary as a whole is up-to-date and need not be rebuilt. 90 // 91 // This is all a bit complex and may be simplified once we can rely on the 92 // main cache, but at least at the start we will be using the content-based 93 // staleness determination without a cache beyond the usual installed 94 // package and binary locations. 95 96 const buildIDSeparator = "/" 97 98 // actionID returns the action ID half of a build ID. 99 func actionID(buildID string) string { 100 i := strings.Index(buildID, buildIDSeparator) 101 if i < 0 { 102 return buildID 103 } 104 return buildID[:i] 105 } 106 107 // contentID returns the content ID half of a build ID. 108 func contentID(buildID string) string { 109 return buildID[strings.LastIndex(buildID, buildIDSeparator)+1:] 110 } 111 112 // hashToString converts the hash h to a string to be recorded 113 // in package archives and binaries as part of the build ID. 114 // We use the first 96 bits of the hash and encode it in base64, 115 // resulting in a 16-byte string. 
Because this is only used for 116 // detecting the need to rebuild installed files (not for lookups 117 // in the object file cache), 96 bits are sufficient to drive the 118 // probability of a false "do not need to rebuild" decision to effectively zero. 119 // We embed two different hashes in archives and four in binaries, 120 // so cutting to 16 bytes is a significant savings when build IDs are displayed. 121 // (16*4+3 = 67 bytes compared to 64*4+3 = 259 bytes for the 122 // more straightforward option of printing the entire h in hex). 123 func hashToString(h [cache.HashSize]byte) string { 124 const b64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_" 125 const chunks = 5 126 var dst [chunks * 4]byte 127 for i := 0; i < chunks; i++ { 128 v := uint32(h[3*i])<<16 | uint32(h[3*i+1])<<8 | uint32(h[3*i+2]) 129 dst[4*i+0] = b64[(v>>18)&0x3F] 130 dst[4*i+1] = b64[(v>>12)&0x3F] 131 dst[4*i+2] = b64[(v>>6)&0x3F] 132 dst[4*i+3] = b64[v&0x3F] 133 } 134 return string(dst[:]) 135 } 136 137 // toolID returns the unique ID to use for the current copy of the 138 // named tool (asm, compile, cover, link). 139 // 140 // It is important that if the tool changes (for example a compiler bug is fixed 141 // and the compiler reinstalled), toolID returns a different string, so that old 142 // package archives look stale and are rebuilt (with the fixed compiler). 143 // This suggests using a content hash of the tool binary, as stored in the build ID. 144 // 145 // Unfortunately, we can't just open the tool binary, because the tool might be 146 // invoked via a wrapper program specified by -toolexec and we don't know 147 // what the wrapper program does. In particular, we want "-toolexec toolstash" 148 // to continue working: it does no good if "-toolexec toolstash" is executing a 149 // stashed copy of the compiler but the go command is acting as if it will run 150 // the standard copy of the compiler. 
The solution is to ask the tool binary to tell 151 // us its own build ID using the "-V=full" flag now supported by all tools. 152 // Then we know we're getting the build ID of the compiler that will actually run 153 // during the build. (How does the compiler binary know its own content hash? 154 // We store it there using updateBuildID after the standard link step.) 155 // 156 // A final twist is that we'd prefer to have reproducible builds for release toolchains. 157 // It should be possible to cross-compile for Windows from either Linux or Mac 158 // or Windows itself and produce the same binaries, bit for bit. If the tool ID, 159 // which influences the action ID half of the build ID, is based on the content ID, 160 // then the Linux compiler binary and Mac compiler binary will have different tool IDs 161 // and therefore produce executables with different action IDs. 162 // To avoids this problem, for releases we use the release version string instead 163 // of the compiler binary's content hash. This assumes that all compilers built 164 // on all different systems are semantically equivalent, which is of course only true 165 // modulo bugs. (Producing the exact same executables also requires that the different 166 // build setups agree on details like $GOROOT and file name paths, but at least the 167 // tool IDs do not make it impossible.) 168 func (b *Builder) toolID(name string) string { 169 b.id.Lock() 170 id := b.toolIDCache[name] 171 b.id.Unlock() 172 173 if id != "" { 174 return id 175 } 176 177 cmdline := str.StringList(cfg.BuildToolexec, base.Tool(name), "-V=full") 178 cmd := exec.Command(cmdline[0], cmdline[1:]...) 
179 cmd.Env = base.EnvForDir(cmd.Dir, os.Environ()) 180 var stdout, stderr bytes.Buffer 181 cmd.Stdout = &stdout 182 cmd.Stderr = &stderr 183 if err := cmd.Run(); err != nil { 184 base.Fatalf("go tool %s: %v\n%s%s", name, err, stdout.Bytes(), stderr.Bytes()) 185 } 186 187 line := stdout.String() 188 f := strings.Fields(line) 189 if len(f) < 3 || f[0] != name || f[1] != "version" || f[2] == "devel" && !strings.HasPrefix(f[len(f)-1], "buildID=") { 190 base.Fatalf("go tool %s -V=full: unexpected output:\n\t%s", name, line) 191 } 192 if f[2] == "devel" { 193 // On the development branch, use the content ID part of the build ID. 194 id = contentID(f[len(f)-1]) 195 } else { 196 // For a release, the output is like: "compile version go1.9.1". Use the whole line. 197 id = f[2] 198 } 199 200 b.id.Lock() 201 b.toolIDCache[name] = id 202 b.id.Unlock() 203 204 return id 205 } 206 207 // gccToolID returns the unique ID to use for a tool that is invoked 208 // by the GCC driver. This is in particular gccgo, but this can also 209 // be used for gcc, g++, gfortran, etc.; those tools all use the GCC 210 // driver under different names. The approach used here should also 211 // work for sufficiently new versions of clang. Unlike toolID, the 212 // name argument is the program to run. The language argument is the 213 // type of input file as passed to the GCC driver's -x option. 214 // 215 // For these tools we have no -V=full option to dump the build ID, 216 // but we can run the tool with -v -### to reliably get the compiler proper 217 // and hash that. That will work in the presence of -toolexec. 218 // 219 // In order to get reproducible builds for released compilers, we 220 // detect a released compiler by the absence of "experimental" in the 221 // --version output, and in that case we just use the version string. 222 func (b *Builder) gccgoToolID(name, language string) (string, error) { 223 key := name + "." 
+ language 224 b.id.Lock() 225 id := b.toolIDCache[key] 226 b.id.Unlock() 227 228 if id != "" { 229 return id, nil 230 } 231 232 // Invoke the driver with -### to see the subcommands and the 233 // version strings. Use -x to set the language. Pretend to 234 // compile an empty file on standard input. 235 cmdline := str.StringList(cfg.BuildToolexec, name, "-###", "-x", language, "-c", "-") 236 cmd := exec.Command(cmdline[0], cmdline[1:]...) 237 cmd.Env = base.EnvForDir(cmd.Dir, os.Environ()) 238 out, err := cmd.CombinedOutput() 239 if err != nil { 240 return "", fmt.Errorf("%s: %v; output: %q", name, err, out) 241 } 242 243 version := "" 244 lines := strings.Split(string(out), "\n") 245 for _, line := range lines { 246 if fields := strings.Fields(line); len(fields) > 1 && fields[1] == "version" { 247 version = line 248 break 249 } 250 } 251 if version == "" { 252 return "", fmt.Errorf("%s: can not find version number in %q", name, out) 253 } 254 255 if !strings.Contains(version, "experimental") { 256 // This is a release. Use this line as the tool ID. 257 id = version 258 } else { 259 // This is a development version. The first line with 260 // a leading space is the compiler proper. 
261 compiler := "" 262 for _, line := range lines { 263 if len(line) > 1 && line[0] == ' ' { 264 compiler = line 265 break 266 } 267 } 268 if compiler == "" { 269 return "", fmt.Errorf("%s: can not find compilation command in %q", name, out) 270 } 271 272 fields := strings.Fields(compiler) 273 if len(fields) == 0 { 274 return "", fmt.Errorf("%s: compilation command confusion %q", name, out) 275 } 276 exe := fields[0] 277 if !strings.ContainsAny(exe, `/\`) { 278 if lp, err := exec.LookPath(exe); err == nil { 279 exe = lp 280 } 281 } 282 if _, err := os.Stat(exe); err != nil { 283 return "", fmt.Errorf("%s: can not find compiler %q: %v; output %q", name, exe, err, out) 284 } 285 id = b.fileHash(exe) 286 } 287 288 b.id.Lock() 289 b.toolIDCache[name] = id 290 b.id.Unlock() 291 292 return id, nil 293 } 294 295 // gccgoBuildIDELFFile creates an assembler file that records the 296 // action's build ID in an SHF_EXCLUDE section. 297 func (b *Builder) gccgoBuildIDELFFile(a *Action) (string, error) { 298 sfile := a.Objdir + "_buildid.s" 299 300 var buf bytes.Buffer 301 fmt.Fprintf(&buf, "\t"+`.section .go.buildid,"e"`+"\n") 302 fmt.Fprintf(&buf, "\t.byte ") 303 for i := 0; i < len(a.buildID); i++ { 304 if i > 0 { 305 if i%8 == 0 { 306 fmt.Fprintf(&buf, "\n\t.byte ") 307 } else { 308 fmt.Fprintf(&buf, ",") 309 } 310 } 311 fmt.Fprintf(&buf, "%#02x", a.buildID[i]) 312 } 313 fmt.Fprintf(&buf, "\n") 314 fmt.Fprintf(&buf, "\t"+`.section .note.GNU-stack,"",@progbits`+"\n") 315 fmt.Fprintf(&buf, "\t"+`.section .note.GNU-split-stack,"",@progbits`+"\n") 316 317 if cfg.BuildN || cfg.BuildX { 318 for _, line := range bytes.Split(buf.Bytes(), []byte("\n")) { 319 b.Showcmd("", "echo '%s' >> %s", line, sfile) 320 } 321 if cfg.BuildN { 322 return sfile, nil 323 } 324 } 325 326 if err := ioutil.WriteFile(sfile, buf.Bytes(), 0666); err != nil { 327 return "", err 328 } 329 330 return sfile, nil 331 } 332 333 // buildID returns the build ID found in the given file. 
// If no build ID is found, buildID returns the content hash of the file.
// If the file cannot be hashed either, the result is the empty string.
// Non-empty results are memoized in b.buildIDCache, guarded by b.id.
func (b *Builder) buildID(file string) string {
	b.id.Lock()
	id := b.buildIDCache[file]
	b.id.Unlock()

	if id != "" {
		return id
	}

	id, err := buildid.ReadFile(file)
	if err != nil {
		// No readable build ID: fall back to hashing the file contents.
		id = b.fileHash(file)
	}

	b.id.Lock()
	b.buildIDCache[file] = id
	b.id.Unlock()

	return id
}

// fileHash returns the content hash of the named file,
// encoded by hashToString.
// It returns the empty string if the file cannot be hashed.
func (b *Builder) fileHash(file string) string {
	sum, err := cache.FileHash(file)
	if err != nil {
		return ""
	}
	return hashToString(sum)
}

// useCache tries to satisfy the action a, which has action ID actionHash,
// by using a cached result from an earlier build. The candidates are the
// installed package or binary at target and, when a default build artifact
// cache is available, an entry in that cache.
// If useCache decides that the cache can be used, it sets a.buildID
// and a.built for use by parent actions and then returns true.
// Otherwise it sets a.buildID to a temporary build ID for use in the build
// and returns false. When useCache returns false the expectation is that
// the caller will build the target and then call updateBuildID to finish the
// build ID computation.
// When useCache returns false, it may have initiated buffering of output
// during a's work. The caller should defer b.flushOutput(a), to make sure
// that flushOutput is eventually called regardless of whether the action
// succeeds. The flushOutput call must happen after updateBuildID.
//
// When b.ComputeStaleOnly is set, useCache always returns true and only
// records staleness on the package; nothing is expected to be built.
func (b *Builder) useCache(a *Action, p *load.Package, actionHash cache.ActionID, target string) bool {
	// The second half of the build ID here is a placeholder for the content hash.
	// It's important that the overall buildID be unlikely verging on impossible
	// to appear in the output by chance, but that should be taken care of by
	// the actionID half; if it also appeared in the input that would be like an
	// engineered 120-bit partial SHA256 collision (120 bits being what
	// hashToString keeps of the hash).
	a.actionID = actionHash
	actionID := hashToString(actionHash)
	contentID := actionID // temporary placeholder, likely unique
	a.buildID = actionID + buildIDSeparator + contentID

	// Executable binaries also record the main build ID in the middle.
	// See "Build IDs" comment above.
	if a.Mode == "link" {
		mainpkg := a.Deps[0]
		a.buildID = actionID + buildIDSeparator + mainpkg.buildID + buildIDSeparator + contentID
	}

	// Check to see if target exists and matches the expected action ID.
	// If so, it's up to date and we can reuse it instead of rebuilding it.
	var buildID string
	if target != "" && !cfg.BuildA {
		var err error
		buildID, err = buildid.ReadFile(target)
		if err != nil && b.ComputeStaleOnly {
			// Only computing staleness: an unreadable target is reported
			// as stale and nothing further is examined.
			if p != nil && !p.Stale {
				p.Stale = true
				p.StaleReason = "target missing"
			}
			return true
		}
		if strings.HasPrefix(buildID, actionID+buildIDSeparator) {
			a.buildID = buildID
			a.built = target
			// Poison a.Target to catch uses later in the build.
			a.Target = "DO NOT USE - " + a.Mode
			return true
		}
	}

	// Special case for building a main package: if the only thing we
	// want the package for is to link a binary, and the binary is
	// already up-to-date, then to avoid a rebuild, report the package
	// as up-to-date as well. See "Build IDs" comment above.
	// TODO(rsc): Rewrite this code to use a TryCache func on the link action.
	if target != "" && !cfg.BuildA && a.Mode == "build" && len(a.triggers) == 1 && a.triggers[0].Mode == "link" {
		// Note: this buildID and err shadow the outer variables.
		buildID, err := buildid.ReadFile(target)
		if err == nil {
			// An expanded binary build ID has exactly four components:
			// actionID(binary)/actionID(main.a)/contentID(main.a)/contentID(binary).
			id := strings.Split(buildID, buildIDSeparator)
			if len(id) == 4 && id[1] == actionID {
				// Temporarily assume a.buildID is the package build ID
				// stored in the installed binary, and see if that makes
				// the upcoming link action ID a match. If so, report that
				// we built the package, safe in the knowledge that the
				// link step will not ask us for the actual package file.
				// Note that (*Builder).LinkAction arranged that all of
				// a.triggers[0]'s dependencies other than a are also
				// dependencies of a, so that we can be sure that,
				// other than a.buildID, b.linkActionID is only accessing
				// build IDs of completed actions.
				oldBuildID := a.buildID
				a.buildID = id[1] + buildIDSeparator + id[2]
				linkID := hashToString(b.linkActionID(a.triggers[0]))
				if id[0] == linkID {
					// Poison a.Target to catch uses later in the build.
					a.Target = "DO NOT USE - main build pseudo-cache Target"
					a.built = "DO NOT USE - main build pseudo-cache built"
					return true
				}
				// Otherwise restore old build ID for main build.
				a.buildID = oldBuildID
			}
		}
	}

	// Special case for linking a test binary: if the only thing we
	// want the binary for is to run the test, and the test result is cached,
	// then to avoid the link step, report the link as up-to-date.
	// We avoid the nested build ID problem in the previous special case
	// by recording the test results in the cache under the action ID half.
	if !cfg.BuildA && len(a.triggers) == 1 && a.triggers[0].TryCache != nil && a.triggers[0].TryCache(b, a.triggers[0]) {
		a.Target = "DO NOT USE - pseudo-cache Target"
		a.built = "DO NOT USE - pseudo-cache built"
		return true
	}

	if b.ComputeStaleOnly {
		// Invoked during go list only to compute and record staleness.
		// Note: this p shadows the parameter and comes from a.Package.
		if p := a.Package; p != nil && !p.Stale {
			p.Stale = true
			if cfg.BuildA {
				p.StaleReason = "build -a flag in use"
			} else {
				p.StaleReason = "build ID mismatch"
				// Prefer blaming a stale import: an import that is itself
				// stale because of a dependency wins outright; otherwise
				// the first stale import with a reason is named.
				for _, p1 := range p.Internal.Imports {
					if p1.Stale && p1.StaleReason != "" {
						if strings.HasPrefix(p1.StaleReason, "stale dependency: ") {
							p.StaleReason = p1.StaleReason
							break
						}
						if strings.HasPrefix(p.StaleReason, "build ID mismatch") {
							p.StaleReason = "stale dependency: " + p1.ImportPath
						}
					}
				}
			}
		}
		return true
	}

	// Check the build artifact cache.
	// We treat hits in this cache as being "stale" for the purposes of go list
	// (in effect, "stale" means whether p.Target is up-to-date),
	// but we're still happy to use results from the build artifact cache.
	if c := cache.Default(); c != nil {
		if !cfg.BuildA {
			entry, err := c.Get(actionHash)
			if err == nil {
				file := c.OutputFile(entry.OutputID)
				info, err1 := os.Stat(file)
				buildID, err2 := buildid.ReadFile(file)
				// Use the entry only if the cached file exists, has a readable
				// build ID, and has the size recorded in the cache entry.
				if err1 == nil && err2 == nil && info.Size() == entry.Size {
					// The output saved with the cached build is replayed
					// so a cache hit prints what the original build printed.
					stdout, stdoutEntry, err := c.GetBytes(cache.Subkey(a.actionID, "stdout"))
					if err == nil {
						if len(stdout) > 0 {
							if cfg.BuildX || cfg.BuildN {
								b.Showcmd("", "%s # internal", joinUnambiguously(str.StringList("cat", c.OutputFile(stdoutEntry.OutputID))))
							}
							if !cfg.BuildN {
								b.Print(string(stdout))
							}
						}
						a.built = file
						a.Target = "DO NOT USE - using cache"
						a.buildID = buildID
						return true
					}
				}
			}
		}

		// Begin saving output for later writing to cache.
		// A non-nil (empty) slice marks buffering as active; updateBuildID
		// panics if it finds a.output nil when it writes to the cache.
		a.output = []byte{}
	}

	return false
}

// flushOutput flushes the output being queued in a:
// it prints a.output and then resets it to nil.
func (b *Builder) flushOutput(a *Action) {
	b.Print(string(a.output))
	a.output = nil
}

// updateBuildID updates the build ID in the target written by action a.
// It requires that useCache was called for action a and returned false,
// and that the build was then carried out and given the temporary
// a.buildID to record as the build ID in the resulting package or binary.
// updateBuildID computes the final content ID and updates the build IDs
// in the binary.
//
// The file at target is modified only if rewrite is true; a.buildID is
// updated to the final build ID either way. With -n nothing is read or
// written and updateBuildID returns nil after showing the command.
//
// Keep in sync with src/cmd/buildid/buildid.go
func (b *Builder) updateBuildID(a *Action, target string, rewrite bool) error {
	if cfg.BuildX || cfg.BuildN {
		if rewrite {
			b.Showcmd("", "%s # internal", joinUnambiguously(str.StringList(base.Tool("buildid"), "-w", target)))
		}
		if cfg.BuildN {
			return nil
		}
	}

	// Find occurrences of old ID and compute new content-based ID.
	r, err := os.Open(target)
	if err != nil {
		return err
	}
	matches, hash, err := buildid.FindAndHash(r, a.buildID, 0)
	r.Close()
	if err != nil {
		return err
	}
	// Keep everything up to the last separator and replace the placeholder
	// content ID after it. The new ID must be the same length as the old one
	// because it is written over the old one in place.
	newID := a.buildID[:strings.LastIndex(a.buildID, buildIDSeparator)] + buildIDSeparator + hashToString(hash)
	if len(newID) != len(a.buildID) {
		return fmt.Errorf("internal error: build ID length mismatch %q vs %q", a.buildID, newID)
	}

	// Replace with new content-based ID.
	a.buildID = newID
	if len(matches) == 0 {
		// Assume the user specified -buildid= to override what we were going to choose.
		return nil
	}

	if rewrite {
		w, err := os.OpenFile(target, os.O_WRONLY, 0)
		if err != nil {
			return err
		}
		err = buildid.Rewrite(w, matches, newID)
		if err != nil {
			w.Close()
			return err
		}
		// A failed Close is reported as an error as well.
		if err := w.Close(); err != nil {
			return err
		}
	}

	// Cache package builds, but not binaries (link steps).
	// The expectation is that binaries are not reused
	// nearly as often as individual packages, and they're
	// much larger, so the utility-to-cache-footprint ratio
	// is much lower for binaries.
	// Not caching the link step also makes sure that repeated "go run" at least
	// always rerun the linker, so that they don't get too fast.
	// (We don't want people thinking go is a scripting language.)
	// Note also that if we start caching binaries, then we will
	// copy the binaries out of the cache to run them, and then
	// that will mean the go process is itself writing a binary
	// and then executing it, so we will need to defend against
	// ETXTBSY problems as discussed in exec.go and golang.org/issue/22220.
	if c := cache.Default(); c != nil && a.Mode == "build" {
		// Writing to the cache is best effort: failures to open the target
		// or to store it are not reported to the caller.
		r, err := os.Open(target)
		if err == nil {
			if a.output == nil {
				panic("internal error: a.output not set")
			}
			outputID, _, err := c.Put(a.actionID, r)
			if err == nil && cfg.BuildX {
				b.Showcmd("", "%s # internal", joinUnambiguously(str.StringList("cp", target, c.OutputFile(outputID))))
			}
			// Save the buffered output next to the artifact so that a later
			// cache hit in useCache can replay it.
			c.PutBytes(cache.Subkey(a.actionID, "stdout"), a.output)
			r.Close()
		}
	}

	return nil
}