// Source: github.com/goproxy0/go@v0.0.0-20171111080102-49cc0c489d2c/src/cmd/go/internal/work/buildid.go

// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package work

import (
	"bytes"
	"fmt"
	"os"
	"os/exec"
	"strings"

	"cmd/go/internal/base"
	"cmd/go/internal/cache"
	"cmd/go/internal/cfg"
	"cmd/go/internal/load"
	"cmd/go/internal/str"
	"cmd/internal/buildid"
)

// Build IDs
//
// Go packages and binaries are stamped with build IDs that record both
// the action ID, which is a hash of the inputs to the action that produced
// the package or binary, and the content ID, which is a hash of the action
// output, namely the archive or binary itself. The hash is the same one
// used by the build artifact cache (see cmd/go/internal/cache), but
// truncated when stored in packages and binaries, as the full length is not
// needed and is a bit unwieldy. The precise form is
//
//	actionID/[.../]contentID
//
// where the actionID and contentID are prepared by hashToString below
// and are found by looking for the first or last slash.
// Usually the buildID is simply actionID/contentID, but see below for an
// exception.
//
// The build ID serves two primary purposes.
//
// 1. The action ID half allows installed packages and binaries to serve as
// one-element cache entries. If we intend to build math.a with a given
// set of inputs summarized in the action ID, and the installed math.a already
// has that action ID, we can reuse the installed math.a instead of rebuilding it.
//
// 2. The content ID half allows the easy preparation of action IDs for steps
// that consume a particular package or binary. The content hash of every
// input file for a given action must be included in the action ID hash.
// Storing the content ID in the build ID lets us read it from the file with
// minimal I/O, instead of reading and hashing the entire file.
// This is especially effective since packages and binaries are typically
// the largest inputs to an action.
//
// Separating action ID from content ID is important for reproducible builds.
// The compiler is compiled with itself. If an output were represented by its
// own action ID (instead of content ID) when computing the action ID of
// the next step in the build process, then the compiler could never have its
// own input action ID as its output action ID (short of a miraculous hash collision).
// Instead we use the content IDs to compute the next action ID, and because
// the content IDs converge, so too do the action IDs and therefore the
// build IDs and the overall compiler binary. See cmd/dist's cmdbootstrap
// for the actual convergence sequence.
//
// The “one-element cache” purpose is a bit more complex for installed
// binaries. For a binary, like cmd/gofmt, there are two steps: compile
// cmd/gofmt/*.go into main.a, and then link main.a into the gofmt binary.
// We do not install gofmt's main.a, only the gofmt binary. Being able to
// decide that the gofmt binary is up-to-date means computing the action ID
// for the final link of the gofmt binary and comparing it against the
// already-installed gofmt binary. But computing the action ID for the link
// means knowing the content ID of main.a, which we did not keep.
// To sidestep this problem, each binary actually stores an expanded build ID:
//
//	actionID(binary)/actionID(main.a)/contentID(main.a)/contentID(binary)
//
// (Note that this can be viewed equivalently as:
//
//	actionID(binary)/buildID(main.a)/contentID(binary)
//
// Storing the buildID(main.a) in the middle lets the computations that care
// about the prefix or suffix halves ignore the middle and preserves the
// original build ID as a contiguous string.)
//
// During the build, when it's time to build main.a, the gofmt binary has the
// information needed to decide whether the eventual link would produce
// the same binary: if the action ID for main.a's inputs matches and then
// the action ID for the link step matches when assuming the given main.a
// content ID, then the binary as a whole is up-to-date and need not be rebuilt.
//
// This is all a bit complex and may be simplified once we can rely on the
// main cache, but at least at the start we will be using the content-based
// staleness determination without a cache beyond the usual installed
// package and binary locations.

// buildIDSeparator separates the components of a build ID string.
const buildIDSeparator = "/"

// actionID returns the action ID half of a build ID, that is,
// everything before the first separator.
// If buildID contains no separator, the whole string is returned.
func actionID(buildID string) string {
	i := strings.Index(buildID, buildIDSeparator)
	if i < 0 {
		return buildID
	}
	return buildID[:i]
}

// contentID returns the content ID half of a build ID, that is,
// everything after the last separator.
// If buildID contains no separator, LastIndex yields -1 and the
// whole string is returned.
func contentID(buildID string) string {
	return buildID[strings.LastIndex(buildID, buildIDSeparator)+1:]
}

// hashToString converts the hash h to a string to be recorded
// in package archives and binaries as part of the build ID.
// We use the first 120 bits of the hash (5 chunks of 24 bits each)
// and encode it in base64, resulting in a 20-byte string.
Because this is only used for 115 // detecting the need to rebuild installed files (not for lookups 116 // in the object file cache), 96 bits are sufficient to drive the 117 // probability of a false "do not need to rebuild" decision to effectively zero. 118 // We embed two different hashes in archives and four in binaries, 119 // so cutting to 16 bytes is a significant savings when build IDs are displayed. 120 // (16*4+3 = 67 bytes compared to 64*4+3 = 259 bytes for the 121 // more straightforward option of printing the entire h in hex). 122 func hashToString(h [cache.HashSize]byte) string { 123 const b64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_" 124 const chunks = 5 125 var dst [chunks * 4]byte 126 for i := 0; i < chunks; i++ { 127 v := uint32(h[3*i])<<16 | uint32(h[3*i+1])<<8 | uint32(h[3*i+2]) 128 dst[4*i+0] = b64[(v>>18)&0x3F] 129 dst[4*i+1] = b64[(v>>12)&0x3F] 130 dst[4*i+2] = b64[(v>>6)&0x3F] 131 dst[4*i+3] = b64[v&0x3F] 132 } 133 return string(dst[:]) 134 } 135 136 // toolID returns the unique ID to use for the current copy of the 137 // named tool (asm, compile, cover, link). 138 // 139 // It is important that if the tool changes (for example a compiler bug is fixed 140 // and the compiler reinstalled), toolID returns a different string, so that old 141 // package archives look stale and are rebuilt (with the fixed compiler). 142 // This suggests using a content hash of the tool binary, as stored in the build ID. 143 // 144 // Unfortunately, we can't just open the tool binary, because the tool might be 145 // invoked via a wrapper program specified by -toolexec and we don't know 146 // what the wrapper program does. In particular, we want "-toolexec toolstash" 147 // to continue working: it does no good if "-toolexec toolstash" is executing a 148 // stashed copy of the compiler but the go command is acting as if it will run 149 // the standard copy of the compiler. 
The solution is to ask the tool binary to tell 150 // us its own build ID using the "-V=full" flag now supported by all tools. 151 // Then we know we're getting the build ID of the compiler that will actually run 152 // during the build. (How does the compiler binary know its own content hash? 153 // We store it there using updateBuildID after the standard link step.) 154 // 155 // A final twist is that we'd prefer to have reproducible builds for release toolchains. 156 // It should be possible to cross-compile for Windows from either Linux or Mac 157 // or Windows itself and produce the same binaries, bit for bit. If the tool ID, 158 // which influences the action ID half of the build ID, is based on the content ID, 159 // then the Linux compiler binary and Mac compiler binary will have different tool IDs 160 // and therefore produce executables with different action IDs. 161 // To avoids this problem, for releases we use the release version string instead 162 // of the compiler binary's content hash. This assumes that all compilers built 163 // on all different systems are semantically equivalent, which is of course only true 164 // modulo bugs. (Producing the exact same executables also requires that the different 165 // build setups agree on details like $GOROOT and file name paths, but at least the 166 // tool IDs do not make it impossible.) 167 func (b *Builder) toolID(name string) string { 168 b.id.Lock() 169 id := b.toolIDCache[name] 170 b.id.Unlock() 171 172 if id != "" { 173 return id 174 } 175 176 cmdline := str.StringList(cfg.BuildToolexec, base.Tool(name), "-V=full") 177 cmd := exec.Command(cmdline[0], cmdline[1:]...) 
178 cmd.Env = base.EnvForDir(cmd.Dir, os.Environ()) 179 var stdout, stderr bytes.Buffer 180 cmd.Stdout = &stdout 181 cmd.Stderr = &stderr 182 if err := cmd.Run(); err != nil { 183 base.Fatalf("go tool %s: %v\n%s%s", name, err, stdout.Bytes(), stderr.Bytes()) 184 } 185 186 line := stdout.String() 187 f := strings.Fields(line) 188 if len(f) < 3 || f[0] != name || f[1] != "version" || f[2] == "devel" && !strings.HasPrefix(f[len(f)-1], "buildID=") { 189 base.Fatalf("go tool %s -V=full: unexpected output:\n\t%s", name, line) 190 } 191 if f[2] == "devel" { 192 // On the development branch, use the content ID part of the build ID. 193 id = contentID(f[len(f)-1]) 194 } else { 195 // For a release, the output is like: "compile version go1.9.1". Use the whole line. 196 id = f[2] 197 } 198 199 b.id.Lock() 200 b.toolIDCache[name] = id 201 b.id.Unlock() 202 203 return id 204 } 205 206 // buildID returns the build ID found in the given file. 207 // If no build ID is found, buildID returns the content hash of the file. 208 func (b *Builder) buildID(file string) string { 209 b.id.Lock() 210 id := b.buildIDCache[file] 211 b.id.Unlock() 212 213 if id != "" { 214 return id 215 } 216 217 id, err := buildid.ReadFile(file) 218 if err != nil { 219 id = b.fileHash(file) 220 } 221 222 b.id.Lock() 223 b.buildIDCache[file] = id 224 b.id.Unlock() 225 226 return id 227 } 228 229 // fileHash returns the content hash of the named file. 230 func (b *Builder) fileHash(file string) string { 231 sum, err := cache.FileHash(file) 232 if err != nil { 233 return "" 234 } 235 return hashToString(sum) 236 } 237 238 // useCache tries to satisfy the action a, which has action ID actionHash, 239 // by using a cached result from an earlier build. At the moment, the only 240 // cached result is the installed package or binary at target. 241 // If useCache decides that the cache can be used, it sets a.buildID 242 // and a.built for use by parent actions and then returns true. 
243 // Otherwise it sets a.buildID to a temporary build ID for use in the build 244 // and returns false. When useCache returns false the expectation is that 245 // the caller will build the target and then call updateBuildID to finish the 246 // build ID computation. 247 func (b *Builder) useCache(a *Action, p *load.Package, actionHash cache.ActionID, target string) bool { 248 // The second half of the build ID here is a placeholder for the content hash. 249 // It's important that the overall buildID be unlikely verging on impossible 250 // to appear in the output by chance, but that should be taken care of by 251 // the actionID half; if it also appeared in the input that would be like an 252 // engineered 96-bit partial SHA256 collision. 253 a.actionID = actionHash 254 actionID := hashToString(actionHash) 255 contentID := actionID // temporary placeholder, likely unique 256 a.buildID = actionID + buildIDSeparator + contentID 257 258 // Executable binaries also record the main build ID in the middle. 259 // See "Build IDs" comment above. 260 if a.Mode == "link" { 261 mainpkg := a.Deps[0] 262 a.buildID = actionID + buildIDSeparator + mainpkg.buildID + buildIDSeparator + contentID 263 } 264 265 // Check to see if target exists and matches the expected action ID. 266 // If so, it's up to date and we can reuse it instead of rebuilding it. 267 var buildID string 268 if target != "" && !cfg.BuildA { 269 var err error 270 buildID, err = buildid.ReadFile(target) 271 if err != nil && b.ComputeStaleOnly { 272 if p != nil && !p.Stale { 273 p.Stale = true 274 p.StaleReason = "target missing" 275 } 276 return true 277 } 278 if strings.HasPrefix(buildID, actionID+buildIDSeparator) { 279 a.buildID = buildID 280 a.built = target 281 // Poison a.Target to catch uses later in the build. 
282 a.Target = "DO NOT USE - " + a.Mode 283 return true 284 } 285 } 286 287 // Special case for building a main package: if the only thing we 288 // want the package for is to link a binary, and the binary is 289 // already up-to-date, then to avoid a rebuild, report the package 290 // as up-to-date as well. See "Build IDs" comment above. 291 // TODO(rsc): Rewrite this code to use a TryCache func on the link action. 292 if target != "" && !cfg.BuildA && a.Mode == "build" && len(a.triggers) == 1 && a.triggers[0].Mode == "link" { 293 buildID, err := buildid.ReadFile(target) 294 if err == nil { 295 id := strings.Split(buildID, buildIDSeparator) 296 if len(id) == 4 && id[1] == actionID { 297 // Temporarily assume a.buildID is the package build ID 298 // stored in the installed binary, and see if that makes 299 // the upcoming link action ID a match. If so, report that 300 // we built the package, safe in the knowledge that the 301 // link step will not ask us for the actual package file. 302 // Note that (*Builder).LinkAction arranged that all of 303 // a.triggers[0]'s dependencies other than a are also 304 // dependencies of a, so that we can be sure that, 305 // other than a.buildID, b.linkActionID is only accessing 306 // build IDs of completed actions. 307 oldBuildID := a.buildID 308 a.buildID = id[1] + buildIDSeparator + id[2] 309 linkID := hashToString(b.linkActionID(a.triggers[0])) 310 if id[0] == linkID { 311 // Poison a.Target to catch uses later in the build. 312 a.Target = "DO NOT USE - main build pseudo-cache Target" 313 a.built = "DO NOT USE - main build pseudo-cache built" 314 return true 315 } 316 // Otherwise restore old build ID for main build. 317 a.buildID = oldBuildID 318 } 319 } 320 } 321 322 // Special case for linking a test binary: if the only thing we 323 // want the binary for is to run the test, and the test result is cached, 324 // then to avoid the link step, report the link as up-to-date. 
325 // We avoid the nested build ID problem in the previous special case 326 // by recording the test results in the cache under the action ID half. 327 if !cfg.BuildA && len(a.triggers) == 1 && a.triggers[0].TryCache != nil && a.triggers[0].TryCache(b, a.triggers[0]) { 328 a.Target = "DO NOT USE - pseudo-cache Target" 329 a.built = "DO NOT USE - pseudo-cache built" 330 return true 331 } 332 333 if b.ComputeStaleOnly { 334 // Invoked during go list only to compute and record staleness. 335 if p := a.Package; p != nil && !p.Stale { 336 p.Stale = true 337 if cfg.BuildA { 338 p.StaleReason = "build -a flag in use" 339 } else { 340 p.StaleReason = "build ID mismatch" 341 for _, p1 := range p.Internal.Imports { 342 if p1.Stale && p1.StaleReason != "" { 343 if strings.HasPrefix(p1.StaleReason, "stale dependency: ") { 344 p.StaleReason = p1.StaleReason 345 break 346 } 347 if strings.HasPrefix(p.StaleReason, "build ID mismatch") { 348 p.StaleReason = "stale dependency: " + p1.ImportPath 349 } 350 } 351 } 352 } 353 } 354 return true 355 } 356 357 // Check the build artifact cache. 358 // We treat hits in this cache as being "stale" for the purposes of go list 359 // (in effect, "stale" means whether p.Target is up-to-date), 360 // but we're still happy to use results from the build artifact cache. 361 if !cfg.BuildA { 362 if c := cache.Default(); c != nil { 363 outputID, size, err := c.Get(actionHash) 364 if err == nil { 365 file := c.OutputFile(outputID) 366 info, err1 := os.Stat(file) 367 buildID, err2 := buildid.ReadFile(file) 368 if err1 == nil && err2 == nil && info.Size() == size { 369 a.built = file 370 a.Target = "DO NOT USE - using cache" 371 a.buildID = buildID 372 return true 373 } 374 } 375 } 376 } 377 378 return false 379 } 380 381 // updateBuildID updates the build ID in the target written by action a. 
382 // It requires that useCache was called for action a and returned false, 383 // and that the build was then carried out and given the temporary 384 // a.buildID to record as the build ID in the resulting package or binary. 385 // updateBuildID computes the final content ID and updates the build IDs 386 // in the binary. 387 func (b *Builder) updateBuildID(a *Action, target string, rewrite bool) error { 388 if cfg.BuildX || cfg.BuildN { 389 if rewrite { 390 b.Showcmd("", "%s # internal", joinUnambiguously(str.StringList(base.Tool("buildid"), "-w", target))) 391 } 392 if cfg.BuildN { 393 return nil 394 } 395 } 396 397 // Find occurrences of old ID and compute new content-based ID. 398 r, err := os.Open(target) 399 if err != nil { 400 return err 401 } 402 matches, hash, err := buildid.FindAndHash(r, a.buildID, 0) 403 r.Close() 404 if err != nil { 405 return err 406 } 407 newID := a.buildID[:strings.LastIndex(a.buildID, buildIDSeparator)] + buildIDSeparator + hashToString(hash) 408 if len(newID) != len(a.buildID) { 409 return fmt.Errorf("internal error: build ID length mismatch %q vs %q", a.buildID, newID) 410 } 411 412 // Replace with new content-based ID. 413 a.buildID = newID 414 if len(matches) == 0 { 415 // Assume the user specified -buildid= to override what we were going to choose. 416 return nil 417 } 418 419 if rewrite { 420 w, err := os.OpenFile(target, os.O_WRONLY, 0) 421 if err != nil { 422 return err 423 } 424 err = buildid.Rewrite(w, matches, newID) 425 if err != nil { 426 w.Close() 427 return err 428 } 429 if err := w.Close(); err != nil { 430 return err 431 } 432 } 433 434 // Cache package builds, but not binaries (link steps). 435 // The expectation is that binaries are not reused 436 // nearly as often as individual packages, and they're 437 // much larger, so the cache-footprint-to-utility ratio 438 // of binaries is much lower for binaries. 
439 // Not caching the link step also makes sure that repeated "go run" at least 440 // always rerun the linker, so that they don't get too fast. 441 // (We don't want people thinking go is a scripting language.) 442 // Note also that if we start caching binaries, then we will 443 // copy the binaries out of the cache to run them, and then 444 // that will mean the go process is itself writing a binary 445 // and then executing it, so we will need to defend against 446 // ETXTBSY problems as discussed in exec.go and golang.org/issue/22220. 447 if c := cache.Default(); c != nil && a.Mode == "build" { 448 r, err := os.Open(target) 449 if err == nil { 450 c.Put(a.actionID, r) 451 r.Close() 452 } 453 } 454 455 return nil 456 }