github.phpd.cn/thought-machine/please@v12.2.0+incompatible/src/build/incrementality.go (about) 1 // Utilities to help with incremental builds. 2 // 3 // There are four things we consider for each rule: 4 // - the global config, some parts of which affect all rules 5 // - the rule definition itself (the command to run, etc) 6 // - any input files it might have 7 // - any dependencies. 8 // 9 // If all of those are the same as the last time the rule was run, 10 // we can safely assume that the output will be the same this time 11 // and so we don't have to re-run it again. 12 13 package build 14 15 import ( 16 "bytes" 17 "crypto/sha1" 18 "encoding/base64" 19 "encoding/gob" 20 "fmt" 21 "hash" 22 "io" 23 "io/ioutil" 24 "os" 25 "path" 26 "path/filepath" 27 "sort" 28 "strings" 29 "sync" 30 31 "core" 32 "fs" 33 ) 34 35 const hashLength = sha1.Size 36 37 // Length of the hash file we write 38 const hashFileLength = 5 * hashLength 39 40 // Length of old hash files that don't include secrets. 41 // Because that's basically everything we're going to keep compatibility for a while. 42 const oldHashFileLength = 4 * hashLength 43 44 // noSecrets is the thing we write when a rule doesn't have any secrets defined. 45 var noSecrets = []byte{45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45} 46 47 // Used to write something when we need to indicate a boolean in a hash. Can be essentially 48 // any value as long as they're different from one another. 49 var boolTrueHashValue = []byte{2} 50 var boolFalseHashValue = []byte{1} 51 52 // Return true if the rule needs building, false if the existing outputs are OK. 53 func needsBuilding(state *core.BuildState, target *core.BuildTarget, postBuild bool) bool { 54 // Check the dependencies first, because they don't need any disk I/O. 55 if target.NeedsTransitiveDependencies { 56 if anyDependencyHasChanged(target) { 57 return true // one of the transitive deps has changed, need to rebuild 58 } 59 } else { 60 for _, dep := range target.Dependencies() { 61 if dep.State() < core.Unchanged { 62 log.Debug("Need to rebuild %s, %s has changed", target.Label, dep.Label) 63 return true // dependency has just been rebuilt, do this too. 64 } 65 } 66 } 67 oldRuleHash, oldConfigHash, oldSourceHash, oldSecretHash := readRuleHashFile(ruleHashFileName(target), postBuild) 68 if !bytes.Equal(oldConfigHash, state.Hashes.Config) { 69 if len(oldConfigHash) == 0 { 70 // Small nicety to make it a bit clearer what's going on. 71 log.Debug("Need to build %s, outputs aren't there", target.Label) 72 } else { 73 log.Debug("Need to rebuild %s, config has changed (was %s, need %s)", target.Label, b64(oldConfigHash), b64(state.Hashes.Config)) 74 } 75 return true 76 } 77 newRuleHash := RuleHash(state, target, false, postBuild) 78 if !bytes.Equal(oldRuleHash, newRuleHash) { 79 log.Debug("Need to rebuild %s, rule has changed (was %s, need %s)", target.Label, b64(oldRuleHash), b64(newRuleHash)) 80 return true 81 } 82 newSourceHash, err := sourceHash(state, target) 83 if err != nil || !bytes.Equal(oldSourceHash, newSourceHash) { 84 log.Debug("Need to rebuild %s, sources have changed (was %s, need %s)", target.Label, b64(oldSourceHash), b64(newSourceHash)) 85 return true 86 } 87 newSecretHash, err := secretHash(target) 88 if err != nil || !bytes.Equal(oldSecretHash, newSecretHash) { 89 log.Debug("Need to rebuild %s, secrets have changed (was %s, need %s)", target.Label, b64(oldSecretHash), b64(newSecretHash)) 90 return true 91 } 92 93 // Check the outputs of this rule exist. This would only happen if the user had 94 // removed them but it's incredibly aggravating if you remove an output and the 95 // rule won't rebuild itself. 96 for _, output := range target.Outputs() { 97 realOutput := path.Join(target.OutDir(), output) 98 if !core.PathExists(realOutput) { 99 log.Debug("Output %s doesn't exist for rule %s; will rebuild.", realOutput, target.Label) 100 return true 101 } 102 } 103 // Maybe we've forced a rebuild. Do this last; might be interesting to see if it needed building anyway. 104 return state.ForceRebuild && (state.IsOriginalTarget(target.Label) || state.IsOriginalTarget(target.Label.Parent())) 105 } 106 107 // b64 base64 encodes a string of bytes for printing. 108 func b64(b []byte) string { 109 if len(b) == 0 { 110 return "<not found>" 111 } 112 return base64.RawStdEncoding.EncodeToString(b) 113 } 114 115 // Returns true if any transitive dependency of this target has changed. 116 func anyDependencyHasChanged(target *core.BuildTarget) bool { 117 done := map[core.BuildLabel]bool{} 118 var inner func(*core.BuildTarget) bool 119 inner = func(dependency *core.BuildTarget) bool { 120 done[dependency.Label] = true 121 if dependency != target && dependency.State() < core.Unchanged { 122 return true 123 } else if !dependency.OutputIsComplete || dependency == target { 124 for _, dep := range dependency.Dependencies() { 125 if !done[dep.Label] { 126 if inner(dep) { 127 log.Debug("Need to rebuild %s, %s has changed", target.Label, dep.Label) 128 return true 129 } 130 } 131 } 132 } 133 return false 134 } 135 return inner(target) 136 } 137 138 func mustSourceHash(state *core.BuildState, target *core.BuildTarget) []byte { 139 b, err := sourceHash(state, target) 140 if err != nil { 141 log.Fatalf("%s", err) 142 } 143 return b 144 } 145 146 // Calculate the hash of all sources of this rule 147 func sourceHash(state *core.BuildState, target *core.BuildTarget) ([]byte, error) { 148 h := sha1.New() 149 for source := range core.IterSources(state.Graph, target) { 150 result, err := pathHash(source.Src, false) 151 if err != nil { 152 return nil, err 153 } 154 h.Write(result) 155 h.Write([]byte(source.Src)) 156 } 157 for _, tool := range target.AllTools() { 158 if label := tool.Label(); label != nil { 159 // Note that really it would be more correct to hash the outputs of these rules 160 // in the same way we calculate a hash of sources for the rule, but that is 161 // impractical for some cases (notably npm) where tools can be very large. 162 // Instead we assume calculating the target hash is sufficient. 163 h.Write(mustTargetHash(state, state.Graph.TargetOrDie(*label))) 164 } else { 165 result, err := pathHash(tool.FullPaths(state.Graph)[0], false) 166 if err != nil { 167 return nil, err 168 } 169 h.Write(result) 170 } 171 } 172 return h.Sum(nil), nil 173 } 174 175 // Used to memoize the results of pathHash so we don't hash the same files multiple times. 176 var pathHashMemoizer = map[string][]byte{} 177 var pathHashMutex sync.RWMutex // Of course it will be accessed concurrently. 178 179 // Calculate the hash of a single path which might be a file or a directory 180 // This is the memoized form that only hashes each path once, unless recalc is true in which 181 // case it will force a recalculation of the hash. 182 func pathHash(path string, recalc bool) ([]byte, error) { 183 path = ensureRelative(path) 184 if !recalc { 185 pathHashMutex.RLock() 186 cached, present := pathHashMemoizer[path] 187 pathHashMutex.RUnlock() 188 if present { 189 return cached, nil 190 } 191 } 192 result, err := pathHashImpl(path) 193 if err == nil { 194 pathHashMutex.Lock() 195 pathHashMemoizer[path] = result 196 pathHashMutex.Unlock() 197 } 198 return result, err 199 } 200 201 func mustPathHash(path string) []byte { 202 hash, err := pathHash(path, false) 203 if err != nil { 204 panic(err) 205 } 206 return hash 207 } 208 209 func pathHashImpl(path string) ([]byte, error) { 210 h := sha1.New() 211 info, err := os.Lstat(path) 212 if err == nil && info.Mode()&os.ModeSymlink != 0 { 213 // Handle symlinks specially (don't attempt to read their contents). 214 dest, err := os.Readlink(path) 215 if err != nil { 216 return nil, err 217 } 218 // Write something arbitrary indicating this is a symlink. 219 // This isn't quite perfect - it could potentially get mixed up with a file with the 220 // appropriate contents, but that is not really likely. 221 h.Write(boolTrueHashValue) 222 h.Write([]byte(dest)) 223 return h.Sum(nil), nil 224 } else if err == nil && info.IsDir() { 225 err = fs.WalkMode(path, func(p string, isDir bool, mode os.FileMode) error { 226 if mode&os.ModeSymlink != 0 { 227 // Is a symlink, must verify that it's not a link outside the tmp dir. 228 deref, err := filepath.EvalSymlinks(p) 229 if err != nil { 230 return err 231 } 232 if !strings.HasPrefix(deref, path) { 233 return fmt.Errorf("Output %s links outside the build dir (to %s)", p, deref) 234 } 235 // Deliberately do not attempt to read it. We will read the contents later since 236 // it is a link within the temp dir anyway, and if it's a link to a directory 237 // it can introduce a cycle. 238 // Just write something to the hash indicating that we found something here, 239 // otherwise rules might be marked as unchanged if they added additional symlinks. 240 h.Write(boolTrueHashValue) 241 } else if !isDir { 242 return fileHash(&h, p) 243 } 244 return nil 245 }) 246 } else { 247 err = fileHash(&h, path) // let this handle any other errors 248 } 249 return h.Sum(nil), err 250 } 251 252 // movePathHash is used when we move files from tmp to out and there was one there before; that's 253 // the only case in which the hash of a filepath could change. 254 func movePathHash(oldPath, newPath string, copy bool) { 255 oldPath = ensureRelative(oldPath) 256 newPath = ensureRelative(newPath) 257 pathHashMutex.Lock() 258 defer pathHashMutex.Unlock() 259 if oldHash, present := pathHashMemoizer[oldPath]; present { 260 pathHashMemoizer[newPath] = oldHash 261 // If the path is in plz-out/tmp we aren't ever going to use it again, so free some space. 262 if !copy && strings.HasPrefix(oldPath, core.TmpDir) { 263 delete(pathHashMemoizer, oldPath) 264 } 265 } 266 } 267 268 // setPathHash is used to directly set a hash for a path. 269 // This is used for remote files where we download them & therefore know the hash as they come in. 270 // TODO(peterebden): We should probably use this more for things like caches and so forth... 271 func setPathHash(path string, hash []byte) { 272 pathHashMutex.Lock() 273 pathHashMemoizer[path] = hash 274 pathHashMutex.Unlock() 275 } 276 277 // ensureRelative ensures a path is relative to the repo root. 278 // This is important for getting best performance from memoizing the path hashes. 279 func ensureRelative(path string) string { 280 if strings.HasPrefix(path, core.RepoRoot) { 281 return strings.TrimLeft(strings.TrimPrefix(path, core.RepoRoot), "/") 282 } 283 return path 284 } 285 286 // Calculate the hash of a single file 287 func fileHash(h *hash.Hash, filename string) error { 288 file, err := os.Open(filename) 289 if err != nil { 290 return err 291 } 292 _, err = io.Copy(*h, file) 293 file.Close() 294 return err 295 } 296 297 // RuleHash calculates a hash for the relevant bits of this rule that affect its output. 298 // Optionally it can include parts of the rule that affect runtime (most obviously test-time). 299 // Note that we have to hash on the declared fields, we obviously can't hash pointers etc. 300 // incrementality_test will warn if new fields are added to the struct but not here. 301 func RuleHash(state *core.BuildState, target *core.BuildTarget, runtime, postBuild bool) []byte { 302 if runtime || (postBuild && target.PostBuildFunction != nil) { 303 return ruleHash(state, target, runtime) 304 } 305 // Non-post-build hashes get stored on the target itself. 306 if len(target.RuleHash) != 0 { 307 return target.RuleHash 308 } 309 target.RuleHash = ruleHash(state, target, false) // This is never a runtime hash. 310 return target.RuleHash 311 } 312 313 func ruleHash(state *core.BuildState, target *core.BuildTarget, runtime bool) []byte { 314 h := sha1.New() 315 h.Write([]byte(target.Label.String())) 316 for _, dep := range target.DeclaredDependencies() { 317 h.Write([]byte(dep.String())) 318 } 319 for _, vis := range target.Visibility { 320 h.Write([]byte(vis.String())) // Doesn't strictly affect the output, but best to be safe. 321 } 322 for _, hsh := range target.Hashes { 323 h.Write([]byte(hsh)) 324 } 325 for _, source := range target.AllSources() { 326 h.Write([]byte(source.String())) 327 } 328 for _, out := range target.DeclaredOutputs() { 329 h.Write([]byte(out)) 330 } 331 outs := target.DeclaredNamedOutputs() 332 for _, name := range target.DeclaredOutputNames() { 333 h.Write([]byte(name)) 334 for _, out := range outs[name] { 335 h.Write([]byte(out)) 336 } 337 } 338 for _, licence := range target.Licences { 339 h.Write([]byte(licence)) 340 } 341 for _, output := range target.TestOutputs { 342 h.Write([]byte(output)) 343 } 344 for _, output := range target.OptionalOutputs { 345 h.Write([]byte(output)) 346 } 347 for _, label := range target.Labels { 348 h.Write([]byte(label)) 349 } 350 for _, secret := range target.Secrets { 351 h.Write([]byte(secret)) 352 } 353 hashBool(h, target.IsBinary) 354 hashBool(h, target.IsTest) 355 hashOptionalBool(h, target.Sandbox) 356 357 // Note that we only hash the current command here; whatever's set in commands that we're not going 358 // to run is uninteresting to us. 359 h.Write([]byte(target.GetCommand(state))) 360 361 if runtime { 362 // Similarly, we only hash the current command here again. 363 h.Write([]byte(target.GetTestCommand(state))) 364 for _, datum := range target.Data { 365 h.Write([]byte(datum.String())) 366 } 367 hashBool(h, target.Containerise) 368 hashOptionalBool(h, target.TestSandbox) 369 if target.ContainerSettings != nil { 370 e := gob.NewEncoder(h) 371 if err := e.Encode(target.ContainerSettings); err != nil { 372 panic(err) 373 } 374 } 375 if target.Containerise { 376 h.Write(state.Hashes.Containerisation) 377 } 378 } 379 380 hashBool(h, target.NeedsTransitiveDependencies) 381 hashBool(h, target.OutputIsComplete) 382 // Should really not be conditional here, but we don't want adding the new flag to 383 // change the hash of every single other target everywhere. 384 // Might consider removing this the next time we peturb the hashing strategy. 385 hashOptionalBool(h, target.Stamp) 386 hashOptionalBool(h, target.IsFilegroup) 387 hashOptionalBool(h, target.IsHashFilegroup) 388 hashOptionalBool(h, target.IsRemoteFile) 389 for _, require := range target.Requires { 390 h.Write([]byte(require)) 391 } 392 // Indeterminate iteration order, yay... 393 languages := []string{} 394 for k := range target.Provides { 395 languages = append(languages, k) 396 } 397 sort.Strings(languages) 398 for _, lang := range languages { 399 h.Write([]byte(lang)) 400 h.Write([]byte(target.Provides[lang].String())) 401 } 402 // We don't need to hash the functions themselves because they get rerun every time - 403 // we just need to check whether one is added or removed, which is good since it's 404 // nigh impossible to really verify whether it's changed or not (since it may call 405 // any amount of other stuff). 406 hashBool(h, target.PreBuildFunction != nil) 407 hashBool(h, target.PostBuildFunction != nil) 408 return h.Sum(nil) 409 } 410 411 func hashBool(writer hash.Hash, b bool) { 412 if b { 413 writer.Write(boolTrueHashValue) 414 } else { 415 writer.Write(boolFalseHashValue) 416 } 417 } 418 419 func hashOptionalBool(writer hash.Hash, b bool) { 420 if b { 421 hashBool(writer, b) 422 } 423 } 424 425 // readRuleHashFile reads the contents of a rule hash file into separate byte arrays 426 // Arrays will be empty if there's an error reading the file. 427 // If postBuild is true then the rule hash will be the post-build one if present. 428 func readRuleHashFile(filename string, postBuild bool) ([]byte, []byte, []byte, []byte) { 429 contents := make([]byte, hashFileLength) 430 file, err := os.Open(filename) 431 if err != nil { 432 if !os.IsNotExist(err) { 433 log.Warning("Failed to read rule hash file %s: %s", filename, err) 434 } 435 return nil, nil, nil, nil 436 } 437 defer file.Close() 438 if n, err := file.Read(contents); err != nil { 439 log.Warning("Error reading rule hash file %s: %s", filename, err) 440 return nil, nil, nil, nil 441 } else if n == oldHashFileLength { 442 // Handle older hash files that don't have secrets in them. 443 copy(contents[4*hashLength:hashFileLength], noSecrets) 444 } else if n != hashFileLength { 445 log.Warning("Unexpected rule hash file length: expected %d bytes, was %d", hashFileLength, n) 446 return nil, nil, nil, nil 447 } 448 if postBuild { 449 return contents[hashLength : 2*hashLength], contents[2*hashLength : 3*hashLength], contents[3*hashLength : 4*hashLength], contents[4*hashLength : hashFileLength] 450 } 451 return contents[0:hashLength], contents[2*hashLength : 3*hashLength], contents[3*hashLength : 4*hashLength], contents[4*hashLength : hashFileLength] 452 } 453 454 // Writes the contents of the rule hash file 455 func writeRuleHashFile(state *core.BuildState, target *core.BuildTarget) error { 456 hash, err := targetHash(state, target) 457 if err != nil { 458 return err 459 } 460 secretHash, err := secretHash(target) 461 if err != nil { 462 return err 463 } 464 file, err := os.Create(ruleHashFileName(target)) 465 if err != nil { 466 return err 467 } 468 defer file.Close() 469 n, err := file.Write(append(hash, secretHash...)) 470 if err != nil { 471 return err 472 } else if n != hashFileLength { 473 return fmt.Errorf("Wrote %d bytes to rule hash file; should be %d", n, hashFileLength) 474 } 475 return nil 476 } 477 478 // Returns the filename we'll store the hashes for this file in. 479 func ruleHashFileName(target *core.BuildTarget) string { 480 return path.Join(target.OutDir(), ".rule_hash_"+target.Label.Name) 481 } 482 483 func postBuildOutputFileName(target *core.BuildTarget) string { 484 return path.Join(target.OutDir(), target.PostBuildOutputFileName()) 485 } 486 487 // For targets that have post-build functions, we have to store and retrieve the target's 488 // output to feed to it 489 func loadPostBuildOutput(state *core.BuildState, target *core.BuildTarget) (string, error) { 490 // Normally filegroups don't have post-build functions, but we use this sometimes for testing. 491 if target.IsFilegroup { 492 return "", nil 493 } 494 out, err := ioutil.ReadFile(postBuildOutputFileName(target)) 495 if err != nil { 496 return "", err 497 } 498 return string(out), nil 499 } 500 501 func storePostBuildOutput(state *core.BuildState, target *core.BuildTarget, out []byte) { 502 filename := postBuildOutputFileName(target) 503 if err := os.RemoveAll(filename); err != nil { 504 panic(err) 505 } 506 if err := ioutil.WriteFile(filename, out, 0644); err != nil { 507 panic(err) 508 } 509 } 510 511 // targetHash returns the hash for a target and any error encountered while calculating it. 512 func targetHash(state *core.BuildState, target *core.BuildTarget) ([]byte, error) { 513 hash := append(RuleHash(state, target, false, false), RuleHash(state, target, false, true)...) 514 hash = append(hash, state.Hashes.Config...) 515 hash2, err := sourceHash(state, target) 516 if err != nil { 517 return nil, err 518 } 519 return append(hash, hash2...), nil 520 } 521 522 // mustTargetHash returns the hash for a target and panics if it can't be calculated. 523 func mustTargetHash(state *core.BuildState, target *core.BuildTarget) []byte { 524 hash, err := targetHash(state, target) 525 if err != nil { 526 panic(err) 527 } 528 return hash 529 } 530 531 // mustShortTargetHash returns the hash for a target, shortened to 1/4 length. 532 func mustShortTargetHash(state *core.BuildState, target *core.BuildTarget) []byte { 533 return core.CollapseHash(mustTargetHash(state, target)) 534 } 535 536 // RuntimeHash returns the target hash, source hash, config hash & runtime file hash, 537 // all rolled into one. Essentially this is one hash needed to determine if the runtime 538 // state is consistent. 539 func RuntimeHash(state *core.BuildState, target *core.BuildTarget) ([]byte, error) { 540 hash := append(RuleHash(state, target, true, false), RuleHash(state, target, true, true)...) 541 hash = append(hash, state.Hashes.Config...) 542 sh, err := sourceHash(state, target) 543 if err != nil { 544 return nil, err 545 } 546 h := sha1.New() 547 h.Write(sh) 548 for source := range core.IterRuntimeFiles(state.Graph, target, true) { 549 result, err := pathHash(source.Src, false) 550 if err != nil { 551 return result, err 552 } 553 h.Write(result) 554 } 555 return append(hash, h.Sum(nil)...), nil 556 } 557 558 // PrintHashes prints the various hashes for a target to stdout. 559 // It's used by plz hash --detailed to show a breakdown of the input hashes of a target. 560 func PrintHashes(state *core.BuildState, target *core.BuildTarget) { 561 fmt.Printf("%s:\n", target.Label) 562 fmt.Printf(" Config: %s\n", b64(state.Hashes.Config)) 563 fmt.Printf(" Rule: %s (pre-build)\n", b64(RuleHash(state, target, false, false))) 564 fmt.Printf(" Rule: %s (post-build)\n", b64(RuleHash(state, target, false, true))) 565 fmt.Printf(" Source: %s\n", b64(mustSourceHash(state, target))) 566 // Note that the logic here mimics sourceHash, but I don't want to pollute that with 567 // optional printing nonsense since it's on our hot path. 568 for source := range core.IterSources(state.Graph, target) { 569 fmt.Printf(" Source: %s: %s\n", source.Src, b64(mustPathHash(source.Src))) 570 } 571 for _, tool := range target.AllTools() { 572 if label := tool.Label(); label != nil { 573 fmt.Printf(" Tool: %s: %s\n", *label, b64(mustShortTargetHash(state, state.Graph.TargetOrDie(*label)))) 574 } else { 575 fmt.Printf(" Tool: %s: %s\n", tool, b64(mustPathHash(tool.FullPaths(state.Graph)[0]))) 576 } 577 } 578 } 579 580 // secretHash calculates a hash for any secrets of a target. 581 func secretHash(target *core.BuildTarget) ([]byte, error) { 582 if len(target.Secrets) == 0 { 583 return noSecrets, nil 584 } 585 h := sha1.New() 586 for _, secret := range target.Secrets { 587 ph, err := pathHash(secret, false) 588 if err != nil && os.IsNotExist(err) { 589 return noSecrets, nil // Not having the secrets is not an error yet. 590 } else if err != nil { 591 return nil, err 592 } 593 h.Write(ph) 594 } 595 return h.Sum(nil), nil 596 }