github.com/goproxy0/go@v0.0.0-20171111080102-49cc0c489d2c/src/cmd/go/internal/work/buildid.go (about)

     1  // Copyright 2017 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package work
     6  
     7  import (
     8  	"bytes"
     9  	"fmt"
    10  	"os"
    11  	"os/exec"
    12  	"strings"
    13  
    14  	"cmd/go/internal/base"
    15  	"cmd/go/internal/cache"
    16  	"cmd/go/internal/cfg"
    17  	"cmd/go/internal/load"
    18  	"cmd/go/internal/str"
    19  	"cmd/internal/buildid"
    20  )
    21  
    22  // Build IDs
    23  //
    24  // Go packages and binaries are stamped with build IDs that record both
    25  // the action ID, which is a hash of the inputs to the action that produced
    26  // the packages or binary, and the content ID, which is a hash of the action
    27  // output, namely the archive or binary itself. The hash is the same one
    28  // used by the build artifact cache (see cmd/go/internal/cache), but
    29  // truncated when stored in packages and binaries, as the full length is not
    30  // needed and is a bit unwieldy. The precise form is
    31  //
    32  //	actionID/[.../]contentID
    33  //
    34  // where the actionID and contentID are prepared by hashToString below
    35  // and are found by looking for the first or last slash, respectively.
    36  // Usually the buildID is simply actionID/contentID, but see below for an
    37  // exception.
    38  //
    39  // The build ID serves two primary purposes.
    40  //
    41  // 1. The action ID half allows installed packages and binaries to serve as
    42  // one-element cache entries. If we intend to build math.a with a given
    43  // set of inputs summarized in the action ID, and the installed math.a already
    44  // has that action ID, we can reuse the installed math.a instead of rebuilding it.
    45  //
    46  // 2. The content ID half allows the easy preparation of action IDs for steps
    47  // that consume a particular package or binary. The content hash of every
    48  // input file for a given action must be included in the action ID hash.
    49  // Storing the content ID in the build ID lets us read it from the file with
    50  // minimal I/O, instead of reading and hashing the entire file.
    51  // This is especially effective since packages and binaries are typically
    52  // the largest inputs to an action.
    53  //
    54  // Separating action ID from content ID is important for reproducible builds.
    55  // The compiler is compiled with itself. If an output were represented by its
    56  // own action ID (instead of content ID) when computing the action ID of
    57  // the next step in the build process, then the compiler could never have its
    58  // own input action ID as its output action ID (short of a miraculous hash collision).
    59  // Instead we use the content IDs to compute the next action ID, and because
    60  // the content IDs converge, so too do the action IDs and therefore the
    61  // build IDs and the overall compiler binary. See cmd/dist's cmdbootstrap
    62  // for the actual convergence sequence.
    63  //
    64  // The “one-element cache” purpose is a bit more complex for installed
    65  // binaries. For a binary, like cmd/gofmt, there are two steps: compile
    66  // cmd/gofmt/*.go into main.a, and then link main.a into the gofmt binary.
    67  // We do not install gofmt's main.a, only the gofmt binary. Being able to
    68  // decide that the gofmt binary is up-to-date means computing the action ID
    69  // for the final link of the gofmt binary and comparing it against the
    70  // already-installed gofmt binary. But computing the action ID for the link
    71  // means knowing the content ID of main.a, which we did not keep.
    72  // To sidestep this problem, each binary actually stores an expanded build ID:
    73  //
    74  //	actionID(binary)/actionID(main.a)/contentID(main.a)/contentID(binary)
    75  //
    76  // (Note that this can be viewed equivalently as:
    77  //
    78  //	actionID(binary)/buildID(main.a)/contentID(binary)
    79  //
    80  // Storing the buildID(main.a) in the middle lets the computations that care
    81  // about the prefix or suffix halves ignore the middle and preserves the
    82  // original build ID as a contiguous string.)
    83  //
    84  // During the build, when it's time to build main.a, the gofmt binary has the
    85  // information needed to decide whether the eventual link would produce
    86  // the same binary: if the action ID for main.a's inputs matches and then
    87  // the action ID for the link step matches when assuming the given main.a
    88  // content ID, then the binary as a whole is up-to-date and need not be rebuilt.
    89  //
    90  // This is all a bit complex and may be simplified once we can rely on the
    91  // main cache, but at least at the start we will be using the content-based
    92  // staleness determination without a cache beyond the usual installed
    93  // package and binary locations.
    94  
    95  const buildIDSeparator = "/"
    96  
    97  // actionID returns the action ID half of a build ID.
    98  func actionID(buildID string) string {
    99  	i := strings.Index(buildID, buildIDSeparator)
   100  	if i < 0 {
   101  		return buildID
   102  	}
   103  	return buildID[:i]
   104  }
   105  
   106  // contentID returns the content ID half of a build ID.
   107  func contentID(buildID string) string {
   108  	return buildID[strings.LastIndex(buildID, buildIDSeparator)+1:]
   109  }
   110  
   111  // hashToString converts the hash h to a string to be recorded
   112  // in package archives and binaries as part of the build ID.
   113  // We use the first 96 bits of the hash and encode it in base64,
   114  // resulting in a 16-byte string. Because this is only used for
   115  // detecting the need to rebuild installed files (not for lookups
   116  // in the object file cache), 96 bits are sufficient to drive the
   117  // probability of a false "do not need to rebuild" decision to effectively zero.
   118  // We embed two different hashes in archives and four in binaries,
   119  // so cutting to 16 bytes is a significant savings when build IDs are displayed.
   120  // (16*4+3 = 67 bytes compared to 64*4+3 = 259 bytes for the
   121  // more straightforward option of printing the entire h in hex).
   122  func hashToString(h [cache.HashSize]byte) string {
   123  	const b64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"
   124  	const chunks = 5
   125  	var dst [chunks * 4]byte
   126  	for i := 0; i < chunks; i++ {
   127  		v := uint32(h[3*i])<<16 | uint32(h[3*i+1])<<8 | uint32(h[3*i+2])
   128  		dst[4*i+0] = b64[(v>>18)&0x3F]
   129  		dst[4*i+1] = b64[(v>>12)&0x3F]
   130  		dst[4*i+2] = b64[(v>>6)&0x3F]
   131  		dst[4*i+3] = b64[v&0x3F]
   132  	}
   133  	return string(dst[:])
   134  }
   135  
   136  // toolID returns the unique ID to use for the current copy of the
   137  // named tool (asm, compile, cover, link).
   138  //
   139  // It is important that if the tool changes (for example a compiler bug is fixed
   140  // and the compiler reinstalled), toolID returns a different string, so that old
   141  // package archives look stale and are rebuilt (with the fixed compiler).
   142  // This suggests using a content hash of the tool binary, as stored in the build ID.
   143  //
   144  // Unfortunately, we can't just open the tool binary, because the tool might be
   145  // invoked via a wrapper program specified by -toolexec and we don't know
   146  // what the wrapper program does. In particular, we want "-toolexec toolstash"
   147  // to continue working: it does no good if "-toolexec toolstash" is executing a
   148  // stashed copy of the compiler but the go command is acting as if it will run
   149  // the standard copy of the compiler. The solution is to ask the tool binary to tell
   150  // us its own build ID using the "-V=full" flag now supported by all tools.
   151  // Then we know we're getting the build ID of the compiler that will actually run
   152  // during the build. (How does the compiler binary know its own content hash?
   153  // We store it there using updateBuildID after the standard link step.)
   154  //
   155  // A final twist is that we'd prefer to have reproducible builds for release toolchains.
   156  // It should be possible to cross-compile for Windows from either Linux or Mac
   157  // or Windows itself and produce the same binaries, bit for bit. If the tool ID,
   158  // which influences the action ID half of the build ID, is based on the content ID,
   159  // then the Linux compiler binary and Mac compiler binary will have different tool IDs
   160  // and therefore produce executables with different action IDs.
   161  // To avoids this problem, for releases we use the release version string instead
   162  // of the compiler binary's content hash. This assumes that all compilers built
   163  // on all different systems are semantically equivalent, which is of course only true
   164  // modulo bugs. (Producing the exact same executables also requires that the different
   165  // build setups agree on details like $GOROOT and file name paths, but at least the
   166  // tool IDs do not make it impossible.)
   167  func (b *Builder) toolID(name string) string {
   168  	b.id.Lock()
   169  	id := b.toolIDCache[name]
   170  	b.id.Unlock()
   171  
   172  	if id != "" {
   173  		return id
   174  	}
   175  
   176  	cmdline := str.StringList(cfg.BuildToolexec, base.Tool(name), "-V=full")
   177  	cmd := exec.Command(cmdline[0], cmdline[1:]...)
   178  	cmd.Env = base.EnvForDir(cmd.Dir, os.Environ())
   179  	var stdout, stderr bytes.Buffer
   180  	cmd.Stdout = &stdout
   181  	cmd.Stderr = &stderr
   182  	if err := cmd.Run(); err != nil {
   183  		base.Fatalf("go tool %s: %v\n%s%s", name, err, stdout.Bytes(), stderr.Bytes())
   184  	}
   185  
   186  	line := stdout.String()
   187  	f := strings.Fields(line)
   188  	if len(f) < 3 || f[0] != name || f[1] != "version" || f[2] == "devel" && !strings.HasPrefix(f[len(f)-1], "buildID=") {
   189  		base.Fatalf("go tool %s -V=full: unexpected output:\n\t%s", name, line)
   190  	}
   191  	if f[2] == "devel" {
   192  		// On the development branch, use the content ID part of the build ID.
   193  		id = contentID(f[len(f)-1])
   194  	} else {
   195  		// For a release, the output is like: "compile version go1.9.1". Use the whole line.
   196  		id = f[2]
   197  	}
   198  
   199  	b.id.Lock()
   200  	b.toolIDCache[name] = id
   201  	b.id.Unlock()
   202  
   203  	return id
   204  }
   205  
   206  // buildID returns the build ID found in the given file.
   207  // If no build ID is found, buildID returns the content hash of the file.
   208  func (b *Builder) buildID(file string) string {
   209  	b.id.Lock()
   210  	id := b.buildIDCache[file]
   211  	b.id.Unlock()
   212  
   213  	if id != "" {
   214  		return id
   215  	}
   216  
   217  	id, err := buildid.ReadFile(file)
   218  	if err != nil {
   219  		id = b.fileHash(file)
   220  	}
   221  
   222  	b.id.Lock()
   223  	b.buildIDCache[file] = id
   224  	b.id.Unlock()
   225  
   226  	return id
   227  }
   228  
   229  // fileHash returns the content hash of the named file.
   230  func (b *Builder) fileHash(file string) string {
   231  	sum, err := cache.FileHash(file)
   232  	if err != nil {
   233  		return ""
   234  	}
   235  	return hashToString(sum)
   236  }
   237  
   238  // useCache tries to satisfy the action a, which has action ID actionHash,
   239  // by using a cached result from an earlier build. At the moment, the only
   240  // cached result is the installed package or binary at target.
   241  // If useCache decides that the cache can be used, it sets a.buildID
   242  // and a.built for use by parent actions and then returns true.
   243  // Otherwise it sets a.buildID to a temporary build ID for use in the build
   244  // and returns false. When useCache returns false the expectation is that
   245  // the caller will build the target and then call updateBuildID to finish the
   246  // build ID computation.
   247  func (b *Builder) useCache(a *Action, p *load.Package, actionHash cache.ActionID, target string) bool {
   248  	// The second half of the build ID here is a placeholder for the content hash.
   249  	// It's important that the overall buildID be unlikely verging on impossible
   250  	// to appear in the output by chance, but that should be taken care of by
   251  	// the actionID half; if it also appeared in the input that would be like an
   252  	// engineered 96-bit partial SHA256 collision.
   253  	a.actionID = actionHash
   254  	actionID := hashToString(actionHash)
   255  	contentID := actionID // temporary placeholder, likely unique
   256  	a.buildID = actionID + buildIDSeparator + contentID
   257  
   258  	// Executable binaries also record the main build ID in the middle.
   259  	// See "Build IDs" comment above.
   260  	if a.Mode == "link" {
   261  		mainpkg := a.Deps[0]
   262  		a.buildID = actionID + buildIDSeparator + mainpkg.buildID + buildIDSeparator + contentID
   263  	}
   264  
   265  	// Check to see if target exists and matches the expected action ID.
   266  	// If so, it's up to date and we can reuse it instead of rebuilding it.
   267  	var buildID string
   268  	if target != "" && !cfg.BuildA {
   269  		var err error
   270  		buildID, err = buildid.ReadFile(target)
   271  		if err != nil && b.ComputeStaleOnly {
   272  			if p != nil && !p.Stale {
   273  				p.Stale = true
   274  				p.StaleReason = "target missing"
   275  			}
   276  			return true
   277  		}
   278  		if strings.HasPrefix(buildID, actionID+buildIDSeparator) {
   279  			a.buildID = buildID
   280  			a.built = target
   281  			// Poison a.Target to catch uses later in the build.
   282  			a.Target = "DO NOT USE - " + a.Mode
   283  			return true
   284  		}
   285  	}
   286  
   287  	// Special case for building a main package: if the only thing we
   288  	// want the package for is to link a binary, and the binary is
   289  	// already up-to-date, then to avoid a rebuild, report the package
   290  	// as up-to-date as well. See "Build IDs" comment above.
   291  	// TODO(rsc): Rewrite this code to use a TryCache func on the link action.
   292  	if target != "" && !cfg.BuildA && a.Mode == "build" && len(a.triggers) == 1 && a.triggers[0].Mode == "link" {
   293  		buildID, err := buildid.ReadFile(target)
   294  		if err == nil {
   295  			id := strings.Split(buildID, buildIDSeparator)
   296  			if len(id) == 4 && id[1] == actionID {
   297  				// Temporarily assume a.buildID is the package build ID
   298  				// stored in the installed binary, and see if that makes
   299  				// the upcoming link action ID a match. If so, report that
   300  				// we built the package, safe in the knowledge that the
   301  				// link step will not ask us for the actual package file.
   302  				// Note that (*Builder).LinkAction arranged that all of
   303  				// a.triggers[0]'s dependencies other than a are also
   304  				// dependencies of a, so that we can be sure that,
   305  				// other than a.buildID, b.linkActionID is only accessing
   306  				// build IDs of completed actions.
   307  				oldBuildID := a.buildID
   308  				a.buildID = id[1] + buildIDSeparator + id[2]
   309  				linkID := hashToString(b.linkActionID(a.triggers[0]))
   310  				if id[0] == linkID {
   311  					// Poison a.Target to catch uses later in the build.
   312  					a.Target = "DO NOT USE - main build pseudo-cache Target"
   313  					a.built = "DO NOT USE - main build pseudo-cache built"
   314  					return true
   315  				}
   316  				// Otherwise restore old build ID for main build.
   317  				a.buildID = oldBuildID
   318  			}
   319  		}
   320  	}
   321  
   322  	// Special case for linking a test binary: if the only thing we
   323  	// want the binary for is to run the test, and the test result is cached,
   324  	// then to avoid the link step, report the link as up-to-date.
   325  	// We avoid the nested build ID problem in the previous special case
   326  	// by recording the test results in the cache under the action ID half.
   327  	if !cfg.BuildA && len(a.triggers) == 1 && a.triggers[0].TryCache != nil && a.triggers[0].TryCache(b, a.triggers[0]) {
   328  		a.Target = "DO NOT USE -  pseudo-cache Target"
   329  		a.built = "DO NOT USE - pseudo-cache built"
   330  		return true
   331  	}
   332  
   333  	if b.ComputeStaleOnly {
   334  		// Invoked during go list only to compute and record staleness.
   335  		if p := a.Package; p != nil && !p.Stale {
   336  			p.Stale = true
   337  			if cfg.BuildA {
   338  				p.StaleReason = "build -a flag in use"
   339  			} else {
   340  				p.StaleReason = "build ID mismatch"
   341  				for _, p1 := range p.Internal.Imports {
   342  					if p1.Stale && p1.StaleReason != "" {
   343  						if strings.HasPrefix(p1.StaleReason, "stale dependency: ") {
   344  							p.StaleReason = p1.StaleReason
   345  							break
   346  						}
   347  						if strings.HasPrefix(p.StaleReason, "build ID mismatch") {
   348  							p.StaleReason = "stale dependency: " + p1.ImportPath
   349  						}
   350  					}
   351  				}
   352  			}
   353  		}
   354  		return true
   355  	}
   356  
   357  	// Check the build artifact cache.
   358  	// We treat hits in this cache as being "stale" for the purposes of go list
   359  	// (in effect, "stale" means whether p.Target is up-to-date),
   360  	// but we're still happy to use results from the build artifact cache.
   361  	if !cfg.BuildA {
   362  		if c := cache.Default(); c != nil {
   363  			outputID, size, err := c.Get(actionHash)
   364  			if err == nil {
   365  				file := c.OutputFile(outputID)
   366  				info, err1 := os.Stat(file)
   367  				buildID, err2 := buildid.ReadFile(file)
   368  				if err1 == nil && err2 == nil && info.Size() == size {
   369  					a.built = file
   370  					a.Target = "DO NOT USE - using cache"
   371  					a.buildID = buildID
   372  					return true
   373  				}
   374  			}
   375  		}
   376  	}
   377  
   378  	return false
   379  }
   380  
   381  // updateBuildID updates the build ID in the target written by action a.
   382  // It requires that useCache was called for action a and returned false,
   383  // and that the build was then carried out and given the temporary
   384  // a.buildID to record as the build ID in the resulting package or binary.
   385  // updateBuildID computes the final content ID and updates the build IDs
   386  // in the binary.
   387  func (b *Builder) updateBuildID(a *Action, target string, rewrite bool) error {
   388  	if cfg.BuildX || cfg.BuildN {
   389  		if rewrite {
   390  			b.Showcmd("", "%s # internal", joinUnambiguously(str.StringList(base.Tool("buildid"), "-w", target)))
   391  		}
   392  		if cfg.BuildN {
   393  			return nil
   394  		}
   395  	}
   396  
   397  	// Find occurrences of old ID and compute new content-based ID.
   398  	r, err := os.Open(target)
   399  	if err != nil {
   400  		return err
   401  	}
   402  	matches, hash, err := buildid.FindAndHash(r, a.buildID, 0)
   403  	r.Close()
   404  	if err != nil {
   405  		return err
   406  	}
   407  	newID := a.buildID[:strings.LastIndex(a.buildID, buildIDSeparator)] + buildIDSeparator + hashToString(hash)
   408  	if len(newID) != len(a.buildID) {
   409  		return fmt.Errorf("internal error: build ID length mismatch %q vs %q", a.buildID, newID)
   410  	}
   411  
   412  	// Replace with new content-based ID.
   413  	a.buildID = newID
   414  	if len(matches) == 0 {
   415  		// Assume the user specified -buildid= to override what we were going to choose.
   416  		return nil
   417  	}
   418  
   419  	if rewrite {
   420  		w, err := os.OpenFile(target, os.O_WRONLY, 0)
   421  		if err != nil {
   422  			return err
   423  		}
   424  		err = buildid.Rewrite(w, matches, newID)
   425  		if err != nil {
   426  			w.Close()
   427  			return err
   428  		}
   429  		if err := w.Close(); err != nil {
   430  			return err
   431  		}
   432  	}
   433  
   434  	// Cache package builds, but not binaries (link steps).
   435  	// The expectation is that binaries are not reused
   436  	// nearly as often as individual packages, and they're
   437  	// much larger, so the cache-footprint-to-utility ratio
   438  	// of binaries is much lower for binaries.
   439  	// Not caching the link step also makes sure that repeated "go run" at least
   440  	// always rerun the linker, so that they don't get too fast.
   441  	// (We don't want people thinking go is a scripting language.)
   442  	// Note also that if we start caching binaries, then we will
   443  	// copy the binaries out of the cache to run them, and then
   444  	// that will mean the go process is itself writing a binary
   445  	// and then executing it, so we will need to defend against
   446  	// ETXTBSY problems as discussed in exec.go and golang.org/issue/22220.
   447  	if c := cache.Default(); c != nil && a.Mode == "build" {
   448  		r, err := os.Open(target)
   449  		if err == nil {
   450  			c.Put(a.actionID, r)
   451  			r.Close()
   452  		}
   453  	}
   454  
   455  	return nil
   456  }