kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/go/platform/kzip/kzip.go (about)

     1  /*
     2   * Copyright 2018 The Kythe Authors. All rights reserved.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *   http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  // Package kzip implements the kzip compilation storage file format.
    18  //
    19  // The package exports two types of interest: A kzip.Reader can be used to read
    20  // the contents of an existing kzip archive, and a kzip.Writer can be used to
    21  // construct a new kzip archive.
    22  //
    23  // Reading an Archive:
    24  //
    25  //	r, err := kzip.NewReader(file, size)
    26  //	...
    27  //
    28  //	// Look up a compilation record by its digest.
    29  //	unit, err := r.Lookup(unitDigest)
    30  //	...
    31  //
    32  //	// Scan all the compilation records stored.
    33  //	err := r.Scan(func(unit *kzip.Unit) error {
    34  //	   if hasInterestingProperty(unit) {
    35  //	      doStuffWith(unit)
    36  //	   }
    37  //	   return nil
    38  //	})
    39  //
    40  //	// Open a reader for a stored file.
    41  //	rc, err := r.Open(fileDigest)
    42  //	...
    43  //	defer rc.Close()
    44  //
    45  //	// Read the complete contents of a stored file.
    46  //	bits, err := r.ReadAll(fileDigest)
    47  //	...
    48  //
    49  // Writing an Archive:
    50  //
    51  //	w, err := kzip.NewWriter(file)
    52  //	...
    53  //
    54  //	// Add a compilation record and (optional) index data.
    55  //	udigest, err := w.AddUnit(unit, nil)
    56  //	...
    57  //
    58  //	// Add file contents.
    59  //	fdigest, err := w.AddFile(file)
    60  //	...
    61  package kzip // import "kythe.io/kythe/go/platform/kzip"
    62  
    63  import (
    64  	"archive/zip"
    65  	"bytes"
    66  	"context"
    67  	"crypto/sha256"
    68  	"encoding/hex"
    69  	"errors"
    70  	"fmt"
    71  	"io"
    72  	"io/ioutil"
    73  	"os"
    74  	"path"
    75  	"sort"
    76  	"strconv"
    77  	"strings"
    78  	"sync"
    79  	"time"
    80  
    81  	"kythe.io/kythe/go/platform/kcd/kythe"
    82  	"kythe.io/kythe/go/util/log"
    83  	"kythe.io/kythe/go/util/ptypes"
    84  
    85  	"bitbucket.org/creachadair/stringset"
    86  	"github.com/golang/protobuf/proto"
    87  	"golang.org/x/sync/errgroup"
    88  	"google.golang.org/protobuf/encoding/protojson"
    89  
    90  	apb "kythe.io/kythe/proto/analysis_go_proto"
    91  	spb "kythe.io/kythe/proto/storage_go_proto"
    92  
    93  	// These are common detail messages used by Kythe compilations, and
    94  	// required for JSON (un)marshaling to work.
    95  	_ "kythe.io/kythe/proto/buildinfo_go_proto"
    96  	_ "kythe.io/kythe/proto/cxx_go_proto"
    97  	_ "kythe.io/kythe/proto/filecontext_go_proto"
    98  	_ "kythe.io/kythe/proto/go_go_proto"
    99  	_ "kythe.io/kythe/proto/java_go_proto"
   100  )
   101  
   102  // Encoding describes how compilation units will be encoded when written to a kzip.
   103  type Encoding int
   104  
   105  const (
   106  	// EncodingJSON specifies to use JSON encoding
   107  	EncodingJSON Encoding = 1
   108  	// EncodingProto specifies to use Proto encoding
   109  	EncodingProto Encoding = 2
   110  	// EncodingAll specifies to encode using all known encodings
   111  	EncodingAll Encoding = EncodingJSON | EncodingProto
   112  
   113  	prefixJSON  = "units"
   114  	prefixProto = "pbunits"
   115  )
   116  
   117  // Compilation is a CompilationUnit with the contents for all of its required inputs.
   118  type Compilation struct {
   119  	Proto *apb.CompilationUnit `json:"compilation"`
   120  	Files []*apb.FileData      `json:"files"`
   121  }
   122  
   123  var (
   124  	// Use a constant file modification time in the kzip so file diffs only compare the contents,
   125  	// not when the kzips were created.
   126  	modifiedTime = time.Unix(0, 0)
   127  )
   128  
   129  // EncodingFor converts a string to an Encoding.
   130  func EncodingFor(v string) (Encoding, error) {
   131  	v = strings.ToUpper(v)
   132  	switch {
   133  	case v == "ALL":
   134  		return EncodingAll, nil
   135  	case v == "JSON":
   136  		return EncodingJSON, nil
   137  	case v == "PROTO":
   138  		return EncodingProto, nil
   139  	default:
   140  		return EncodingProto, fmt.Errorf("unknown encoding %s", v)
   141  	}
   142  }
   143  
   144  // String stringifies an Encoding
   145  func (e Encoding) String() string {
   146  	switch {
   147  	case e == EncodingAll:
   148  		return "All"
   149  	case e == EncodingJSON:
   150  		return "JSON"
   151  	case e == EncodingProto:
   152  		return "Proto"
   153  	default:
   154  		return "Encoding" + strconv.FormatInt(int64(e), 10)
   155  	}
   156  }
   157  
   158  // DefaultEncoding returns the default kzip encoding
   159  func DefaultEncoding() Encoding {
   160  	if e := os.Getenv("KYTHE_KZIP_ENCODING"); e != "" {
   161  		enc, err := EncodingFor(e)
   162  		if err == nil {
   163  			return enc
   164  		}
   165  		log.Errorf("unknown kzip encoding: %s", e)
   166  	}
   167  	return EncodingProto
   168  }
   169  
   170  // A Reader permits reading and scanning compilation records and file contents
   171  // stored in a .kzip archive. The Lookup and Scan methods are mutually safe for
   172  // concurrent use by multiple goroutines.
   173  type Reader struct {
   174  	zip *zip.Reader
   175  
   176  	// The archives written by this library always use "root/" for the root
   177  	// directory, but it's not required by the spec. Use whatever name the
   178  	// archive actually specifies in the leading directory.
   179  	root string
   180  
   181  	// The prefix used for the compilation unit directory; one of
   182  	// prefixJSON or prefixProto
   183  	unitsPrefix string
   184  }
   185  
   186  // NewReader constructs a new Reader that consumes zip data from r, whose total
   187  // size in bytes is given.
   188  func NewReader(r io.ReaderAt, size int64) (*Reader, error) {
   189  	archive, err := zip.NewReader(r, size)
   190  	if err != nil {
   191  		return nil, err
   192  	}
   193  	// Order the files in the archive by path, so we can binary search.
   194  	sort.Slice(archive.File, func(i, j int) bool {
   195  		return archive.File[i].Name < archive.File[j].Name
   196  	})
   197  
   198  	if len(archive.File) == 0 {
   199  		return nil, errors.New("archive is empty")
   200  	} else if fi := archive.File[0].FileInfo(); !fi.IsDir() {
   201  		return nil, fmt.Errorf("archive root directory missing: expected a directory but got %v - see https://kythe.io/docs/kythe-kzip.html#_directory_and_file_layout", archive.File[0].Name)
   202  	}
   203  	root := archive.File[0].Name
   204  	pref, err := unitPrefix(root, archive.File)
   205  	if err != nil {
   206  		return nil, err
   207  	}
   208  	return &Reader{
   209  		zip:         archive,
   210  		root:        root,
   211  		unitsPrefix: pref,
   212  	}, nil
   213  }
   214  
   215  func unitPrefix(root string, fs []*zip.File) (string, error) {
   216  	jsonDir := root + prefixJSON + "/"
   217  	protoDir := root + prefixProto + "/"
   218  	j := sort.Search(len(fs), func(i int) bool {
   219  		return fs[i].Name > jsonDir
   220  	})
   221  	hasJSON := j < len(fs) && strings.HasPrefix(fs[j].Name, jsonDir)
   222  	p := sort.Search(len(fs), func(i int) bool {
   223  		return fs[i].Name > protoDir
   224  	})
   225  	hasProto := p < len(fs) && strings.HasPrefix(fs[p].Name, protoDir)
   226  	if hasJSON && hasProto {
   227  		// validate that they have identical units based on hash
   228  		for p < len(fs) && j < len(fs) {
   229  			ispb := strings.HasPrefix(fs[p].Name, protoDir)
   230  			isjson := strings.HasPrefix(fs[j].Name, jsonDir)
   231  			if ispb != isjson {
   232  				return "", fmt.Errorf("both proto and JSON units found but are not identical")
   233  			}
   234  			if !ispb {
   235  				break
   236  			}
   237  			pdigest := strings.Split(fs[p].Name, "/")[2]
   238  			jdigest := strings.Split(fs[j].Name, "/")[2]
   239  			if pdigest != jdigest {
   240  				return "", fmt.Errorf("both proto and JSON units found but are not identical")
   241  			}
   242  			p++
   243  			j++
   244  		}
   245  	}
   246  	if hasProto {
   247  		return prefixProto, nil
   248  	}
   249  	return prefixJSON, nil
   250  }
   251  
   252  // Encoding exposes the file encoding being used to read compilation units.
   253  func (r *Reader) Encoding() (Encoding, error) {
   254  	switch {
   255  	case r.unitsPrefix == prefixJSON:
   256  		return EncodingJSON, nil
   257  	case r.unitsPrefix == prefixProto:
   258  		return EncodingProto, nil
   259  	}
   260  	return EncodingAll, fmt.Errorf("unknown encoding prefix: %v", r.unitsPrefix)
   261  }
   262  
   263  func (r *Reader) unitPath(digest string) string { return path.Join(r.root, r.unitsPrefix, digest) }
   264  func (r *Reader) filePath(digest string) string { return path.Join(r.root, "files", digest) }
   265  
   266  // ErrDigestNotFound is returned when a requested compilation unit or file
   267  // digest is not found.
   268  var ErrDigestNotFound = errors.New("digest not found")
   269  
   270  // ErrUnitExists is returned by AddUnit when adding the same compilation
   271  // multiple times.
   272  var ErrUnitExists = errors.New("unit already exists")
   273  
   274  func (r *Reader) readUnit(digest string, f *zip.File) (*Unit, error) {
   275  	rc, err := f.Open()
   276  	if err != nil {
   277  		return nil, err
   278  	}
   279  	rec := make([]byte, f.UncompressedSize64)
   280  	_, err = io.ReadFull(rc, rec)
   281  	rc.Close()
   282  	if err != nil {
   283  		return nil, err
   284  	}
   285  	var msg apb.IndexedCompilation
   286  	if r.unitsPrefix == prefixProto {
   287  		if err := proto.Unmarshal(rec, &msg); err != nil {
   288  			return nil, fmt.Errorf("error unmarshaling for %s: %s", digest, err)
   289  		}
   290  	} else if err := protojson.Unmarshal(rec, &msg); err != nil {
   291  		return nil, err
   292  	}
   293  	return &Unit{
   294  		Digest: digest,
   295  		Proto:  msg.Unit,
   296  		Index:  msg.Index,
   297  	}, nil
   298  }
   299  
   300  // firstIndex returns the first index in the archive's file list whose
   301  // path starts with prefix, or -1 if no such index exists.
   302  func (r *Reader) firstIndex(prefix string) int {
   303  	fs := r.zip.File
   304  	n := sort.Search(len(fs), func(i int) bool {
   305  		return fs[i].Name >= prefix
   306  	})
   307  	if n >= len(fs) {
   308  		return -1
   309  	}
   310  	if !strings.HasPrefix(fs[n].Name, prefix) {
   311  		return -1
   312  	}
   313  	return n
   314  }
   315  
   316  // Lookup returns the specified compilation from the archive, if it exists.  If
   317  // the requested digest is not in the archive, ErrDigestNotFound is returned.
   318  func (r *Reader) Lookup(unitDigest string) (*Unit, error) {
   319  	needle := r.unitPath(unitDigest)
   320  	pos := r.firstIndex(needle)
   321  	if pos >= 0 {
   322  		if f := r.zip.File[pos]; f.Name == needle {
   323  			return r.readUnit(unitDigest, f)
   324  		}
   325  	}
   326  	return nil, ErrDigestNotFound
   327  }
   328  
   329  // A ScanOption configures the behavior of scanning a kzip file.
   330  type ScanOption interface{ isScanOption() }
   331  
   332  type readConcurrency int
   333  
   334  func (readConcurrency) isScanOption() {}
   335  
   336  // ReadConcurrency returns a ScanOption that configures the max concurrency of
   337  // reading compilation units within a kzip archive.
   338  func ReadConcurrency(n int) ScanOption {
   339  	return readConcurrency(n)
   340  }
   341  
   342  func (r *Reader) canonicalUnits() (string, []*zip.File) {
   343  	prefix := r.unitPath("") + "/"
   344  	pos := r.firstIndex(prefix)
   345  	if pos < 0 {
   346  		return "", nil
   347  	}
   348  	var res []*zip.File
   349  	for _, file := range r.zip.File[pos:] {
   350  		if !strings.HasPrefix(file.Name, prefix) {
   351  			break
   352  		}
   353  		if file.Name == prefix {
   354  			continue // tolerate an empty units directory entry
   355  		}
   356  		res = append(res, file)
   357  
   358  	}
   359  	return prefix, res
   360  }
   361  
   362  // Scan scans all the compilations stored in the archive, and invokes f for
   363  // each compilation record. If f reports an error, the scan is terminated and
   364  // that error is propagated to the caller of Scan.  At most 1 invocation of f
   365  // will occur at any one time.
   366  func (r *Reader) Scan(f func(*Unit) error, opts ...ScanOption) error {
   367  	concurrency := 1
   368  	for _, opt := range opts {
   369  		switch opt := opt.(type) {
   370  		case readConcurrency:
   371  			if n := int(opt); n > 0 {
   372  				concurrency = n
   373  			}
   374  		default:
   375  			return fmt.Errorf("unknown ScanOption type: %T", opt)
   376  		}
   377  	}
   378  
   379  	prefix, fileUnits := r.canonicalUnits()
   380  	if len(fileUnits) == 0 {
   381  		return nil
   382  	}
   383  
   384  	ctx, cancel := context.WithCancel(context.Background())
   385  	defer cancel()
   386  	g, ctx := errgroup.WithContext(ctx)
   387  
   388  	files := make(chan *zip.File)
   389  
   390  	g.Go(func() error {
   391  		defer close(files)
   392  		for _, file := range fileUnits {
   393  			select {
   394  			case <-ctx.Done():
   395  				return nil
   396  			case files <- file:
   397  			}
   398  		}
   399  		return nil
   400  	})
   401  	units := make(chan *Unit)
   402  	var wg sync.WaitGroup
   403  	for i := 0; i < concurrency; i++ {
   404  		wg.Add(1)
   405  		g.Go(func() error {
   406  			defer wg.Done()
   407  			for file := range files {
   408  				digest := strings.TrimPrefix(file.Name, prefix)
   409  				unit, err := r.readUnit(digest, file)
   410  				if err != nil {
   411  					return err
   412  				}
   413  				select {
   414  				case <-ctx.Done():
   415  					return nil
   416  				case units <- unit:
   417  				}
   418  			}
   419  			return nil
   420  		})
   421  	}
   422  	go func() { wg.Wait(); close(units) }()
   423  	for unit := range units {
   424  		select {
   425  		case <-ctx.Done():
   426  			return g.Wait()
   427  		default:
   428  			if err := f(unit); err != nil {
   429  				return err
   430  			}
   431  		}
   432  	}
   433  	return g.Wait()
   434  }
   435  
   436  // Open opens a reader on the contents of the specified file digest.  If the
   437  // requested digest is not in the archive, ErrDigestNotFound is returned.  The
   438  // caller must close the reader when it is no longer needed.
   439  func (r *Reader) Open(fileDigest string) (io.ReadCloser, error) {
   440  	needle := r.filePath(fileDigest)
   441  	if pos := r.firstIndex(needle); pos >= 0 {
   442  		if f := r.zip.File[pos]; f.Name == needle {
   443  			return f.Open()
   444  		}
   445  	}
   446  	return nil, ErrDigestNotFound
   447  }
   448  
   449  // ReadAll returns the complete contents of the file with the specified digest.
   450  // It is a convenience wrapper for Open followed by ioutil.ReadAll.
   451  func (r *Reader) ReadAll(fileDigest string) ([]byte, error) {
   452  	f, err := r.Open(fileDigest)
   453  	if err == nil {
   454  		defer f.Close()
   455  		return ioutil.ReadAll(f)
   456  	}
   457  	return nil, err
   458  }
   459  
   460  // A Unit represents a compilation record read from a kzip archive.
   461  type Unit struct {
   462  	Digest string
   463  	Proto  *apb.CompilationUnit
   464  	Index  *apb.IndexedCompilation_Index
   465  }
   466  
   467  // A Writer permits construction of a .kzip archive.
   468  type Writer struct {
   469  	mu  sync.Mutex
   470  	zip *zip.Writer
   471  	fd  stringset.Set // file digests already written
   472  	ud  stringset.Set // unit digests already written
   473  	c   io.Closer     // a closer for the underlying writer (may be nil)
   474  
   475  	encoding Encoding // What encoding to use
   476  }
   477  
   478  // WriterOption describes options when creating a Writer
   479  type WriterOption func(*Writer)
   480  
   481  // WithEncoding sets the encoding to be used by a Writer
   482  func WithEncoding(e Encoding) WriterOption {
   483  	return func(w *Writer) {
   484  		w.encoding = e
   485  	}
   486  }
   487  
   488  // NewWriter constructs a new empty Writer that delivers output to w.  The
   489  // AddUnit and AddFile methods are safe for use by concurrent goroutines.
   490  func NewWriter(w io.Writer, options ...WriterOption) (*Writer, error) {
   491  	archive := zip.NewWriter(w)
   492  	// Create an entry for the root directory, which must be first.
   493  	root := &zip.FileHeader{
   494  		Name:     "root/",
   495  		Comment:  "kzip root directory",
   496  		Modified: modifiedTime,
   497  	}
   498  	root.SetMode(os.ModeDir | 0755)
   499  	if _, err := archive.CreateHeader(root); err != nil {
   500  		return nil, err
   501  	}
   502  	archive.SetComment("Kythe kzip archive")
   503  
   504  	kw := &Writer{
   505  		zip:      archive,
   506  		fd:       stringset.New(),
   507  		ud:       stringset.New(),
   508  		encoding: DefaultEncoding(),
   509  	}
   510  	for _, opt := range options {
   511  		opt(kw)
   512  	}
   513  	return kw, nil
   514  }
   515  
   516  // NewWriteCloser behaves as NewWriter, but arranges that when the *Writer is
   517  // closed it also closes wc.
   518  func NewWriteCloser(wc io.WriteCloser, options ...WriterOption) (*Writer, error) {
   519  	w, err := NewWriter(wc, options...)
   520  	if err == nil {
   521  		w.c = wc
   522  	}
   523  	return w, err
   524  }
   525  
   526  // toJSON defines the encoding format for compilation messages.
   527  var toJSON = &protojson.MarshalOptions{UseProtoNames: true}
   528  
   529  // AddUnit adds a new compilation record to be added to the archive, returning
   530  // the hex-encoded SHA256 digest of the unit's contents. It is legal for index
   531  // to be nil, in which case no index terms will be added.
   532  //
   533  // If the same compilation is added multiple times, AddUnit returns the digest
   534  // of the duplicated compilation along with ErrUnitExists to all callers after
   535  // the first. The existing unit is not modified.
   536  func (w *Writer) AddUnit(cu *apb.CompilationUnit, index *apb.IndexedCompilation_Index) (string, error) {
   537  	unit := kythe.Unit{Proto: cu}
   538  	unit.Canonicalize()
   539  	digest := unit.Digest()
   540  
   541  	w.mu.Lock()
   542  	defer w.mu.Unlock()
   543  	if w.ud.Contains(digest) {
   544  		return digest, ErrUnitExists
   545  	}
   546  
   547  	if w.encoding&EncodingJSON != 0 {
   548  		f, err := w.zip.CreateHeader(newFileHeader("root", prefixJSON, digest))
   549  		if err != nil {
   550  			return "", err
   551  		}
   552  		rec, err := toJSON.Marshal(&apb.IndexedCompilation{
   553  			Unit:  unit.Proto,
   554  			Index: index,
   555  		})
   556  		if err != nil {
   557  			return "", err
   558  		}
   559  		if _, err := f.Write(rec); err != nil {
   560  			return "", err
   561  		}
   562  	}
   563  	if w.encoding&EncodingProto != 0 {
   564  		f, err := w.zip.CreateHeader(newFileHeader("root", prefixProto, digest))
   565  		if err != nil {
   566  			return "", err
   567  		}
   568  		rec, err := proto.Marshal(&apb.IndexedCompilation{
   569  			Unit:  unit.Proto,
   570  			Index: index,
   571  		})
   572  		if err != nil {
   573  			return "", err
   574  		}
   575  		_, err = f.Write(rec)
   576  		if err != nil {
   577  			return "", err
   578  		}
   579  	}
   580  	w.ud.Add(digest)
   581  	return digest, nil
   582  }
   583  
   584  // AddFile copies the complete contents of r into the archive as a new file
   585  // entry, returning the hex-encoded SHA256 digest of the file's contents.
   586  func (w *Writer) AddFile(r io.Reader) (string, error) {
   587  	// Buffer the file contents and compute their digest.
   588  	// We have to do this ahead of time, because we have to provide the name of
   589  	// the file before we can start writing its contents.
   590  	var buf bytes.Buffer
   591  	hash := sha256.New()
   592  	if _, err := io.Copy(io.MultiWriter(hash, &buf), r); err != nil {
   593  		return "", err
   594  	}
   595  	digest := hex.EncodeToString(hash.Sum(nil))
   596  
   597  	w.mu.Lock()
   598  	defer w.mu.Unlock()
   599  	if w.fd.Contains(digest) {
   600  		return digest, nil // already written
   601  	}
   602  
   603  	f, err := w.zip.CreateHeader(newFileHeader("root", "files", digest))
   604  	if err != nil {
   605  		return "", err
   606  	}
   607  	if _, err := io.Copy(f, &buf); err != nil {
   608  		return "", err
   609  	}
   610  	w.fd.Add(digest)
   611  	return digest, nil
   612  }
   613  
   614  // Close closes the writer, flushing any remaining unwritten data out to the
   615  // underlying zip file. It is safe to close w arbitrarily many times; all calls
   616  // after the first will report nil.
   617  func (w *Writer) Close() error {
   618  	w.mu.Lock()
   619  	defer w.mu.Unlock()
   620  	if w.zip != nil {
   621  		err := w.zip.Close()
   622  		w.zip = nil
   623  		if w.c != nil {
   624  			if cerr := w.c.Close(); err == nil {
   625  				return cerr
   626  			}
   627  		}
   628  		return err
   629  	}
   630  	return nil
   631  }
   632  
   633  func newFileHeader(parts ...string) *zip.FileHeader {
   634  	fh := &zip.FileHeader{Name: path.Join(parts...), Method: zip.Deflate}
   635  	fh.SetMode(0600)
   636  	fh.Modified = modifiedTime
   637  	return fh
   638  }
   639  
   640  // Scan is a convenience function that creates a *Reader from f and invokes its
   641  // Scan method with the given callback. Each invocation of scan is passed the
   642  // reader associated with f, along with the current compilation unit.
   643  func Scan(f File, scan func(*Reader, *Unit) error, opts ...ScanOption) error {
   644  	size, err := f.Seek(0, io.SeekEnd)
   645  	if err != nil {
   646  		return fmt.Errorf("getting file size: %v", err)
   647  	}
   648  	r, err := NewReader(f, size)
   649  	if err != nil {
   650  		return err
   651  	}
   652  	return r.Scan(func(unit *Unit) error {
   653  		return scan(r, unit)
   654  	}, opts...)
   655  }
   656  
   657  // A File represents the file capabilities needed to scan a kzip file.
   658  type File interface {
   659  	io.ReaderAt
   660  	io.Seeker
   661  }
   662  
   663  // FileData creates a file data protobuf message by fully reading the contents
   664  // of r, having the designated path.
   665  func FileData(path string, r io.Reader) (*apb.FileData, error) {
   666  	var buf bytes.Buffer
   667  	hash := sha256.New()
   668  
   669  	w := io.MultiWriter(&buf, hash)
   670  	if _, err := io.Copy(w, r); err != nil {
   671  		return nil, err
   672  	}
   673  	digest := hex.EncodeToString(hash.Sum(nil))
   674  	return &apb.FileData{
   675  		Content: buf.Bytes(),
   676  		Info: &apb.FileInfo{
   677  			Path:   path,
   678  			Digest: digest,
   679  		},
   680  	}, nil
   681  }
   682  
   683  // Fetch implements the analysis.Fetcher interface for files attached to c.
   684  // If digest == "", files are matched by path only.
   685  func (c *Compilation) Fetch(path, digest string) ([]byte, error) {
   686  	for _, f := range c.Files {
   687  		info := f.GetInfo()
   688  		fp := info.Path
   689  		fd := info.Digest
   690  		if path == fp && (digest == "" || digest == fd) {
   691  			return f.Content, nil
   692  		}
   693  		if digest != "" && digest == fd {
   694  			return f.Content, nil
   695  		}
   696  	}
   697  	return nil, os.ErrNotExist
   698  }
   699  
   700  // Unit returns the CompilationUnit associated with c, creating a new empty one
   701  // if necessary.
   702  func (c *Compilation) Unit() *apb.CompilationUnit {
   703  	if c.Proto == nil {
   704  		c.Proto = new(apb.CompilationUnit)
   705  	}
   706  	return c.Proto
   707  }
   708  
   709  // AddFile adds an input file to the compilation by fully reading r.  The file
   710  // is added to the required inputs, attributed to the designated path, and also
   711  // to the file data slice.  If v != nil it is used as the vname of the input
   712  // added.
   713  func (c *Compilation) AddFile(path string, r io.Reader, v *spb.VName, details ...proto.Message) error {
   714  	var anys []*ptypes.Any
   715  	for _, d := range details {
   716  		any, err := ptypes.MarshalAny(d)
   717  		if err != nil {
   718  			return fmt.Errorf("unable to marshal %T to Any: %v", d, err)
   719  		}
   720  		anys = append(anys, any)
   721  	}
   722  	fd, err := FileData(path, r)
   723  	if err != nil {
   724  		return err
   725  	}
   726  	c.Files = append(c.Files, fd)
   727  	unit := c.Unit()
   728  	unit.RequiredInput = append(unit.RequiredInput, &apb.CompilationUnit_FileInput{
   729  		VName:   v,
   730  		Info:    fd.Info,
   731  		Details: anys,
   732  	})
   733  	return nil
   734  }
   735  
   736  // AddDetails adds the specified details message to the compilation.
   737  func (c *Compilation) AddDetails(msg proto.Message) error {
   738  	details, err := ptypes.MarshalAny(msg)
   739  	if err != nil {
   740  		return err
   741  	}
   742  	unit := c.Unit()
   743  	unit.Details = append(unit.Details, details)
   744  	return nil
   745  }