kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/go/platform/kcd/kcd.go (about)

     1  /*
     2   * Copyright 2016 The Kythe Authors. All rights reserved.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *   http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  // Package kcd defines an interface and utility functions for the
    18  // implementation of a Kythe compilation database.
    19  //
    20  // Design documentation: kythe/docs/kythe-compilation-database.txt
    21  package kcd // import "kythe.io/kythe/go/platform/kcd"
    22  
    23  import (
    24  	"context"
    25  	"crypto/sha256"
    26  	"encoding"
    27  	"encoding/hex"
    28  	"encoding/json"
    29  	"fmt"
    30  	"io"
    31  	"regexp"
    32  	"strings"
    33  	"time"
    34  
    35  	spb "kythe.io/kythe/proto/storage_go_proto"
    36  )
    37  
// Reader represents read-only access to an underlying storage layer used to
// implement a compilation database.
//
// Every method reports its results by invoking a caller-supplied callback.
// If the callback returns an error, that error is returned from the method.
type Reader interface {
	// Revisions calls f with each known revision matching the given filter.
	// If filter == nil or is empty, all known revisions are reported.
	// If f returns an error, that error is returned from Revisions.
	// Timestamps are returned in UTC.
	Revisions(_ context.Context, filter *RevisionsFilter, f func(Revision) error) error

	// Find calls f with the digest of each known compilation matching filter.
	// If filter == nil or is empty, no compilations are reported.  Note that
	// this is the opposite of Revisions, where an empty filter matches all.
	// If f returns an error, that error is returned from Find.
	Find(_ context.Context, filter *FindFilter, f func(string) error) error

	// Units calls f with the digest, format key, and content of each specified
	// compilation that exists in the store.
	// If f returns an error, that error is returned from Units.
	Units(_ context.Context, unitDigests []string, f func(digest, key string, data []byte) error) error

	// Files calls f with the digest and content of each specified file that
	// exists in the store.
	// If f returns an error, that error is returned from Files.
	Files(_ context.Context, fileDigests []string, f func(string, []byte) error) error

	// FilesExist calls f with the digest of each specified file that exists in
	// the store.
	// If f returns an error, that error is returned from FilesExist.
	FilesExist(_ context.Context, fileDigests []string, f func(string) error) error
}
    67  
// Writer represents write access to an underlying storage layer used to
// implement a compilation database.
type Writer interface {
	// WriteRevision records the specified revision into the store at the given
	// timestamp.  It is an error if rev.Revision == "" or rev.Corpus == "".
	// If replace is true, any previous version of this marker is discarded
	// before writing.
	WriteRevision(_ context.Context, rev Revision, replace bool) error

	// WriteUnit records unit in the store at the given revision, and returns
	// the digest of the stored unit.  It is an error if rev is invalid per
	// WriteRevision.  The formatKey argument labels the format of the unit.
	WriteUnit(_ context.Context, rev Revision, formatKey string, unit Unit) (string, error)

	// WriteFile fully reads r and records its content as a file in the store.
	// It returns the digest of the stored file.
	WriteFile(_ context.Context, r io.Reader) (string, error)
}
    86  
// ReadWriter expresses the capacity to both read and write a compilation
// database.  It is the union of the Reader and Writer interfaces.
type ReadWriter interface {
	Reader
	Writer
}
    92  
// Deleter expresses the capacity to delete data from a compilation database.
// Each of the methods of this interface should return an error that satisfies
// os.IsNotExist if its argument does not match any known entries.
//
// Deletion is optional: not all writable databases must support this
// interface.
type Deleter interface {
	// DeleteUnit removes the specified unit from the database.
	DeleteUnit(_ context.Context, unitDigest string) error

	// DeleteFile removes the specified file from the database.
	DeleteFile(_ context.Context, fileDigest string) error

	// DeleteRevision removes the specified revision marker from the database.
	// It is an error if revision == "" or corpus == "".  All timestamps for
	// the matching revision are discarded.
	DeleteRevision(_ context.Context, revision, corpus string) error
}
   109  
// ReadWriteDeleter expresses the capacity to read, write, and delete data in a
// compilation database.  It is the union of the Reader, Writer, and Deleter
// interfaces.
type ReadWriteDeleter interface {
	Reader
	Writer
	Deleter
}
   117  
// RevisionsFilter gives constraints on which revisions are matched by a call to
// the Revisions method of a Reader.
//
// As interpreted by Compile, a nil or zero-valued filter matches every
// revision; otherwise a revision must satisfy every field that is set.
type RevisionsFilter struct {
	Revision string    // If set, return revision markers matching this RE2 (anchored at both ends).
	Corpus   string    // If set, return only revisions for this corpus (exact match).
	Until    time.Time // If nonzero, return only revisions at or before this time.
	Since    time.Time // If nonzero, return only revisions at or after this time.
}
   126  
   127  // Compile compiles the filter into a matching function that reports whether
   128  // its argument matches the original filter.
   129  func (rf *RevisionsFilter) Compile() (func(Revision) bool, error) {
   130  	// A nil or empty filter matches all revisions.
   131  	if rf == nil || (rf.Revision == "" && rf.Corpus == "" && rf.Until.IsZero() && rf.Since.IsZero()) {
   132  		return func(Revision) bool { return true }, nil
   133  	}
   134  	var matchRevision, matchCorpus func(...string) bool
   135  	var err error
   136  	if matchRevision, err = singleMatcher(rf.Revision); err != nil {
   137  		return nil, err
   138  	}
   139  	if matchCorpus, err = singleMatcher(regexp.QuoteMeta(rf.Corpus)); err != nil {
   140  		return nil, err
   141  	}
   142  	return func(rev Revision) bool {
   143  		return matchRevision(rev.Revision) &&
   144  			matchCorpus(rev.Corpus) &&
   145  			(rf.Until.IsZero() || !rf.Until.Before(rev.Timestamp.In(time.UTC))) &&
   146  			(rf.Since.IsZero() || !rev.Timestamp.In(time.UTC).Before(rf.Since))
   147  	}, nil
   148  }
   149  
// A Revision represents a single revision stored in the database.  It pairs a
// revision marker with the corpus it applies to and a timestamp.  Use IsValid
// to check that the Revision and Corpus fields are well-formed.
type Revision struct {
	Revision, Corpus string
	Timestamp        time.Time
}
   155  
   156  func (r Revision) String() string {
   157  	return fmt.Sprintf("#<rev %q corpus=%q at %v>", r.Revision, r.Corpus, r.Timestamp)
   158  }
   159  
   160  // IsValid returns an error if the revision or corpus fields are invalid.
   161  func (r Revision) IsValid() error {
   162  	if !IsRevisionValid(r.Revision) {
   163  		return fmt.Errorf("invalid revision %q", r.Revision)
   164  	} else if !IsCorpusValid(r.Corpus) {
   165  		return fmt.Errorf("invalid corpus %q", r.Corpus)
   166  	}
   167  	return nil
   168  }
   169  
   170  // IsRevisionValid reports whether r is a valid revision marker.
   171  // A marker is valid if it is nonempty and does not contain whitespace.
   172  func IsRevisionValid(r string) bool { return r != "" && !containsWhitespace(r) }
   173  
   174  // IsCorpusValid reports whether c is a valid corpus marker.
   175  // A corpus is valid if it is nonempty and does not contain whitespace.
   176  func IsCorpusValid(c string) bool { return c != "" && !containsWhitespace(c) }
   177  
// FindFilter gives constraints on which compilations are matched by a call to
// the Find method of a Reader.  A filter with no terms set is empty; see
// IsEmpty.
//
// When the filter is compiled (see Compile), the string terms are matched
// literally and the regular expressions are implicitly anchored at both ends.
type FindFilter struct {
	UnitCorpus  []string // Include only these unit corpus labels (exact match).
	Revisions   []string // Include only these revisions (exact match).
	Languages   []string // Include only these languages (Kythe language names).
	BuildCorpus []string // Include only these build corpus labels (exact match).

	Targets []*regexp.Regexp // Include only compilations for these targets.
	Sources []*regexp.Regexp // Include only compilations for these sources.
	Outputs []*regexp.Regexp // Include only compilations for these outputs.
}
   190  
   191  // Compile returns a compiled filter that matches index terms based on ff.
   192  // Returns nil if ff is an empty filter.
   193  func (ff *FindFilter) Compile() (*CompiledFilter, error) {
   194  	if ff.IsEmpty() {
   195  		return nil, nil
   196  	}
   197  	var cf CompiledFilter
   198  	var err error
   199  	cf.RevisionMatches, err = stringMatcher(ff.Revisions...)
   200  	if err != nil {
   201  		return nil, err
   202  	}
   203  	cf.BuildCorpusMatches, err = stringMatcher(ff.BuildCorpus...)
   204  	if err != nil {
   205  		return nil, err
   206  	}
   207  	cf.UnitCorpusMatches, err = stringMatcher(ff.UnitCorpus...)
   208  	if err != nil {
   209  		return nil, err
   210  	}
   211  	cf.LanguageMatches, err = stringMatcher(ff.Languages...)
   212  	if err != nil {
   213  		return nil, err
   214  	}
   215  	cf.TargetMatches, err = combineRegexps(ff.Targets)
   216  	if err != nil {
   217  		return nil, err
   218  	}
   219  	cf.OutputMatches, err = combineRegexps(ff.Outputs)
   220  	if err != nil {
   221  		return nil, err
   222  	}
   223  	cf.SourcesMatch, err = combineRegexps(ff.Sources)
   224  	if err != nil {
   225  		return nil, err
   226  	}
   227  	return &cf, nil
   228  }
   229  
   230  // IsEmpty reports whether f is an empty filter, meaning it specifies no
   231  // non-empty query terms.
   232  func (ff *FindFilter) IsEmpty() bool {
   233  	return ff == nil ||
   234  		(len(ff.Revisions) == 0 && len(ff.Languages) == 0 && len(ff.BuildCorpus) == 0 &&
   235  			len(ff.Targets) == 0 && len(ff.Sources) == 0 && len(ff.Outputs) == 0) && len(ff.UnitCorpus) == 0
   236  }
   237  
// The Unit interface expresses the capabilities required to represent a
// compilation unit in a data store.  In addition to the methods below, a unit
// must be able to marshal itself into binary and JSON forms.
type Unit interface {
	encoding.BinaryMarshaler
	json.Marshaler

	// Index returns the indexable terms of this unit.
	Index() Index

	// Canonicalize organizes the unit into a canonical form.  The meaning of
	// canonicalization is unit-dependent, and may safely be a no-op.
	Canonicalize()

	// Digest produces a unique string representation of a unit sufficient to
	// serve as a content-addressable digest.
	Digest() string

	// LookupVName looks up and returns the VName for the given file path,
	// or nil if it could not be found or the operation is unsupported.
	LookupVName(path string) *spb.VName
}
   259  
// Index represents the indexable terms of a compilation, as reported by the
// Index method of a Unit.
type Index struct {
	Corpus   string   // The Kythe corpus name, e.g., "kythe".
	Language string   // The Kythe language name, e.g., "c++".
	Output   string   // The output name, e.g., "bazel-out/foo.o".
	Inputs   []string // The digests of all required inputs.
	Sources  []string // The paths of all source files.
	Target   string   // The target name, e.g., "//file/base/go:file".
}
   269  
   270  // HexDigest computes a hex-encoded SHA256 digest of data.
   271  func HexDigest(data []byte) string {
   272  	sum := sha256.Sum256(data)
   273  	return hex.EncodeToString(sum[:])
   274  }
   275  
   276  // IsValidDigest reports whether s is valid as a digest computed by the
   277  // HexDigest function. It does not check whether s could have actually been
   278  // generated by the hash function, only the structure of the value.
   279  func IsValidDigest(s string) bool {
   280  	if len(s) != 2*sha256.Size {
   281  		return false
   282  	}
   283  	for i := 0; i < len(s); i++ {
   284  		if !isLowerHex(s[i]) {
   285  			return false
   286  		}
   287  	}
   288  	return true
   289  }
   290  
   291  func isLowerHex(b byte) bool { return ('0' <= b && b <= '9') || ('a' <= b && b <= 'f') }
   292  
// A CompiledFilter is a collection of matchers compiled from a FindFilter.
//
// Each field reports whether any of its arguments satisfies the corresponding
// term of the filter.  A matcher compiled from a term with no values accepts
// every call, including one with no arguments.
type CompiledFilter struct {
	RevisionMatches    func(...string) bool // Compiled from FindFilter.Revisions.
	BuildCorpusMatches func(...string) bool // Compiled from FindFilter.BuildCorpus.
	UnitCorpusMatches  func(...string) bool // Compiled from FindFilter.UnitCorpus.
	LanguageMatches    func(...string) bool // Compiled from FindFilter.Languages.
	TargetMatches      func(...string) bool // Compiled from FindFilter.Targets.
	OutputMatches      func(...string) bool // Compiled from FindFilter.Outputs.
	SourcesMatch       func(...string) bool // Compiled from FindFilter.Sources.
}
   303  
   304  // matcher returns a function that reports whether any of its string arguments
   305  // is matched by at least one of the given regular expressions.  Matches are
   306  // implicitly anchored at both ends.
   307  //
   308  // If quote != nil, it is applied to each expression before compiling it.
   309  // If there are no expressions, the matcher returns true.
   310  func matcher(exprs []string, quote func(string) string) (func(...string) bool, error) {
   311  	if len(exprs) == 0 {
   312  		return func(...string) bool { return true }, nil
   313  	} else if quote == nil {
   314  		quote = func(s string) string { return s }
   315  	}
   316  	prep := make([]string, len(exprs))
   317  	for i, expr := range exprs {
   318  		prep[i] = `(?:` + quote(expr) + `)`
   319  	}
   320  	re, err := regexp.Compile(`^(?:` + strings.Join(prep, "|") + `)$`)
   321  	if err != nil {
   322  		return nil, err
   323  	}
   324  	return func(values ...string) bool {
   325  		for _, value := range values {
   326  			if re.MatchString(value) {
   327  				return true
   328  			}
   329  		}
   330  		return false
   331  	}, nil
   332  }
   333  
   334  // singleMatcher is a shortcut for a match with a single non-empty expression.
   335  func singleMatcher(single string) (func(...string) bool, error) {
   336  	if single == "" {
   337  		return func(...string) bool { return true }, nil
   338  	}
   339  	return matcher([]string{single}, nil)
   340  }
   341  
// stringMatcher is a shortcut for matcher(exprs, regexp.QuoteMeta).  Because
// every expression is quoted before it is compiled, the resulting matcher
// compares its arguments against exprs literally rather than as patterns.
func stringMatcher(exprs ...string) (func(...string) bool, error) {
	return matcher(exprs, regexp.QuoteMeta)
}
   346  
   347  // combineRegexps constructs a matcher that accepts the disjunction of its
   348  // input expressions.
   349  func combineRegexps(res []*regexp.Regexp) (func(...string) bool, error) {
   350  	exprs := make([]string, len(res))
   351  	for i, re := range res {
   352  		exprs[i] = re.String()
   353  	}
   354  	return matcher(exprs, nil)
   355  }
   356  
   357  // containsWhitespace reports whether s contains whitespace characters.
   358  func containsWhitespace(s string) bool { return strings.ContainsAny(s, "\t\n\v\f\r ") }