// kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/go/platform/kcd/kcd.go (about)

/*
 * Copyright 2016 The Kythe Authors. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Package kcd defines an interface and utility functions for the
// implementation of a Kythe compilation database.
//
// Design documentation: kythe/docs/kythe-compilation-database.txt
package kcd // import "kythe.io/kythe/go/platform/kcd"

import (
	"context"
	"crypto/sha256"
	"encoding"
	"encoding/hex"
	"encoding/json"
	"fmt"
	"io"
	"regexp"
	"strings"
	"time"

	spb "kythe.io/kythe/proto/storage_go_proto"
)

// Reader represents read-only access to an underlying storage layer used to
// implement a compilation database.
//
// Every method reports its results through a caller-supplied callback; an
// error returned by the callback is returned from the method.
type Reader interface {
	// Revisions calls f with each known revision matching the given filter.
	// If filter == nil or is empty, all known revisions are reported.
	// If f returns an error, that error is returned from Revisions.
	// Timestamps are returned in UTC.
	Revisions(_ context.Context, filter *RevisionsFilter, f func(Revision) error) error

	// Find calls f with the digest of each known compilation matching filter.
	// If filter == nil or is empty, no compilations are reported.
	// If f returns an error, that error is returned from Find.
	Find(_ context.Context, filter *FindFilter, f func(string) error) error

	// Units calls f with the digest, format key, and content of each specified
	// compilation that exists in the store.
	// If f returns an error, that error is returned from Units.
	Units(_ context.Context, unitDigests []string, f func(digest, key string, data []byte) error) error

	// Files calls f with the digest and content of each specified file that
	// exists in the store.
	// If f returns an error, that error is returned from Files.
	Files(_ context.Context, fileDigests []string, f func(string, []byte) error) error

	// FilesExist calls f with the digest of each specified file that exists in
	// the store.
	// If f returns an error, that error is returned from FilesExist.
	FilesExist(_ context.Context, fileDigests []string, f func(string) error) error
}

// Writer represents write access to an underlying storage layer used to
// implement a compilation database.
type Writer interface {
	// WriteRevision records the specified revision into the store at the given
	// timestamp. It is an error if rev.Revision == "" or rev.Corpus == "".
	// If replace is true, any previous version of this marker is discarded
	// before writing.
	WriteRevision(_ context.Context, rev Revision, replace bool) error

	// WriteUnit records unit in the store at the given revision, and returns
	// the digest of the stored unit. It is an error if rev is invalid per
	// WriteRevision.
	WriteUnit(_ context.Context, rev Revision, formatKey string, unit Unit) (string, error)

	// WriteFile fully reads r and records its content as a file in the store.
	// Returns the digest of the stored file.
	WriteFile(_ context.Context, r io.Reader) (string, error)
}

// ReadWriter expresses the capacity to both read and write a compilation database.
88 type ReadWriter interface { 89 Reader 90 Writer 91 } 92 93 // Deleter expresses the capacity to delete data from a compilation database. 94 // Each of the methods of this interface should return an error that satisfies 95 // os.IsNotExist if its argument does not match any known entries. 96 // Not all writable databases must support this interface. 97 type Deleter interface { 98 // DeleteUnit removes the specified unit from the database. 99 DeleteUnit(_ context.Context, unitDigest string) error 100 101 // DeleteFile removes the specified file from the database. 102 DeleteFile(_ context.Context, fileDigest string) error 103 104 // DeleteRevision removes the specified revision marker from the database. 105 // It is an error if rev == "" or corpus == "". All timestamps for the 106 // matching revision are discarded. 107 DeleteRevision(_ context.Context, revision, corpus string) error 108 } 109 110 // ReadWriteDeleter expresses the capacity to read, write, and delete data in a 111 // compilation database. 112 type ReadWriteDeleter interface { 113 Reader 114 Writer 115 Deleter 116 } 117 118 // RevisionsFilter gives constraints on which revisions are matched by a call to 119 // the Revisions method of compdb.Reader. 120 type RevisionsFilter struct { 121 Revision string // If set return revision markers matching this RE2. 122 Corpus string // If set, return only revisions for this corpus. 123 Until time.Time // If nonzero, return only revisions at or before this time. 124 Since time.Time // If nonzero, return only revisions at or after this time. 125 } 126 127 // Compile compiles the filter into a matching function that reports whether 128 // its argument matches the original filter. 129 func (rf *RevisionsFilter) Compile() (func(Revision) bool, error) { 130 // A nil or empty filter matches all revisions. 
131 if rf == nil || (rf.Revision == "" && rf.Corpus == "" && rf.Until.IsZero() && rf.Since.IsZero()) { 132 return func(Revision) bool { return true }, nil 133 } 134 var matchRevision, matchCorpus func(...string) bool 135 var err error 136 if matchRevision, err = singleMatcher(rf.Revision); err != nil { 137 return nil, err 138 } 139 if matchCorpus, err = singleMatcher(regexp.QuoteMeta(rf.Corpus)); err != nil { 140 return nil, err 141 } 142 return func(rev Revision) bool { 143 return matchRevision(rev.Revision) && 144 matchCorpus(rev.Corpus) && 145 (rf.Until.IsZero() || !rf.Until.Before(rev.Timestamp.In(time.UTC))) && 146 (rf.Since.IsZero() || !rev.Timestamp.In(time.UTC).Before(rf.Since)) 147 }, nil 148 } 149 150 // A Revision represents a single revision stored in the database. 151 type Revision struct { 152 Revision, Corpus string 153 Timestamp time.Time 154 } 155 156 func (r Revision) String() string { 157 return fmt.Sprintf("#<rev %q corpus=%q at %v>", r.Revision, r.Corpus, r.Timestamp) 158 } 159 160 // IsValid returns an error if the revision or corpus fields are invalid. 161 func (r Revision) IsValid() error { 162 if !IsRevisionValid(r.Revision) { 163 return fmt.Errorf("invalid revision %q", r.Revision) 164 } else if !IsCorpusValid(r.Corpus) { 165 return fmt.Errorf("invalid corpus %q", r.Corpus) 166 } 167 return nil 168 } 169 170 // IsRevisionValid reports whether r is a valid revision marker. 171 // A marker is valid if it is nonempty and does not contain whitespace. 172 func IsRevisionValid(r string) bool { return r != "" && !containsWhitespace(r) } 173 174 // IsCorpusValid reports whether c is a valid corpus marker. 175 // A corpus is valid if it is nonempty and does not contain whitespace. 176 func IsCorpusValid(c string) bool { return c != "" && !containsWhitespace(c) } 177 178 // FindFilter gives constraints on which compilations are matched by a call to 179 // the Find method of compdb.Reader. 
180 type FindFilter struct { 181 UnitCorpus []string // Include only these unit corpus labels (exact match) 182 Revisions []string // Include only these revisions (exact match). 183 Languages []string // Include only these languages (Kythe language names). 184 BuildCorpus []string // Include only these build corpus labels (exact match). 185 186 Targets []*regexp.Regexp // Include only compilations for these targets. 187 Sources []*regexp.Regexp // Include only compilations for these sources. 188 Outputs []*regexp.Regexp // include only compilations for these outputs. 189 } 190 191 // Compile returns a compiled filter that matches index terms based on ff. 192 // Returns nil if ff is an empty filter. 193 func (ff *FindFilter) Compile() (*CompiledFilter, error) { 194 if ff.IsEmpty() { 195 return nil, nil 196 } 197 var cf CompiledFilter 198 var err error 199 cf.RevisionMatches, err = stringMatcher(ff.Revisions...) 200 if err != nil { 201 return nil, err 202 } 203 cf.BuildCorpusMatches, err = stringMatcher(ff.BuildCorpus...) 204 if err != nil { 205 return nil, err 206 } 207 cf.UnitCorpusMatches, err = stringMatcher(ff.UnitCorpus...) 208 if err != nil { 209 return nil, err 210 } 211 cf.LanguageMatches, err = stringMatcher(ff.Languages...) 212 if err != nil { 213 return nil, err 214 } 215 cf.TargetMatches, err = combineRegexps(ff.Targets) 216 if err != nil { 217 return nil, err 218 } 219 cf.OutputMatches, err = combineRegexps(ff.Outputs) 220 if err != nil { 221 return nil, err 222 } 223 cf.SourcesMatch, err = combineRegexps(ff.Sources) 224 if err != nil { 225 return nil, err 226 } 227 return &cf, nil 228 } 229 230 // IsEmpty reports whether f is an empty filter, meaning it specifies no 231 // non-empty query terms. 
232 func (ff *FindFilter) IsEmpty() bool { 233 return ff == nil || 234 (len(ff.Revisions) == 0 && len(ff.Languages) == 0 && len(ff.BuildCorpus) == 0 && 235 len(ff.Targets) == 0 && len(ff.Sources) == 0 && len(ff.Outputs) == 0) && len(ff.UnitCorpus) == 0 236 } 237 238 // The Unit interface expresses the capabilities required to represent a 239 // compilation unit in a data store. 240 type Unit interface { 241 encoding.BinaryMarshaler 242 json.Marshaler 243 244 // Index returns a the indexable terms of this unit. 245 Index() Index 246 247 // Canonicalize organizes the unit into a canonical form. The meaning of 248 // canonicalization is unit-dependent, and may safely be a no-op. 249 Canonicalize() 250 251 // Digest produces a unique string representation of a unit sufficient to 252 // serve as a content-addressable digest. 253 Digest() string 254 255 // LookupVName looks up and returns the VName for the given file path 256 // or nil if it could not be found or the operation is unsupported. 257 LookupVName(path string) *spb.VName 258 } 259 260 // Index represents the indexable terms of a compilation. 261 type Index struct { 262 Corpus string // The Kythe corpus name, e.g., "kythe" 263 Language string // The Kythe language name, e.g., "c++". 264 Output string // The output name, e.g., "bazel-out/foo.o". 265 Inputs []string // The digests of all required inputs. 266 Sources []string // The paths of all source files. 267 Target string // The target name, e.g., "//file/base/go:file". 268 } 269 270 // HexDigest computes a hex-encoded SHA256 digest of data. 271 func HexDigest(data []byte) string { 272 sum := sha256.Sum256(data) 273 return hex.EncodeToString(sum[:]) 274 } 275 276 // IsValidDigest reports whether s is valid as a digest computed by the 277 // HexDigest function. It does not check whether s could have actually been 278 // generated by the hash function, only the structure of the value. 
func IsValidDigest(s string) bool {
	// A hex encoding uses two characters per digest byte.
	if len(s) != 2*sha256.Size {
		return false
	}
	for i := 0; i < len(s); i++ {
		if !isLowerHex(s[i]) {
			return false
		}
	}
	return true
}

// isLowerHex reports whether b is an ASCII digit or a lowercase hex letter
// in the range 'a' through 'f'.
func isLowerHex(b byte) bool { return ('0' <= b && b <= '9') || ('a' <= b && b <= 'f') }

// A CompiledFilter is a collection of matchers compiled from a FindFilter.
// Each field is a function that reports whether any of its arguments is
// accepted by the corresponding filter term.
type CompiledFilter struct {
	RevisionMatches    func(...string) bool
	BuildCorpusMatches func(...string) bool
	UnitCorpusMatches  func(...string) bool
	LanguageMatches    func(...string) bool
	TargetMatches      func(...string) bool
	OutputMatches      func(...string) bool
	SourcesMatch       func(...string) bool
}

// matcher returns a function that reports whether any of its string arguments
// is matched by at least one of the given regular expressions. Matches are
// implicitly anchored at both ends.
//
// If quote != nil, it is applied to each expression before compiling it.
// If there are no expressions, the matcher returns true.
// An error is returned if the combined expression does not compile.
func matcher(exprs []string, quote func(string) string) (func(...string) bool, error) {
	if len(exprs) == 0 {
		return func(...string) bool { return true }, nil
	} else if quote == nil {
		quote = func(s string) string { return s }
	}
	// Wrap each alternative in its own non-capturing group so that the
	// alternation operator cannot bind across expression boundaries.
	prep := make([]string, len(exprs))
	for i, expr := range exprs {
		prep[i] = `(?:` + quote(expr) + `)`
	}
	re, err := regexp.Compile(`^(?:` + strings.Join(prep, "|") + `)$`)
	if err != nil {
		return nil, err
	}
	return func(values ...string) bool {
		for _, value := range values {
			if re.MatchString(value) {
				return true
			}
		}
		return false
	}, nil
}

// singleMatcher is a shortcut for a match with a single non-empty expression.
// An empty expression yields a matcher that always returns true.
func singleMatcher(single string) (func(...string) bool, error) {
	if single == "" {
		return func(...string) bool { return true }, nil
	}
	return matcher([]string{single}, nil)
}

// stringMatcher is a shortcut for matcher(exprs, regexp.QuoteMeta).
func stringMatcher(exprs ...string) (func(...string) bool, error) {
	return matcher(exprs, regexp.QuoteMeta)
}

// combineRegexps constructs a matcher that accepts the disjunction of its
// input expressions. With no inputs, the matcher accepts everything.
//
// A nil entry in res is reported as an error rather than being dereferenced,
// so that a malformed filter cannot cause a panic.
func combineRegexps(res []*regexp.Regexp) (func(...string) bool, error) {
	exprs := make([]string, len(res))
	for i, re := range res {
		if re == nil {
			return nil, fmt.Errorf("nil regular expression at index %d", i)
		}
		exprs[i] = re.String()
	}
	return matcher(exprs, nil)
}

// containsWhitespace reports whether s contains any of the ASCII whitespace
// characters tab, newline, vertical tab, form feed, carriage return, or space.
func containsWhitespace(s string) bool { return strings.ContainsAny(s, "\t\n\v\f\r ") }