go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/common/api/gitiles/refset.go (about)

     1  // Copyright 2018 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package gitiles
    16  
    17  import (
    18  	"context"
    19  	"regexp"
    20  	"strings"
    21  	"sync"
    22  
    23  	"go.chromium.org/luci/common/data/stringset"
    24  	"go.chromium.org/luci/common/proto/gitiles"
    25  	"go.chromium.org/luci/common/sync/parallel"
    26  	"go.chromium.org/luci/config/validation"
    27  )
    28  
// RefSet efficiently resolves many refs, supporting regexps.
//
// RefSet groups refs by prefix and issues 1 refs RPC per prefix. This is more
// efficient than a single refs RPC for "refs/" prefix, because it would return
// all refs of the repo, incl. potentially huge number of refs in refs/changes/.
type RefSet struct {
	// byPrefix maps a ref prefix (without trailing "/") to the literal refs and
	// the combined regexp registered under that prefix.
	byPrefix map[string]*refSetPrefix

	// These two fields are used by Resolve() method to compute missing refs.

	// literalRefs lists the literal refs of the set, in the order NewRefSet
	// encountered them.
	literalRefs []string
	// regexpRefs pairs each original regexp entry (as passed to NewRefSet,
	// including its "regexp:" prefix) with its individually compiled regexp.
	regexpRefs []struct {
		ref string
		re  *regexp.Regexp
	}
}
    44  
    45  // NewRefSet creates an instance of the RefSet.
    46  //
    47  // Each entry in the refs parameter can be either
    48  //   - a fully-qualified ref with at least 2 slashes, e.g. `refs/heads/master`,
    49  //     `refs/tags/v1.2.3`, or
    50  //   - a regular expression with "regexp:" prefix to match multiple refs, e.g.
    51  //     `regexp:refs/heads/.*` or `regexp:refs/branch-heads/\d+\.\d+`.
    52  //
    53  // The regular expression must have:
    54  //   - a literal prefix with at least 2 slashes, e.g. `refs/release-\d+/foo` is
    55  //     not allowed, because the literal prefix `refs/release-` contains only one
    56  //     slash, and
    57  //   - must not start with ^ or end with $ as they will be added automatically.
    58  //
    59  // See also ValidateRefSet function.
    60  func NewRefSet(refs []string) RefSet {
    61  	w := RefSet{
    62  		byPrefix: map[string]*refSetPrefix{},
    63  	}
    64  	nsRegexps := map[string][]string{}
    65  	for _, ref := range refs {
    66  		prefix, literalRef, refRegexp, compiledRegexp := parseRef(ref)
    67  		if _, exists := w.byPrefix[prefix]; !exists {
    68  			w.byPrefix[prefix] = &refSetPrefix{prefix: prefix}
    69  		}
    70  
    71  		switch {
    72  		case (literalRef == "") == (refRegexp == ""):
    73  			panic("exactly one must be defined")
    74  		case refRegexp != "":
    75  			nsRegexps[prefix] = append(nsRegexps[prefix], refRegexp)
    76  			w.regexpRefs = append(w.regexpRefs, struct {
    77  				ref string
    78  				re  *regexp.Regexp
    79  			}{ref: ref, re: compiledRegexp})
    80  		case literalRef != "":
    81  			w.byPrefix[prefix].addLiteralRef(literalRef)
    82  			w.literalRefs = append(w.literalRefs, literalRef)
    83  		}
    84  	}
    85  
    86  	for prefix, regexps := range nsRegexps {
    87  		w.byPrefix[prefix].refRegexp = regexp.MustCompile(
    88  			"^(" + strings.Join(regexps, ")|(") + ")$")
    89  	}
    90  
    91  	return w
    92  }
    93  
    94  // Has checks if a specific ref is in this set.
    95  func (w RefSet) Has(ref string) bool {
    96  	for prefix, wrp := range w.byPrefix {
    97  		nsPrefix := prefix + "/"
    98  		if strings.HasPrefix(ref, nsPrefix) && wrp.hasRef(ref) {
    99  			return true
   100  		}
   101  	}
   102  
   103  	return false
   104  }
   105  
   106  // Resolve queries gitiles to resolve watched refs to git SHA1 hash of their
   107  // current tips.
   108  //
   109  // Returns map from individual ref to its SHA1 hash and a list of original refs,
   110  // incl. regular expressions, which either don't exist or are not visible to the
   111  // requester.
   112  func (w RefSet) Resolve(ctx context.Context, client gitiles.GitilesClient, project string) (refTips map[string]string, missingRefs []string, err error) {
   113  	lock := sync.Mutex{} // for concurrent writes to the map
   114  	refTips = map[string]string{}
   115  	err = parallel.FanOutIn(func(work chan<- func() error) {
   116  		for prefix := range w.byPrefix {
   117  			prefix := prefix
   118  			work <- func() error {
   119  				resp, err := client.Refs(ctx, &gitiles.RefsRequest{Project: project, RefsPath: prefix})
   120  				if err != nil {
   121  					return err
   122  				}
   123  				lock.Lock()
   124  				defer lock.Unlock()
   125  				for ref, tip := range resp.Revisions {
   126  					if w.Has(ref) {
   127  						refTips[ref] = tip
   128  					}
   129  				}
   130  				return nil
   131  			}
   132  		}
   133  	})
   134  	if err != nil {
   135  		return
   136  	}
   137  	// Compute missingRefs as those for which no actual ref was found.
   138  	for _, ref := range w.literalRefs {
   139  		if _, ok := refTips[ref]; !ok {
   140  			missingRefs = append(missingRefs, ref)
   141  		}
   142  	}
   143  	for _, r := range w.regexpRefs {
   144  		found := false
   145  		// This loop isn't the most efficient way to perform this search, and may
   146  		// result in executing MatchString O(refTips) times. If necessary to
   147  		// optimize, store individual regexps inside relevant refSetPrefix,
   148  		// and then mark corresponding regexps as "found" on the fly inside a
   149  		// goroutine working with the refSetPrefix.
   150  		for resolvedRef := range refTips {
   151  			if r.re.MatchString(resolvedRef) {
   152  				found = true
   153  				break
   154  			}
   155  		}
   156  		if !found {
   157  			missingRefs = append(missingRefs, r.ref)
   158  		}
   159  	}
   160  	return
   161  }
   162  
   163  // ValidateRefSet validates strings representing a set of refs.
   164  //
   165  // It ensures that passed strings match the requirements as described in the
   166  // documentation for the NewRefSet function. It is designed to work with config
   167  // validation logic, hence one needs to pass in the validation.Context as well.
   168  func ValidateRefSet(c *validation.Context, refs []string) {
   169  	for _, ref := range refs {
   170  		if strings.HasPrefix(ref, "regexp:") {
   171  			validateRegexpRef(c, ref)
   172  			continue
   173  		}
   174  
   175  		if !strings.HasPrefix(ref, "refs/") {
   176  			c.Errorf("ref must start with 'refs/' not %q", ref)
   177  		}
   178  
   179  		if strings.Count(ref, "/") < 2 {
   180  			c.Errorf(`fewer than 2 slashes in ref %q`, ref)
   181  		}
   182  	}
   183  }
   184  
// refSetPrefix holds the part of a RefSet registered under a single ref prefix:
// the literal refs directly below it and an optional regexp for the rest.
type refSetPrefix struct {
	prefix string // no trailing "/".
	// literalRefs is a set of immediate children, not grandchildren. i.e., may
	// contain 'refs/prefix/child', but not 'refs/prefix/grand/child', which would
	// be contained in refSetPrefix for 'refs/prefix/grand'.
	//
	// It stays nil until the first literal ref is added.
	literalRefs stringset.Set
	// refRegexp is a regular expression matching all descendants.
	//
	// It is nil when no regexp entry was registered under this prefix.
	refRegexp *regexp.Regexp
}
   194  
   195  func (w refSetPrefix) hasRef(ref string) bool {
   196  	switch {
   197  	case w.refRegexp != nil && w.refRegexp.MatchString(ref):
   198  		return true
   199  	case w.literalRefs == nil:
   200  		return false
   201  	default:
   202  		return w.literalRefs.Has(ref)
   203  	}
   204  }
   205  
   206  func (w *refSetPrefix) addLiteralRef(literalRef string) {
   207  	if w.literalRefs == nil {
   208  		w.literalRefs = stringset.New(1)
   209  	}
   210  	w.literalRefs.Add(literalRef)
   211  }
   212  
   213  func validateRegexpRef(c *validation.Context, ref string) {
   214  	c.Enter(ref)
   215  	defer c.Exit()
   216  	reStr := strings.TrimPrefix(ref, "regexp:")
   217  	if strings.HasPrefix(reStr, "^") || strings.HasSuffix(reStr, "$") {
   218  		c.Errorf("^ and $ qualifiers are added automatically, please remove them")
   219  		return
   220  	}
   221  	r, err := regexp.Compile(reStr)
   222  	if err != nil {
   223  		c.Errorf("invalid regexp: %s", err)
   224  		return
   225  	}
   226  	lp, _ := r.LiteralPrefix()
   227  	if strings.Count(lp, "/") < 2 {
   228  		c.Errorf(`fewer than 2 slashes in literal prefix %q, e.g., `+
   229  			`"refs/heads/\d+" is accepted because of "refs/heads/" is the `+
   230  			`literal prefix, while "refs/.*" is too short`, lp)
   231  	}
   232  	if !strings.HasPrefix(lp, "refs/") {
   233  		c.Errorf(`literal prefix %q must start with "refs/"`, lp)
   234  	}
   235  }
   236  
   237  func parseRef(ref string) (prefix, literalRef, refRegexp string, compiledRegexp *regexp.Regexp) {
   238  	if strings.HasPrefix(ref, "regexp:") {
   239  		refRegexp = strings.TrimPrefix(ref, "regexp:")
   240  		compiledRegexp = regexp.MustCompile("^" + refRegexp + "$")
   241  		// Sometimes, LiteralPrefix(^regexp$) != LiteralPrefix(regexp)
   242  		// See https://github.com/golang/go/issues/30425
   243  		literalPrefix, complete := regexp.MustCompile(refRegexp).LiteralPrefix()
   244  		prefix = literalPrefix[:strings.LastIndex(literalPrefix, "/")]
   245  		if complete {
   246  			// Trivial regexp which matches only and exactly literalPrefix.
   247  			literalRef = literalPrefix
   248  			compiledRegexp = nil
   249  			refRegexp = ""
   250  		}
   251  		return
   252  	}
   253  
   254  	// Plain ref name, just extract the ref prefix from it.
   255  	lastSlash := strings.LastIndex(ref, "/")
   256  	prefix, literalRef = ref[:lastSlash], ref
   257  	return
   258  }