sigs.k8s.io/prow@v0.0.0-20240503223140-c5e374dc7eb1/pkg/repoowners/repoowners.go (about)

     1  /*
     2  Copyright 2017 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package repoowners
    18  
    19  import (
    20  	"fmt"
    21  	"os"
    22  	"path/filepath"
    23  	"regexp"
    24  	"strings"
    25  	"sync"
    26  	"time"
    27  
    28  	"github.com/sirupsen/logrus"
    29  	"sigs.k8s.io/yaml"
    30  
    31  	"k8s.io/apimachinery/pkg/util/sets"
    32  
    33  	"sigs.k8s.io/prow/pkg/git/v2"
    34  	"sigs.k8s.io/prow/pkg/github"
    35  	"sigs.k8s.io/prow/pkg/layeredsets"
    36  	"sigs.k8s.io/prow/pkg/plugins/ownersconfig"
    37  
    38  	prowConf "sigs.k8s.io/prow/pkg/config"
    39  )
    40  
const (
	// baseDirConvention is the name this package uses for the repository root
	// directory. GitHub's API uses the empty string (rather than "/") for the
	// base directory, and canonicalized paths in this file follow that convention.
	baseDirConvention = ""
)
    45  
// dirOptions holds the switches read from the "options" section of an OWNERS
// file (or of a markdown YAML header) and stored per path in RepoOwners.options.
type dirOptions struct {
	// NoParentOwners is read from the "no_parent_owners" key.
	// NOTE(review): presumably stops owners of parent directories from being
	// inherited; the code acting on it is not visible in this part of the file.
	NoParentOwners bool `json:"no_parent_owners,omitempty"`
	// AutoApproveUnownedSubfolders will result in changes to a subpath of a given path
	// that does not have an OWNERS file being auto-approved. This should be
	// enabled with caution.
	AutoApproveUnownedSubfolders bool `json:"auto_approve_unowned_subfolders,omitempty"`
}
    53  
// Config holds roles+usernames and labels for a directory considered as a unit of independent code
type Config struct {
	// Approvers, Reviewers and RequiredReviewers list logins or alias names.
	// When applied to a path they are normalized with NormLogins and aliases
	// are expanded through the repository's RepoAliases.
	Approvers         []string `json:"approvers,omitempty"`
	Reviewers         []string `json:"reviewers,omitempty"`
	RequiredReviewers []string `json:"required_reviewers,omitempty"`
	// Labels are stored verbatim (no normalization or alias expansion).
	Labels []string `json:"labels,omitempty"`
}
    61  
// SimpleConfig holds options and Config applied to everything under the containing directory
type SimpleConfig struct {
	// Options are the directory-level switches from the "options" key.
	Options dirOptions `json:"options,omitempty"`
	// Config is inlined, so approvers/reviewers/labels sit at the top level
	// of the serialized document.
	Config `json:",inline"`
}
    67  
    68  // Empty checks if a SimpleConfig could be considered empty
    69  func (s *SimpleConfig) Empty() bool {
    70  	return len(s.Approvers) == 0 && len(s.Reviewers) == 0 && len(s.RequiredReviewers) == 0 && len(s.Labels) == 0
    71  }
    72  
// FullConfig contains Filters which apply specific Config to files matching its regexp
type FullConfig struct {
	// Options are the directory-level switches from the "options" key.
	Options dirOptions `json:"options,omitempty"`
	// Filters maps a regular-expression pattern to the Config that applies to
	// files matching it. The walker in this file treats the pattern ".*" as
	// "matches everything" and skips patterns that fail to compile.
	Filters map[string]Config `json:"filters,omitempty"`
}
    78  
// githubClient is the subset of the GitHub client this package calls:
// listing a repository's collaborators and resolving a ref to a SHA.
type githubClient interface {
	// ListCollaborators returns the collaborators of org/repo.
	ListCollaborators(org, repo string) ([]github.User, error)
	// GetRef resolves ref (this file passes "heads/<branch>") to a SHA.
	GetRef(org, repo, ref string) (string, error)
}
    83  
    84  func newCache() *cache {
    85  	return &cache{
    86  		lockMapLock: &sync.Mutex{},
    87  		lockMap:     map[string]*sync.Mutex{},
    88  		dataLock:    &sync.Mutex{},
    89  		data:        map[string]cacheEntry{},
    90  	}
    91  }
    92  
// cache stores one cacheEntry per key together with a per-key mutex that
// callers hold while they (re)load that key.
type cache struct {
	// These are used to lock access to individual keys to avoid wasted tokens
	// on concurrent requests. This has no effect when using ghproxy, as ghproxy
	// serializes identical requests anyways. This should be removed once ghproxy
	// is made mandatory.
	// lockMapLock guards lockMap; lockMap holds the per-key mutexes.
	lockMapLock *sync.Mutex
	lockMap     map[string]*sync.Mutex

	// dataLock guards data, the map of cached entries.
	dataLock *sync.Mutex
	data     map[string]cacheEntry
}
   104  
   105  // getEntry returns the data for the key, a boolean indicating if data existed and a lock.
   106  // The lock is already locked, it must be unlocked by the caller.
   107  func (c *cache) getEntry(key string) (cacheEntry, bool, *sync.Mutex) {
   108  	c.lockMapLock.Lock()
   109  	entryLock, ok := c.lockMap[key]
   110  	if !ok {
   111  		c.lockMap[key] = &sync.Mutex{}
   112  		entryLock = c.lockMap[key]
   113  	}
   114  	c.lockMapLock.Unlock()
   115  
   116  	entryLock.Lock()
   117  	c.dataLock.Lock()
   118  	defer c.dataLock.Unlock()
   119  	entry, ok := c.data[key]
   120  	return entry, ok, entryLock
   121  }
   122  
   123  func (c *cache) setEntry(key string, data cacheEntry) {
   124  	c.dataLock.Lock()
   125  	c.data[key] = data
   126  	c.dataLock.Unlock()
   127  }
   128  
// cacheEntry is the cached result of loading the owners data of one
// repository branch.
type cacheEntry struct {
	// sha is the commit SHA the entry was last validated or loaded for.
	sha string
	// aliases are the parsed aliases; nil when no alias file could be read.
	aliases RepoAliases
	// owners is the parsed OWNERS configuration.
	owners *RepoOwners
}
   134  
   135  func (entry cacheEntry) matchesMDYAML(mdYAML bool) bool {
   136  	return entry.owners.enableMDYAML == mdYAML
   137  }
   138  
   139  func (entry cacheEntry) fullyLoaded() bool {
   140  	return entry.sha != "" && entry.aliases != nil && entry.owners != nil
   141  }
   142  
// Interface is an interface to work with OWNERS files.
type Interface interface {
	// LoadRepoOwners loads the owners for the current head of branch base.
	LoadRepoOwners(org, repo, base string) (RepoOwner, error)
	// LoadRepoOwnersSha loads the owners for an explicitly given SHA;
	// updateCache controls whether a freshly loaded result is stored in the cache.
	LoadRepoOwnersSha(org, repo, base, sha string, updateCache bool) (RepoOwner, error)

	// WithFields returns a clone whose logger carries the extra fields.
	WithFields(fields logrus.Fields) Interface
	// WithGitHubClient returns a clone that uses the given GitHub client.
	WithGitHubClient(client github.Client) Interface
	// ForPlugin returns a clone whose logger carries a "plugin" field.
	ForPlugin(plugin string) Interface
	// Used reports whether owners have been loaded through this client.
	Used() bool
}
   153  
// Compile-time assertion that *Client implements Interface.
var _ Interface = &Client{}

// Client is the repoowners client
type Client struct {
	// logger is the logging context; the With*/For* methods derive clones
	// with additional fields.
	logger *logrus.Entry
	// ghc is the GitHub client used to resolve refs and list collaborators.
	ghc githubClient
	// used is set once LoadRepoOwnersSha has been called on this instance.
	used bool
	// delegate holds the state shared between a client and its clones.
	*delegate
}
   164  
// delegate bundles the dependencies and the cache that a Client and all of
// the clones derived from it share by pointer.
type delegate struct {
	// git produces repository clients used to inspect OWNERS files on disk.
	git git.ClientFactory

	// mdYAMLEnabled reports whether YAML headers in '.md' files are parsed for org/repo.
	mdYAMLEnabled func(org, repo string) bool
	// skipCollaborators reports whether collaborator filtering is skipped for org/repo.
	skipCollaborators func(org, repo string) bool
	// ownersDirDenylist returns the configuration of directories to ignore.
	ownersDirDenylist func() *prowConf.OwnersDirDenylist
	// filenames resolves the OWNERS / OWNERS_ALIASES file names for org/repo.
	filenames ownersconfig.Resolver

	// cache keeps loaded owners per "org/repo:base".
	cache *cache
}
   175  
   176  // WithFields clones the client, keeping the underlying delegate the same but adding
   177  // fields to the logging context
   178  func (c *Client) WithFields(fields logrus.Fields) Interface {
   179  	return &Client{
   180  		logger:   c.logger.WithFields(fields),
   181  		delegate: c.delegate,
   182  	}
   183  }
   184  
   185  // WithGitHubClient clones the client, keeping the underlying delegate the same but adding
   186  // a new GitHub Client. This is useful when making use a context-local client
   187  func (c *Client) WithGitHubClient(client github.Client) Interface {
   188  	return &Client{
   189  		logger:   c.logger,
   190  		ghc:      client,
   191  		delegate: c.delegate,
   192  	}
   193  }
   194  
   195  // ForPlugin clones the client, keeping the underlying delegate the same but adding
   196  // a log field
   197  func (c *Client) ForPlugin(plugin string) Interface {
   198  	return c.forKeyValue("plugin", plugin)
   199  }
   200  
   201  func (c *Client) forKeyValue(key, value string) Interface {
   202  	return &Client{
   203  		logger:   c.logger.WithField(key, value),
   204  		ghc:      c.ghc,
   205  		delegate: c.delegate,
   206  	}
   207  }
   208  
// Used reports whether this client instance has been used to load owners,
// i.e. whether LoadRepoOwnersSha has been called on it. Clones created by the
// With*/For* methods carry their own flag.
func (c *Client) Used() bool {
	return c.used
}
   213  
   214  // NewClient is the constructor for Client
   215  func NewClient(
   216  	gc git.ClientFactory,
   217  	ghc github.Client,
   218  	mdYAMLEnabled func(org, repo string) bool,
   219  	skipCollaborators func(org, repo string) bool,
   220  	ownersDirDenylist func() *prowConf.OwnersDirDenylist,
   221  	filenames ownersconfig.Resolver,
   222  ) *Client {
   223  	return &Client{
   224  		logger: logrus.WithField("client", "repoowners"),
   225  		ghc:    ghc,
   226  		delegate: &delegate{
   227  			git:   gc,
   228  			cache: newCache(),
   229  
   230  			mdYAMLEnabled:     mdYAMLEnabled,
   231  			skipCollaborators: skipCollaborators,
   232  			ownersDirDenylist: ownersDirDenylist,
   233  			filenames:         filenames,
   234  		},
   235  	}
   236  }
   237  
// RepoAliases defines groups of people to be used in OWNERS files.
// Keys are alias names normalized with github.NormLogin; values are the
// normalized logins the alias expands to.
type RepoAliases map[string]sets.Set[string]
   240  
// RepoOwner is an interface to work with repoowners
type RepoOwner interface {
	// FindApproverOwnersForFile returns the deepest directory on the way to
	// path whose OWNERS data has approvers applying to path.
	FindApproverOwnersForFile(path string) string
	// FindReviewersOwnersForFile is the reviewers counterpart of
	// FindApproverOwnersForFile.
	FindReviewersOwnersForFile(path string) string
	// FindLabelsForFile returns the labels to apply for changes to path.
	FindLabelsForFile(path string) sets.Set[string]
	// IsNoParentOwners reports the NoParentOwners option stored for path.
	IsNoParentOwners(path string) bool
	// IsAutoApproveUnownedSubfolders reports the AutoApproveUnownedSubfolders
	// option stored for directory.
	IsAutoApproveUnownedSubfolders(directory string) bool
	LeafApprovers(path string) sets.Set[string]
	Approvers(path string) layeredsets.String
	LeafReviewers(path string) sets.Set[string]
	Reviewers(path string) layeredsets.String
	RequiredReviewers(path string) sets.Set[string]
	// ParseSimpleConfig reads the OWNERS file at path as a SimpleConfig.
	ParseSimpleConfig(path string) (SimpleConfig, error)
	// ParseFullConfig reads the OWNERS file at path as a FullConfig.
	ParseFullConfig(path string) (FullConfig, error)
	TopLevelApprovers() sets.Set[string]
	// Filenames returns the OWNERS file names in effect for the repository.
	Filenames() ownersconfig.Filenames
	AllOwners() sets.Set[string]
	AllApprovers() sets.Set[string]
	AllReviewers() sets.Set[string]
}
   261  
// Compile-time assertion that *RepoOwners implements RepoOwner.
var _ RepoOwner = &RepoOwners{}

// RepoOwners contains the parsed OWNERS config.
type RepoOwners struct {
	// RepoAliases is embedded so alias expansion is available on RepoOwners.
	RepoAliases

	// The maps below are keyed first by repository-relative path (a directory
	// for OWNERS files, the file itself for markdown headers) and then by the
	// filter regexp; a nil regexp key means the entry applies to everything
	// under that path.
	approvers         map[string]map[*regexp.Regexp]sets.Set[string]
	reviewers         map[string]map[*regexp.Regexp]sets.Set[string]
	requiredReviewers map[string]map[*regexp.Regexp]sets.Set[string]
	labels            map[string]map[*regexp.Regexp]sets.Set[string]
	// options holds the non-default dirOptions per path.
	options map[string]dirOptions

	// baseDir is the on-disk root that was walked to load the data.
	baseDir string
	// enableMDYAML records whether YAML headers of '.md' files were parsed.
	enableMDYAML bool
	// dirDenylist lists patterns of directories that are skipped.
	dirDenylist []*regexp.Regexp
	// filenames names the OWNERS and OWNERS_ALIASES files for this repo.
	filenames ownersconfig.Filenames

	log *logrus.Entry
}
   281  
// Filenames returns the OWNERS file names this RepoOwners was loaded with.
func (r *RepoOwners) Filenames() ownersconfig.Filenames {
	return r.filenames
}
   285  
   286  // LoadRepoOwners returns an up-to-date RepoOwners struct for the specified repo.
   287  // Note: The returned *RepoOwners should be treated as read only.
   288  func (c *Client) LoadRepoOwners(org, repo, base string) (RepoOwner, error) {
   289  	log := c.logger.WithFields(logrus.Fields{"org": org, "repo": repo, "base": base})
   290  	cloneRef := fmt.Sprintf("%s/%s", org, repo)
   291  	fullName := fmt.Sprintf("%s:%s", cloneRef, base)
   292  
   293  	start := time.Now()
   294  	sha, err := c.ghc.GetRef(org, repo, fmt.Sprintf("heads/%s", base))
   295  	if err != nil {
   296  		return nil, fmt.Errorf("failed to get current SHA for %s: %w", fullName, err)
   297  	}
   298  	if sha == "" {
   299  		return nil, fmt.Errorf("got an empty SHA for %s@heads/%s", fullName, base)
   300  	}
   301  	log.WithField("duration", time.Since(start).String()).Debugf("Completed ghc.GetRef(%s, %s, %s)", org, repo, fmt.Sprintf("heads/%s", base))
   302  
   303  	return c.LoadRepoOwnersSha(org, repo, base, sha, true)
   304  }
   305  
   306  func (c *Client) LoadRepoOwnersSha(org, repo, base, sha string, updateCache bool) (RepoOwner, error) {
   307  	c.used = true
   308  	log := c.logger.WithFields(logrus.Fields{"org": org, "repo": repo, "base": base, "sha": sha})
   309  	cloneRef := fmt.Sprintf("%s/%s", org, repo)
   310  	fullName := fmt.Sprintf("%s:%s", cloneRef, base)
   311  
   312  	entry, err := c.cacheEntryFor(org, repo, base, cloneRef, fullName, sha, updateCache, log)
   313  	if err != nil {
   314  		return nil, err
   315  	}
   316  
   317  	start := time.Now()
   318  	if c.skipCollaborators(org, repo) {
   319  		log.WithField("duration", time.Since(start).String()).Debugf("Completed c.skipCollaborators(%s, %s)", org, repo)
   320  		log.Debugf("Skipping collaborator checks for %s/%s", org, repo)
   321  		return entry.owners, nil
   322  	}
   323  	log.WithField("duration", time.Since(start).String()).Debugf("Completed c.skipCollaborators(%s, %s)", org, repo)
   324  
   325  	var owners *RepoOwners
   326  	// Filter collaborators. We must filter the RepoOwners struct even if it came from the cache
   327  	// because the list of collaborators could have changed without the git SHA changing.
   328  	start = time.Now()
   329  	collaborators, err := c.ghc.ListCollaborators(org, repo)
   330  	log.WithField("duration", time.Since(start).String()).Debugf("Completed ghc.ListCollaborators(%s, %s)", org, repo)
   331  	if err != nil {
   332  		log.WithError(err).Errorf("Failed to list collaborators while loading RepoOwners. Skipping collaborator filtering.")
   333  		owners = entry.owners
   334  	} else {
   335  		start = time.Now()
   336  		owners = entry.owners.filterCollaborators(collaborators)
   337  		log.WithField("duration", time.Since(start).String()).Debugf("Completed owners.filterCollaborators(collaborators)")
   338  	}
   339  	return owners, nil
   340  }
   341  
   342  func (c *Client) cacheEntryFor(org, repo, base, cloneRef, fullName, sha string, setEntry bool, log *logrus.Entry) (cacheEntry, error) {
   343  	mdYaml := c.mdYAMLEnabled(org, repo)
   344  	lockStart := time.Now()
   345  	defer func() {
   346  		log.WithField("duration", time.Since(lockStart).String()).Debug("Locked section of loadRepoOwners completed")
   347  	}()
   348  	entry, ok, entryLock := c.cache.getEntry(fullName)
   349  	defer entryLock.Unlock()
   350  	filenames := c.filenames(org, repo)
   351  	if !ok || entry.sha != sha || entry.owners == nil || !entry.matchesMDYAML(mdYaml) {
   352  		start := time.Now()
   353  		gitRepo, err := c.git.ClientFor(org, repo)
   354  		if err != nil {
   355  			return cacheEntry{}, fmt.Errorf("failed to clone %s: %w", cloneRef, err)
   356  		}
   357  		log.WithField("duration", time.Since(start).String()).Debugf("Completed git.ClientFor(%s, %s)", org, repo)
   358  		defer gitRepo.Clean()
   359  
   360  		reusable := entry.fullyLoaded() && entry.matchesMDYAML(mdYaml)
   361  		// In most sha changed cases, the files associated with the owners are unchanged.
   362  		// The cached entry can continue to be used, so need do git diff
   363  		if reusable {
   364  			start = time.Now()
   365  			changes, err := gitRepo.Diff(sha, entry.sha)
   366  			if err != nil {
   367  				return cacheEntry{}, fmt.Errorf("failed to diff %s with %s", sha, entry.sha)
   368  			}
   369  			log.WithField("duration", time.Since(start).String()).Debugf("Completed git.Diff(%s, %s)", sha, entry.sha)
   370  			start = time.Now()
   371  			for _, change := range changes {
   372  				if mdYaml && strings.HasSuffix(change, ".md") ||
   373  					strings.HasSuffix(change, filenames.OwnersAliases) ||
   374  					strings.HasSuffix(change, filenames.Owners) {
   375  					reusable = false
   376  					log.WithField("duration", time.Since(start).String()).Debugf("Completed owners change verification loop")
   377  					break
   378  				}
   379  			}
   380  			log.WithField("duration", time.Since(start).String()).Debugf("Completed owners change verification loop")
   381  		}
   382  		if reusable {
   383  			entry.sha = sha
   384  		} else {
   385  			start = time.Now()
   386  			if err := gitRepo.Checkout(base); err != nil {
   387  				return cacheEntry{}, err
   388  			}
   389  			log.WithField("duration", time.Since(start).String()).Debugf("Completed gitRepo.Checkout(%s)", base)
   390  
   391  			start = time.Now()
   392  			if entry.aliases == nil || entry.sha != sha {
   393  				// aliases must be loaded
   394  				entry.aliases = loadAliasesFrom(gitRepo.Directory(), filenames.OwnersAliases, log)
   395  			}
   396  			log.WithField("duration", time.Since(start).String()).Debugf("Completed loadAliasesFrom(%s, log)", gitRepo.Directory())
   397  
   398  			start = time.Now()
   399  			ignoreDirPatterns := c.ownersDirDenylist().ListIgnoredDirs(org, repo)
   400  			var dirIgnorelist []*regexp.Regexp
   401  			for _, pattern := range ignoreDirPatterns {
   402  				re, err := regexp.Compile(pattern)
   403  				if err != nil {
   404  					log.WithError(err).Errorf("Invalid OWNERS dir denylist regexp %q.", pattern)
   405  					continue
   406  				}
   407  				dirIgnorelist = append(dirIgnorelist, re)
   408  			}
   409  			log.WithField("duration", time.Since(start).String()).Debugf("Completed dirIgnorelist loading")
   410  
   411  			start = time.Now()
   412  			entry.owners, err = loadOwnersFrom(gitRepo.Directory(), mdYaml, entry.aliases, dirIgnorelist, filenames, log)
   413  			if err != nil {
   414  				return cacheEntry{}, fmt.Errorf("failed to load RepoOwners for %s: %w", fullName, err)
   415  			}
   416  			log.WithField("duration", time.Since(start).String()).Debugf("Completed loadOwnersFrom(%s, %t, entry.aliases, dirIgnorelist, log)", gitRepo.Directory(), mdYaml)
   417  			entry.sha = sha
   418  			if setEntry {
   419  				c.cache.setEntry(fullName, entry)
   420  			}
   421  		}
   422  	}
   423  	return entry, nil
   424  }
   425  
   426  // ExpandAlias returns members of an alias
   427  func (a RepoAliases) ExpandAlias(alias string) sets.Set[string] {
   428  	if a == nil {
   429  		return nil
   430  	}
   431  	return a[github.NormLogin(alias)]
   432  }
   433  
   434  // ExpandAliases returns members of multiple aliases, duplicates are pruned
   435  func (a RepoAliases) ExpandAliases(logins sets.Set[string]) sets.Set[string] {
   436  	if a == nil {
   437  		return logins
   438  	}
   439  	// Make logins a copy of the original set to avoid modifying the original.
   440  	logins = logins.Union(nil)
   441  	for _, login := range sets.List(logins) {
   442  		if expanded, ok := a[github.NormLogin(login)]; ok {
   443  			logins.Delete(login)
   444  			logins = logins.Union(expanded)
   445  		}
   446  	}
   447  	return logins
   448  }
   449  
   450  // ExpandAllAliases returns members of all aliases mentioned, duplicates are pruned
   451  func (a RepoAliases) ExpandAllAliases() sets.Set[string] {
   452  	if a == nil {
   453  		return nil
   454  	}
   455  
   456  	var result, users sets.Set[string]
   457  	for alias := range a {
   458  		users = a.ExpandAlias(alias)
   459  		result = result.Union(users)
   460  	}
   461  	return result
   462  }
   463  
   464  func loadAliasesFrom(baseDir, filename string, log *logrus.Entry) RepoAliases {
   465  	path := filepath.Join(baseDir, filename)
   466  	b, err := os.ReadFile(path)
   467  	if os.IsNotExist(err) {
   468  		log.WithError(err).Infof("No alias file exists at %q. Using empty alias map.", path)
   469  		return nil
   470  	} else if err != nil {
   471  		log.WithError(err).Warnf("Failed to read alias file %q. Using empty alias map.", path)
   472  		return nil
   473  	}
   474  	result, err := ParseAliasesConfig(b)
   475  	if err != nil {
   476  		log.WithError(err).Errorf("Failed to unmarshal aliases from %q. Using empty alias map.", path)
   477  	}
   478  	log.Infof("Loaded %d aliases from %q.", len(result), path)
   479  	return result
   480  }
   481  
   482  func loadOwnersFrom(baseDir string, mdYaml bool, aliases RepoAliases, dirIgnorelist []*regexp.Regexp, filenames ownersconfig.Filenames, log *logrus.Entry) (*RepoOwners, error) {
   483  	o := &RepoOwners{
   484  		RepoAliases:  aliases,
   485  		baseDir:      baseDir,
   486  		enableMDYAML: mdYaml,
   487  		filenames:    filenames,
   488  		log:          log,
   489  
   490  		approvers:         make(map[string]map[*regexp.Regexp]sets.Set[string]),
   491  		reviewers:         make(map[string]map[*regexp.Regexp]sets.Set[string]),
   492  		requiredReviewers: make(map[string]map[*regexp.Regexp]sets.Set[string]),
   493  		labels:            make(map[string]map[*regexp.Regexp]sets.Set[string]),
   494  		options:           make(map[string]dirOptions),
   495  
   496  		dirDenylist: dirIgnorelist,
   497  	}
   498  
   499  	return o, filepath.Walk(o.baseDir, o.walkFunc)
   500  }
   501  
   502  // by default, github's api doesn't root the project directory at "/" and instead uses the empty string for the base dir
   503  // of the project. And the built-in dir function returns "." for empty strings, so for consistency, we use this
   504  // canonicalize to get the directories of files in a consistent format with NO "/" at the root (a/b/c/ -> a/b/c)
   505  func canonicalize(path string) string {
   506  	if path == "." {
   507  		return baseDirConvention
   508  	}
   509  	return strings.TrimSuffix(path, "/")
   510  }
   511  
// walkFunc is the filepath.Walk callback that populates o from the files
// under o.baseDir.
//
// For every regular file it either parses the YAML header of a '.md' file
// (when markdown YAML is enabled) and applies it to that file's relative
// path, or, for files named like the configured OWNERS file, parses the file
// as a SimpleConfig or, failing that, as a FullConfig and applies it to the
// containing directory. Directories matching the denylist are skipped with
// filepath.SkipDir. Walk and parse errors are logged and do not abort the
// walk; only a failure to compute the relative path is returned.
func (o *RepoOwners) walkFunc(path string, info os.FileInfo, err error) error {
	log := o.log.WithField("path", path)
	if err != nil {
		// Log and keep walking; info must not be used on this path.
		log.WithError(err).Error("Error while walking OWNERS files.")
		return nil
	}
	filename := filepath.Base(path)
	relPath, err := filepath.Rel(o.baseDir, path)
	if err != nil {
		log.WithError(err).Errorf("Unable to find relative path between baseDir: %q and path.", o.baseDir)
		return err
	}
	// Directory of the file relative to the repo root, "" for the root itself.
	relPathDir := canonicalize(filepath.Dir(relPath))

	if info.Mode().IsDir() {
		// The denylist is matched against the repo-relative directory path.
		for _, re := range o.dirDenylist {
			if re.MatchString(relPath) {
				return filepath.SkipDir
			}
		}
	}
	if !info.Mode().IsRegular() {
		return nil
	}

	// '.md' files may contain assignees at the top of the file in a yaml header
	// Note that these assignees only apply to the file itself.
	if o.enableMDYAML && strings.HasSuffix(filename, ".md") {
		// Parse the yaml header from the file if it exists and marshal into the config
		simple := &SimpleConfig{}
		if err := decodeOwnersMdConfig(path, simple); err != nil {
			log.WithError(err).Info("Error decoding OWNERS config from '*.md' file.")
			return nil
		}

		// Set owners for this file (not the directory) using the relative path if they were found
		o.applyConfigToPath(relPath, nil, &simple.Config)
		o.applyOptionsToPath(relPath, simple.Options)
		return nil
	}

	if filename != o.filenames.Owners {
		return nil
	}

	simple, err := o.ParseSimpleConfig(path)
	if err == filepath.SkipDir {
		return err
	}
	// Fall back to the FullConfig format when the file is not a usable SimpleConfig.
	if err != nil || simple.Empty() {
		c, err := o.ParseFullConfig(path)
		if err == filepath.SkipDir {
			return err
		}
		if err != nil {
			log.WithError(err).Debugf("Failed to unmarshal %s into either Simple or FullConfig.", path)
		} else {
			// it's a FullConfig
			for pattern, config := range c.Filters {
				// re stays nil for ".*", which is stored as "applies to everything".
				var re *regexp.Regexp
				if pattern != ".*" {
					if re, err = regexp.Compile(pattern); err != nil {
						log.WithError(err).Debugf("Invalid regexp %q.", pattern)
						continue
					}
				}
				o.applyConfigToPath(relPathDir, re, &config)
			}
			o.applyOptionsToPath(relPathDir, c.Options)
		}
	} else {
		// it's a SimpleConfig
		o.applyConfigToPath(relPathDir, nil, &simple.Config)
		o.applyOptionsToPath(relPathDir, simple.Options)
	}
	return nil
}
   589  
   590  // ParseFullConfig will unmarshal the content of the OWNERS file at the path into a FullConfig.
   591  // If the OWNERS directory is ignorelisted, it returns filepath.SkipDir.
   592  // Returns an error if the content cannot be unmarshalled.
   593  func (o *RepoOwners) ParseFullConfig(path string) (FullConfig, error) {
   594  	// if path is in an ignored directory, ignore it
   595  	dir := filepath.Dir(path)
   596  	for _, re := range o.dirDenylist {
   597  		if re.MatchString(dir) {
   598  			return FullConfig{}, filepath.SkipDir
   599  		}
   600  	}
   601  
   602  	b, err := os.ReadFile(path)
   603  	if err != nil {
   604  		return FullConfig{}, err
   605  	}
   606  	return LoadFullConfig(b)
   607  }
   608  
   609  // ParseSimpleConfig will unmarshal the content of the OWNERS file at the path into a SimpleConfig.
   610  // If the OWNERS directory is ignorelisted, it returns filepath.SkipDir.
   611  // Returns an error if the content cannot be unmarshalled.
   612  func (o *RepoOwners) ParseSimpleConfig(path string) (SimpleConfig, error) {
   613  	// if path is in a an ignored directory, ignore it
   614  	dir := filepath.Dir(path)
   615  	for _, re := range o.dirDenylist {
   616  		if re.MatchString(dir) {
   617  			return SimpleConfig{}, filepath.SkipDir
   618  		}
   619  	}
   620  
   621  	b, err := os.ReadFile(path)
   622  	if err != nil {
   623  		return SimpleConfig{}, err
   624  	}
   625  	return LoadSimpleConfig(b)
   626  }
   627  
   628  // LoadSimpleConfig loads SimpleConfig from bytes `b`
   629  func LoadSimpleConfig(b []byte) (SimpleConfig, error) {
   630  	simple := new(SimpleConfig)
   631  	err := yaml.Unmarshal(b, simple)
   632  	return *simple, err
   633  }
   634  
   635  // SaveSimpleConfig writes SimpleConfig to `path`
   636  func SaveSimpleConfig(simple SimpleConfig, path string) error {
   637  	b, err := yaml.Marshal(simple)
   638  	if err != nil {
   639  		return nil
   640  	}
   641  	return os.WriteFile(path, b, 0644)
   642  }
   643  
   644  // LoadFullConfig loads FullConfig from bytes `b`
   645  func LoadFullConfig(b []byte) (FullConfig, error) {
   646  	full := new(FullConfig)
   647  	err := yaml.Unmarshal(b, full)
   648  	return *full, err
   649  }
   650  
   651  // SaveFullConfig writes FullConfig to `path`
   652  func SaveFullConfig(full FullConfig, path string) error {
   653  	b, err := yaml.Marshal(full)
   654  	if err != nil {
   655  		return nil
   656  	}
   657  	return os.WriteFile(path, b, 0644)
   658  }
   659  
   660  // ParseAliasesConfig will unmarshal an OWNERS_ALIASES file's content into RepoAliases.
   661  // Returns an error if the content cannot be unmarshalled.
   662  func ParseAliasesConfig(b []byte) (RepoAliases, error) {
   663  	result := make(RepoAliases)
   664  
   665  	config := &struct {
   666  		Data map[string][]string `json:"aliases,omitempty"`
   667  	}{}
   668  	if err := yaml.Unmarshal(b, config); err != nil {
   669  		return result, err
   670  	}
   671  
   672  	for alias, expanded := range config.Data {
   673  		result[github.NormLogin(alias)] = NormLogins(expanded)
   674  	}
   675  	return result, nil
   676  }
   677  
   678  var mdStructuredHeaderRegex = regexp.MustCompile("^---\n(.|\n)*\n---")
   679  
   680  // decodeOwnersMdConfig will parse the yaml header if it exists and unmarshal it into a singleOwnersConfig.
   681  // If no yaml header is found, do nothing
   682  // Returns an error if the file cannot be read or the yaml header is found but cannot be unmarshalled.
   683  func decodeOwnersMdConfig(path string, config *SimpleConfig) error {
   684  	fileBytes, err := os.ReadFile(path)
   685  	if err != nil {
   686  		return err
   687  	}
   688  	// Parse the yaml header from the top of the file.  Will return an empty string if regex does not match.
   689  	meta := mdStructuredHeaderRegex.FindString(string(fileBytes))
   690  
   691  	// Unmarshal the yaml header into the config
   692  	return yaml.Unmarshal([]byte(meta), &config)
   693  }
   694  
   695  // NormLogins normalizes logins
   696  func NormLogins(logins []string) sets.Set[string] {
   697  	normed := sets.New[string]()
   698  	for _, login := range logins {
   699  		normed.Insert(github.NormLogin(login))
   700  	}
   701  	return normed
   702  }
   703  
// defaultDirOptions is the zero-value dirOptions; options equal to it are not
// recorded by applyOptionsToPath.
var defaultDirOptions = dirOptions{}
   705  
   706  func (o *RepoOwners) applyConfigToPath(path string, re *regexp.Regexp, config *Config) {
   707  	if len(config.Approvers) > 0 {
   708  		if o.approvers[path] == nil {
   709  			o.approvers[path] = make(map[*regexp.Regexp]sets.Set[string])
   710  		}
   711  		o.approvers[path][re] = o.ExpandAliases(NormLogins(config.Approvers))
   712  	}
   713  	if len(config.Reviewers) > 0 {
   714  		if o.reviewers[path] == nil {
   715  			o.reviewers[path] = make(map[*regexp.Regexp]sets.Set[string])
   716  		}
   717  		o.reviewers[path][re] = o.ExpandAliases(NormLogins(config.Reviewers))
   718  	}
   719  	if len(config.RequiredReviewers) > 0 {
   720  		if o.requiredReviewers[path] == nil {
   721  			o.requiredReviewers[path] = make(map[*regexp.Regexp]sets.Set[string])
   722  		}
   723  		o.requiredReviewers[path][re] = o.ExpandAliases(NormLogins(config.RequiredReviewers))
   724  	}
   725  	if len(config.Labels) > 0 {
   726  		if o.labels[path] == nil {
   727  			o.labels[path] = make(map[*regexp.Regexp]sets.Set[string])
   728  		}
   729  		o.labels[path][re] = sets.New[string](config.Labels...)
   730  	}
   731  }
   732  
   733  func (o *RepoOwners) applyOptionsToPath(path string, opts dirOptions) {
   734  	if opts != defaultDirOptions {
   735  		o.options[path] = opts
   736  	}
   737  }
   738  
   739  func (o *RepoOwners) filterCollaborators(toKeep []github.User) *RepoOwners {
   740  	collabs := sets.New[string]()
   741  	for _, keeper := range toKeep {
   742  		collabs.Insert(github.NormLogin(keeper.Login))
   743  	}
   744  
   745  	filter := func(ownerMap map[string]map[*regexp.Regexp]sets.Set[string]) map[string]map[*regexp.Regexp]sets.Set[string] {
   746  		filtered := make(map[string]map[*regexp.Regexp]sets.Set[string])
   747  		for path, reMap := range ownerMap {
   748  			filtered[path] = make(map[*regexp.Regexp]sets.Set[string])
   749  			for re, unfiltered := range reMap {
   750  				filtered[path][re] = unfiltered.Intersection(collabs)
   751  			}
   752  		}
   753  		return filtered
   754  	}
   755  
   756  	result := *o
   757  	result.approvers = filter(o.approvers)
   758  	result.reviewers = filter(o.reviewers)
   759  	return &result
   760  }
   761  
   762  // findOwnersForFile returns the OWNERS file path furthest down the tree for a specified file
   763  // using ownerMap to check for entries
   764  func findOwnersForFile(log *logrus.Entry, path string, ownerMap map[string]map[*regexp.Regexp]sets.Set[string]) string {
   765  	d := path
   766  
   767  	for ; d != baseDirConvention; d = canonicalize(filepath.Dir(d)) {
   768  		relative, err := filepath.Rel(d, path)
   769  		if err != nil {
   770  			log.WithError(err).WithField("path", path).Errorf("Unable to find relative path between %q and path.", d)
   771  			return ""
   772  		}
   773  		for re, n := range ownerMap[d] {
   774  			if re != nil && !re.MatchString(relative) {
   775  				continue
   776  			}
   777  			if len(n) != 0 {
   778  				return d
   779  			}
   780  		}
   781  	}
   782  	return ""
   783  }
   784  
   785  // FindApproverOwnersForFile returns the directory containing the OWNERS file furthest down the tree for a specified file
   786  // that contains an approvers section
   787  func (o *RepoOwners) FindApproverOwnersForFile(path string) string {
   788  	return findOwnersForFile(o.log, path, o.approvers)
   789  }
   790  
   791  // FindReviewersOwnersForFile returns the OWNERS file path furthest down the tree for a specified file
   792  // that contains a reviewers section
   793  func (o *RepoOwners) FindReviewersOwnersForFile(path string) string {
   794  	return findOwnersForFile(o.log, path, o.reviewers)
   795  }
   796  
   797  // FindLabelsForFile returns a set of labels which should be applied to PRs
   798  // modifying files under the given path.
   799  func (o *RepoOwners) FindLabelsForFile(path string) sets.Set[string] {
   800  	return o.entriesForFile(path, o.labels, false).Set()
   801  }
   802  
   803  // IsNoParentOwners checks if an OWNERS file path refers to an OWNERS file with NoParentOwners enabled.
   804  func (o *RepoOwners) IsNoParentOwners(path string) bool {
   805  	return o.options[path].NoParentOwners
   806  }
   807  
   808  func (o *RepoOwners) IsAutoApproveUnownedSubfolders(ownersFilePath string) bool {
   809  	return o.options[ownersFilePath].AutoApproveUnownedSubfolders
   810  }
   811  
   812  // entriesForFile returns a set of users who are assignees to the
   813  // requested file. The path variable should be a full path to a filename
   814  // and not directory as the final directory will be discounted if enableMDYAML is true
   815  // leafOnly indicates whether only the OWNERS deepest in the tree (closest to the file)
   816  // should be returned or if all OWNERS in filepath should be returned
   817  func (o *RepoOwners) entriesForFile(path string, people map[string]map[*regexp.Regexp]sets.Set[string], leafOnly bool) layeredsets.String {
   818  	d := path
   819  	if !o.enableMDYAML || !strings.HasSuffix(path, ".md") {
   820  		d = canonicalize(d)
   821  	}
   822  
   823  	out := layeredsets.NewString()
   824  	var layerID int
   825  	for {
   826  		relative, err := filepath.Rel(d, path)
   827  		if err != nil {
   828  			o.log.WithError(err).WithField("path", path).Errorf("Unable to find relative path between %q and path.", d)
   829  			return nil
   830  		}
   831  		for re, s := range people[d] {
   832  			if re == nil || re.MatchString(relative) {
   833  				out.Insert(layerID, sets.List(s)...)
   834  			}
   835  		}
   836  		if leafOnly && out.Len() > 0 {
   837  			break
   838  		}
   839  		if d == baseDirConvention {
   840  			break
   841  		}
   842  		if o.options[d].NoParentOwners && out.Len() > 0 {
   843  			break
   844  		}
   845  		d = filepath.Dir(d)
   846  		d = canonicalize(d)
   847  		layerID++
   848  	}
   849  	return out
   850  }
   851  
   852  // LeafApprovers returns a set of users who are the closest approvers to the
   853  // requested file. If pkg/OWNERS has user1 and pkg/util/OWNERS has user2 this
   854  // will only return user2 for the path pkg/util/sets/file.go
   855  func (o *RepoOwners) LeafApprovers(path string) sets.Set[string] {
   856  	return o.entriesForFile(path, o.approvers, true).Set()
   857  }
   858  
   859  // Approvers returns ALL of the users who are approvers for the
   860  // requested file (including approvers in parent dirs' OWNERS).
   861  // If pkg/OWNERS has user1 and pkg/util/OWNERS has user2 this
   862  // will return both user1 and user2 for the path pkg/util/sets/file.go
   863  func (o *RepoOwners) Approvers(path string) layeredsets.String {
   864  	return o.entriesForFile(path, o.approvers, false)
   865  }
   866  
   867  // LeafReviewers returns a set of users who are the closest reviewers to the
   868  // requested file. If pkg/OWNERS has user1 and pkg/util/OWNERS has user2 this
   869  // will only return user2 for the path pkg/util/sets/file.go
   870  func (o *RepoOwners) LeafReviewers(path string) sets.Set[string] {
   871  	return o.entriesForFile(path, o.reviewers, true).Set()
   872  }
   873  
   874  // Reviewers returns ALL of the users who are reviewers for the
   875  // requested file (including reviewers in parent dirs' OWNERS).
   876  // If pkg/OWNERS has user1 and pkg/util/OWNERS has user2 this
   877  // will return both user1 and user2 for the path pkg/util/sets/file.go
   878  func (o *RepoOwners) Reviewers(path string) layeredsets.String {
   879  	return o.entriesForFile(path, o.reviewers, false)
   880  }
   881  
   882  // RequiredReviewers returns ALL of the users who are required_reviewers for the
   883  // requested file (including required_reviewers in parent dirs' OWNERS).
   884  // If pkg/OWNERS has user1 and pkg/util/OWNERS has user2 this
   885  // will return both user1 and user2 for the path pkg/util/sets/file.go
   886  func (o *RepoOwners) RequiredReviewers(path string) sets.Set[string] {
   887  	return o.entriesForFile(path, o.requiredReviewers, false).Set()
   888  }
   889  
   890  func (o *RepoOwners) TopLevelApprovers() sets.Set[string] {
   891  	return o.entriesForFile(".", o.approvers, true).Set()
   892  }
   893  
   894  // AllOwners returns ALL of the users who are approvers or reviewers,
   895  // at least for a file across the structure of the repository.
   896  // If pkg/OWNERS has user1 as approver and user2 as reviewer,
   897  // and pkg/util has user3 as approver and user4 as reviewer,
   898  // the function will return user1, user2, user3, and user4.
   899  func (o *RepoOwners) AllOwners() sets.Set[string] {
   900  	allOwners := sets.New[string]()
   901  
   902  	allOwners = allOwners.Union(o.AllApprovers())
   903  	allOwners = allOwners.Union(o.AllReviewers())
   904  
   905  	return allOwners
   906  }
   907  
   908  // AllApprovers returns ALL of the users who are approvers,
   909  // at least for a file across the structure of the repository.
   910  // If pkg/OWNERS has user1 as approver and user2 as reviewer,
   911  // and pkg/util has user3 as approver and user4 as reviewer,
   912  // the function will return user1, and user3.
   913  func (o *RepoOwners) AllApprovers() sets.Set[string] {
   914  	allApprovers := sets.New[string]()
   915  	for _, pv := range o.approvers {
   916  		for _, rv := range pv {
   917  			allApprovers = allApprovers.Union(rv)
   918  		}
   919  	}
   920  
   921  	return allApprovers
   922  }
   923  
   924  // AllReviewers returns ALL of the users who are reviewers,
   925  // at least for a file across the structure of the repository.
   926  // If pkg/OWNERS has user1 as approver and user2 as reviewer,
   927  // and pkg/util has user3 as approver and user4 as reviewer,
   928  // the function will return user2, and user4.
   929  func (o *RepoOwners) AllReviewers() sets.Set[string] {
   930  	allReviewers := sets.New[string]()
   931  	for _, pv := range o.reviewers {
   932  		for _, rv := range pv {
   933  			allReviewers = allReviewers.Union(rv)
   934  		}
   935  	}
   936  
   937  	return allReviewers
   938  }