github.com/wtsi-ssg/wrstat@v1.1.4-0.20221008232152-3030622a8cf8/ch/from.go (about)

     1  /*******************************************************************************
     2   * Copyright (c) 2021 Genome Research Ltd.
     3   *
     4   * Author: Sendu Bala <sb10@sanger.ac.uk>
     5   *
     6   * Permission is hereby granted, free of charge, to any person obtaining
     7   * a copy of this software and associated documentation files (the
     8   * "Software"), to deal in the Software without restriction, including
     9   * without limitation the rights to use, copy, modify, merge, publish,
    10   * distribute, sublicense, and/or sell copies of the Software, and to
    11   * permit persons to whom the Software is furnished to do so, subject to
    12   * the following conditions:
    13   *
    14   * The above copyright notice and this permission notice shall be included
    15   * in all copies or substantial portions of the Software.
    16   *
    17   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
    18   * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
    19   * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
    20   * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
    21   * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
    22   * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
    23   * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
    24   ******************************************************************************/
    25  
    26  package ch
    27  
    28  import (
    29  	"os/user"
    30  	"regexp"
    31  	"strconv"
    32  	"strings"
    33  
    34  	"github.com/inconshreveable/log15"
    35  	"gopkg.in/yaml.v2"
    36  )
    37  
// regexp* consts relate to the groups matched in our main regexp (the one
// built by createPrefixRegex).
const (
	// regexpSubgroups is the number of elements FindStringSubmatch returns for
	// a successful match: the whole match plus the three capture groups.
	regexpSubgroups = 4
	// regexpDirPart is the submatch index of the lookupDir/directDir capture.
	regexpDirPart = 2
	// regexpGroupPart is the submatch index of the capture holding the name of
	// the sub directory found directly inside lookupDir/directDir.
	regexpGroupPart = 3
)

// badUnixGroup is the sentinel GID returned by lookupGID and directGID when no
// GID could be determined; PathChecker reports change=false when it sees it.
const badUnixGroup = -1
    46  
    47  type Error string
    48  
    49  func (e Error) Error() string { return string(e) }
    50  
    51  const errInvalidYAML = Error("YAML is missing properties")
    52  
// GIDFromSubDir provides a PathChecker that can decide if a path should be
// looked at based on matching a prefix followed by a certain sub directory
// (lookupDir or directDir), and also decides what the GID of that path should
// be based on the sub dir of that sub dir.
//
// For subdirs of lookup directories, it converts from directory name to
// desired unix group name using the lookup, then gets the GID for that unix
// group.
//
// For subdirs of direct directories, it treats the directory name as a unix
// group name, and gets the GID of that unix group. You can supply exceptions
// where your own GID is used instead.
//
// With prefixes:
// "/disk1", "/disk2/sub", "/disk3"
//
// And a lookupDir of "teams" and a directDir of "projects"
//
// And lookup:
// "ay": "dee"
//
// And exceptions:
// "cee": 12345
//
// Given the paths:
// 1) /disk1/teams/ay/file1.txt
// 2) /disk2/sub/projects/bee/file2.txt
// 3) /disk2/sub/projects/cee/file3.txt
// 4) /disk3/file4.txt
// 5) /disk1/teams/new/file5.txt
// 6) /disk2/sub/projects/not_a_unix_group_name/file6.txt
//
// The .PathChecker() will return the following for each file:
// 1) true, [gid of unix group dee]
// 2) true, [gid of unix group bee]
// 3) true, 12345
// 4) false, n/a
// 5) false, n/a [and logs a warning that "new" wasn't a known lookup]
// 6) false, n/a [and logs a warning that "not_a_unix_group_name" has no GID].
type GIDFromSubDir struct {
	// r matches paths of the form prefix/(lookupDir|directDir)/subdir/...; it
	// is built by createPrefixRegex in NewGIDFromSubDir.
	r *regexp.Regexp
	// lookupDir is the sub directory name whose children are resolved via
	// lookup.
	lookupDir string
	// directDir is the sub directory name whose children are resolved via
	// exceptions or by treating the child's name as a unix group name.
	directDir string
	// lookup maps a sub directory name to the GID it should have.
	lookup map[string]int
	// exceptions maps a sub directory name to the GID it should have.
	// directGID also stores the outcome of every unix group lookup in here
	// (including badUnixGroup for failed lookups), so it doubles as a cache.
	exceptions map[string]int
	// logger receives warnings about sub directories that can't be resolved
	// to a GID.
	logger log15.Logger
}
   100  
   101  // NewGIDFromSubDir returns a GIDFromSubDir.
   102  //
   103  // prefixes are absolute paths to directories that our PathChecker will return
   104  // true for if the path matches a prefix and also contains a subdirectory named
   105  // [lookupDir] or [directDir], and the path is for an entry located within a
   106  // further subdirectory of that.
   107  //
   108  // lookupDir is the name of a subdirectory of the prefix paths that contains
   109  // further subdirectories that are keys in the given lookup map. The values in
   110  // the map are the desired unix group names, which will be converted to GIDs.
   111  //
   112  // directDir is the name of a subdirectory of the prefix paths that contains
   113  // further subdirectories that are named after unix group names. Or their name
   114  // is a key in the exceptions map, and the corresponding value will be the GID
   115  // used. NB. unix group name to GID lookups are cached in the supplied
   116  // exceptions map.
   117  //
   118  // If lookupDir contains a subdirectory that isn't in your lookup, or directDir
   119  // contains a subdirectory that isn't in your exceptions and isn't a unix group
   120  // name, these issues are logged to the given logger, and the PathChecker will
   121  // return false.
   122  func NewGIDFromSubDir(prefixes []string, lookupDir string, lookup map[string]string,
   123  	directDir string, exceptions map[string]int, logger log15.Logger) (*GIDFromSubDir, error) {
   124  	r := createPrefixRegex(prefixes, lookupDir, directDir)
   125  
   126  	gidLookup, err := createGIDLookup(lookup)
   127  	if err != nil {
   128  		return nil, err
   129  	}
   130  
   131  	return &GIDFromSubDir{
   132  		r:          r,
   133  		lookupDir:  lookupDir,
   134  		lookup:     gidLookup,
   135  		directDir:  directDir,
   136  		exceptions: exceptions,
   137  		logger:     logger,
   138  	}, nil
   139  }
   140  
   141  // createPrefixRegex creates a regexp that matches on given prefixes followed by
   142  // one of lookupDir or directDir, followed by another subdir, and has each as
   143  // capture groups.
   144  func createPrefixRegex(prefixes []string, lookupDir, directDir string) *regexp.Regexp {
   145  	escapedPrefixes := make([]string, len(prefixes))
   146  	for i, prefix := range prefixes {
   147  		escapedPrefixes[i] = regexp.QuoteMeta(prefix)
   148  	}
   149  
   150  	expr := `^(` + strings.Join(escapedPrefixes, `|`) + `)\/(` +
   151  		regexp.QuoteMeta(lookupDir) + `|` +
   152  		regexp.QuoteMeta(directDir) + `)\/([^/]+)\/.*$`
   153  
   154  	return regexp.MustCompile(expr)
   155  }
   156  
   157  // createGIDLookup takes the given lookup values (treating them as unix group
   158  // names), converts them to GIDs, and returns a new map with the same keys.
   159  func createGIDLookup(lookup map[string]string) (map[string]int, error) {
   160  	gl := make(map[string]int, len(lookup))
   161  
   162  	for key, name := range lookup {
   163  		gid, err := gidFromName(name)
   164  		if err != nil {
   165  			return nil, err
   166  		}
   167  
   168  		gl[key] = gid
   169  	}
   170  
   171  	return gl, nil
   172  }
   173  
   174  // gidFromName looks up unix group by name and returns the GID as an int.
   175  func gidFromName(group string) (int, error) {
   176  	g, err := user.LookupGroup(group)
   177  	if err != nil {
   178  		return 0, err
   179  	}
   180  
   181  	return strconv.Atoi(g.Gid)
   182  }
   183  
   184  // PathChecker returns a PathChecker that can be used with a Ch.
   185  func (f *GIDFromSubDir) PathChecker() PathChecker {
   186  	return func(path string) (change bool, gid int) {
   187  		parts := f.r.FindStringSubmatch(path)
   188  		if len(parts) != regexpSubgroups {
   189  			return
   190  		}
   191  
   192  		if parts[regexpDirPart] == f.lookupDir {
   193  			gid = f.lookupGID(parts[regexpGroupPart], path)
   194  		} else {
   195  			gid = f.directGID(parts[regexpGroupPart], path)
   196  		}
   197  
   198  		change = gid != badUnixGroup
   199  
   200  		return
   201  	}
   202  }
   203  
   204  // lookupGID returns the GID corresponding to the unix group value in our
   205  // lookup with the given key.
   206  func (f *GIDFromSubDir) lookupGID(key, fullPath string) int {
   207  	if gid, set := f.lookup[key]; set {
   208  		return gid
   209  	}
   210  
   211  	f.logger.Warn("subdir not in group lookup", "dir", key, "path", fullPath)
   212  
   213  	return badUnixGroup
   214  }
   215  
   216  // directGID returns the GID corresponding to the given unix group, unless group
   217  // is in our exceptions map, in which case that value is returned.
   218  func (f *GIDFromSubDir) directGID(group, fullPath string) int {
   219  	if gid, set := f.exceptions[group]; set {
   220  		return gid
   221  	}
   222  
   223  	gid, err := gidFromName(group)
   224  	if err != nil {
   225  		f.logger.Warn("subdir not a unix group name", "dir", group, "path", fullPath)
   226  
   227  		gid = badUnixGroup
   228  	}
   229  
   230  	f.exceptions[group] = gid
   231  
   232  	return gid
   233  }
   234  
// yamlForGIDFromSubDir is the struct we decode YAML in to during
// NewGIDFromSubDirFromYAML(). Its fields correspond to the arguments of
// NewGIDFromSubDir().
type yamlForGIDFromSubDir struct {
	// Prefixes is passed on as the prefixes argument. Required.
	Prefixes []string
	// LookupDir is passed on as the lookupDir argument. Required.
	LookupDir string `yaml:"lookupDir"`
	// DirectDir is passed on as the directDir argument. Required.
	DirectDir string `yaml:"directDir"`
	// Lookup is passed on as the lookup argument (sub directory name to unix
	// group name). Required.
	Lookup map[string]string
	// Exceptions is passed on as the exceptions argument (sub directory name
	// to GID). Optional; valid() replaces a nil map with an empty one.
	Exceptions map[string]int
}
   244  
   245  // valid checks that required fields have been set.
   246  func (y *yamlForGIDFromSubDir) valid() bool {
   247  	if len(y.Prefixes) == 0 || y.LookupDir == "" || y.DirectDir == "" || len(y.Lookup) == 0 {
   248  		return false
   249  	}
   250  
   251  	if y.Exceptions == nil {
   252  		y.Exceptions = make(map[string]int)
   253  	}
   254  
   255  	return true
   256  }
   257  
   258  // NewGIDFromSubDirFromYAML is like NewGIDFromSubDir, but instead of supplying
   259  // all the different args, you supply it YAML data in the following format:
   260  //
   261  // prefixes: ["/disk1", "/disk2/sub", "/disk3"]
   262  // lookupDir: subdir_name_of_prefixes_that_contains_subdirs_in_lookup
   263  // lookup:
   264  //   foo: unix_group_name
   265  // directDir: subdir_of_prefixes_with_unix_group_or_exception_subdirs
   266  // exceptions:
   267  //   bar: GID
   268  func NewGIDFromSubDirFromYAML(data []byte, logger log15.Logger) (*GIDFromSubDir, error) {
   269  	var yfgfs yamlForGIDFromSubDir
   270  
   271  	err := yaml.Unmarshal(data, &yfgfs)
   272  	if err != nil {
   273  		return nil, err
   274  	}
   275  
   276  	if !yfgfs.valid() {
   277  		return nil, errInvalidYAML
   278  	}
   279  
   280  	return NewGIDFromSubDir(yfgfs.Prefixes, yfgfs.LookupDir, yfgfs.Lookup, yfgfs.DirectDir, yfgfs.Exceptions, logger)
   281  }