github.com/wtsi-ssg/wrstat@v1.1.4-0.20221008232152-3030622a8cf8/ch/from.go (about) 1 /******************************************************************************* 2 * Copyright (c) 2021 Genome Research Ltd. 3 * 4 * Author: Sendu Bala <sb10@sanger.ac.uk> 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining 7 * a copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sublicense, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice shall be included 15 * in all copies or substantial portions of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 20 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 21 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 22 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 23 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 24 ******************************************************************************/ 25 26 package ch 27 28 import ( 29 "os/user" 30 "regexp" 31 "strconv" 32 "strings" 33 34 "github.com/inconshreveable/log15" 35 "gopkg.in/yaml.v2" 36 ) 37 38 // regexp* consts relate to the groups matched in our main regexp. 
const (
	// regexpDirPart is the submatch index holding the name of the matched
	// lookupDir or directDir.
	regexpDirPart = 2

	// regexpGroupPart is the submatch index holding the name of the sub
	// directory found inside the lookupDir or directDir.
	regexpGroupPart = 3

	// regexpSubgroups is the number of elements a successful match yields:
	// the whole match plus the three capture groups.
	regexpSubgroups = 4
)

// badUnixGroup is the GID value used to signal that no usable unix group could
// be determined for a path.
const badUnixGroup = -1

// Error is a string that implements the error interface, which lets this
// package declare its errors as constants.
type Error string

// Error returns the error message.
func (e Error) Error() string {
	return string(e)
}

// errInvalidYAML is returned when supplied YAML lacks a required property.
const errInvalidYAML = Error("YAML is missing properties")

// GIDFromSubDir provides a PathChecker that can decide if a path should be
// looked at based on matching a prefix followed by a certain sub directory
// (lookupDir or directDir), and also decides what the GID of that path should
// be based on the sub dir of that sub dir.
//
// For subdirs of lookup directories, it converts from directory name to the
// desired unix group name using the lookup, then gets the GID for that unix
// group.
//
// For subdirs of direct directories, it treats the directory name as a unix
// group name, and gets the GID of that unix group. You can supply exceptions
// where your own GID is used instead.
//
// With prefixes:
//
//	"/disk1", "/disk2/sub", "/disk3"
//
// And a lookupDir of "teams" and a directDir of "projects"
//
// And lookup:
//
//	"ay": "dee"
//
// And exceptions:
//
//	"cee": 12345
//
// Given the paths:
//
//  1. /disk1/teams/ay/file1.txt
//  2. /disk2/sub/projects/bee/file2.txt
//  3. /disk2/sub/projects/cee/file3.txt
//  4. /disk3/file4.txt
//  5. /disk1/teams/new/file5.txt
//  6. /disk2/sub/projects/not_a_unix_group_name/file6.txt
//
// The .PathChecker() will return the following for each file:
//
//  1. true, [gid of unix group dee]
//  2. true, [gid of unix group bee]
//  3. true, 12345
//  4. false, n/a
//  5. false, n/a [and logs an error that "new" wasn't a known lookup]
//  6. false, n/a [and logs an error that "not_a_unix_group_name" has no GID].
92 type GIDFromSubDir struct { 93 r *regexp.Regexp 94 lookupDir string 95 directDir string 96 lookup map[string]int 97 exceptions map[string]int 98 logger log15.Logger 99 } 100 101 // NewGIDFromSubDir returns a GIDFromSubDir. 102 // 103 // prefixes are absolute paths to directories that our PathChecker will return 104 // true for if the path matches a prefix and also contains a subdirectory named 105 // [lookupDir] or [directDir], and the path is for an entry located within a 106 // further subdirectory of that. 107 // 108 // lookupDir is the name of a subdirectory of the prefix paths that contains 109 // further subdirectories that are keys in the given lookup map. The values in 110 // the map are the desired unix group names, which will be converted to GIDs. 111 // 112 // directDir is the name of a subdirectory of the prefix paths that contains 113 // further subdirectories that are named after unix group names. Or their name 114 // is a key in the exceptions map, and the corresponding value will be the GID 115 // used. NB. unix group name to GID lookups are cached in the supplied 116 // exceptions map. 117 // 118 // If lookupDir contains a subdirectory that isn't in your lookup, or directDir 119 // contains a subdirectory that isn't in your exceptions and isn't a unix group 120 // name, these issues are logged to the given logger, and the PathChecker will 121 // return false. 
122 func NewGIDFromSubDir(prefixes []string, lookupDir string, lookup map[string]string, 123 directDir string, exceptions map[string]int, logger log15.Logger) (*GIDFromSubDir, error) { 124 r := createPrefixRegex(prefixes, lookupDir, directDir) 125 126 gidLookup, err := createGIDLookup(lookup) 127 if err != nil { 128 return nil, err 129 } 130 131 return &GIDFromSubDir{ 132 r: r, 133 lookupDir: lookupDir, 134 lookup: gidLookup, 135 directDir: directDir, 136 exceptions: exceptions, 137 logger: logger, 138 }, nil 139 } 140 141 // createPrefixRegex creates a regexp that matches on given prefixes followed by 142 // one of lookupDir or directDir, followed by another subdir, and has each as 143 // capture groups. 144 func createPrefixRegex(prefixes []string, lookupDir, directDir string) *regexp.Regexp { 145 escapedPrefixes := make([]string, len(prefixes)) 146 for i, prefix := range prefixes { 147 escapedPrefixes[i] = regexp.QuoteMeta(prefix) 148 } 149 150 expr := `^(` + strings.Join(escapedPrefixes, `|`) + `)\/(` + 151 regexp.QuoteMeta(lookupDir) + `|` + 152 regexp.QuoteMeta(directDir) + `)\/([^/]+)\/.*$` 153 154 return regexp.MustCompile(expr) 155 } 156 157 // createGIDLookup takes the given lookup values (treating them as unix group 158 // names), converts them to GIDs, and returns a new map with the same keys. 159 func createGIDLookup(lookup map[string]string) (map[string]int, error) { 160 gl := make(map[string]int, len(lookup)) 161 162 for key, name := range lookup { 163 gid, err := gidFromName(name) 164 if err != nil { 165 return nil, err 166 } 167 168 gl[key] = gid 169 } 170 171 return gl, nil 172 } 173 174 // gidFromName looks up unix group by name and returns the GID as an int. 175 func gidFromName(group string) (int, error) { 176 g, err := user.LookupGroup(group) 177 if err != nil { 178 return 0, err 179 } 180 181 return strconv.Atoi(g.Gid) 182 } 183 184 // PathChecker returns a PathChecker that can be used with a Ch. 
185 func (f *GIDFromSubDir) PathChecker() PathChecker { 186 return func(path string) (change bool, gid int) { 187 parts := f.r.FindStringSubmatch(path) 188 if len(parts) != regexpSubgroups { 189 return 190 } 191 192 if parts[regexpDirPart] == f.lookupDir { 193 gid = f.lookupGID(parts[regexpGroupPart], path) 194 } else { 195 gid = f.directGID(parts[regexpGroupPart], path) 196 } 197 198 change = gid != badUnixGroup 199 200 return 201 } 202 } 203 204 // lookupGID returns the GID corresponding to the unix group value in our 205 // lookup with the given key. 206 func (f *GIDFromSubDir) lookupGID(key, fullPath string) int { 207 if gid, set := f.lookup[key]; set { 208 return gid 209 } 210 211 f.logger.Warn("subdir not in group lookup", "dir", key, "path", fullPath) 212 213 return badUnixGroup 214 } 215 216 // directGID returns the GID corresponding to the given unix group, unless group 217 // is in our exceptions map, in which case that value is returned. 218 func (f *GIDFromSubDir) directGID(group, fullPath string) int { 219 if gid, set := f.exceptions[group]; set { 220 return gid 221 } 222 223 gid, err := gidFromName(group) 224 if err != nil { 225 f.logger.Warn("subdir not a unix group name", "dir", group, "path", fullPath) 226 227 gid = badUnixGroup 228 } 229 230 f.exceptions[group] = gid 231 232 return gid 233 } 234 235 // yamlForGIDFromSubDir is the struct we decode YAML in to during 236 // NewGIDFromSubDirFromYAML(). 237 type yamlForGIDFromSubDir struct { 238 Prefixes []string 239 LookupDir string `yaml:"lookupDir"` 240 DirectDir string `yaml:"directDir"` 241 Lookup map[string]string 242 Exceptions map[string]int 243 } 244 245 // valid checks that required fields have been set. 
246 func (y *yamlForGIDFromSubDir) valid() bool { 247 if len(y.Prefixes) == 0 || y.LookupDir == "" || y.DirectDir == "" || len(y.Lookup) == 0 { 248 return false 249 } 250 251 if y.Exceptions == nil { 252 y.Exceptions = make(map[string]int) 253 } 254 255 return true 256 } 257 258 // NewGIDFromSubDirFromYAML is like NewGIDFromSubDir, but instead of supplying 259 // all the different args, you supply it YAML data in the following format: 260 // 261 // prefixes: ["/disk1", "/disk2/sub", "/disk3"] 262 // lookupDir: subdir_name_of_prefixes_that_contains_subdirs_in_lookup 263 // lookup: 264 // foo: unix_group_name 265 // directDir: subdir_of_prefixes_with_unix_group_or_exception_subdirs 266 // exceptions: 267 // bar: GID 268 func NewGIDFromSubDirFromYAML(data []byte, logger log15.Logger) (*GIDFromSubDir, error) { 269 var yfgfs yamlForGIDFromSubDir 270 271 err := yaml.Unmarshal(data, &yfgfs) 272 if err != nil { 273 return nil, err 274 } 275 276 if !yfgfs.valid() { 277 return nil, errInvalidYAML 278 } 279 280 return NewGIDFromSubDir(yfgfs.Prefixes, yfgfs.LookupDir, yfgfs.Lookup, yfgfs.DirectDir, yfgfs.Exceptions, logger) 281 }