github.com/wtsi-ssg/wrstat/v4@v4.5.1/summary/usergroup.go (about) 1 /******************************************************************************* 2 * Copyright (c) 2021 Genome Research Ltd. 3 * 4 * Author: Sendu Bala <sb10@sanger.ac.uk> 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining 7 * a copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sublicense, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice shall be included 15 * in all copies or substantial portions of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 20 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 21 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 22 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 23 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 24 ******************************************************************************/ 25 26 package summary 27 28 import ( 29 "fmt" 30 "io/fs" 31 "os/user" 32 "path/filepath" 33 "sort" 34 "strconv" 35 "syscall" 36 ) 37 38 type Error string 39 40 func (e Error) Error() string { return string(e) } 41 42 const errNotUnix = Error("file info Sys() was not a *syscall.Stat_t; only unix is supported") 43 44 // dirStore is a sortable map with directory paths as keys and summaries as 45 // values. 46 type dirStore map[string]*summary 47 48 // addForEachDir breaks path into each directory and calls add() on it. 49 func (store dirStore) addForEachDir(path string, size int64) { 50 cb := func(dir string) { 51 store.add(dir, size) 52 } 53 54 doForEachDir(path, cb) 55 } 56 57 // doForEachDir breaks path into each sub-directory, and passes each to the 58 // given callback. 59 func doForEachDir(path string, cb func(dir string)) { 60 dir := filepath.Dir(path) 61 62 for { 63 cb(dir) 64 65 if dir == "/" || dir == "." { 66 return 67 } 68 69 dir = filepath.Dir(dir) 70 } 71 } 72 73 // add will auto-vivify a summary for the given directory path and call 74 // add(size) on it. 75 func (store dirStore) add(path string, size int64) { 76 s, ok := store[path] 77 if !ok { 78 s = &summary{} 79 store[path] = s 80 } 81 82 s.add(size) 83 } 84 85 // sort returns a slice of our summary values, sorted by our directory path 86 // keys which are also returned. 87 func (store dirStore) sort() ([]string, []*summary) { 88 return sortSummaryStore(store) 89 } 90 91 // sortSummaryStore returns a slice of the store's values, sorted by the store's 92 // keys which are also returned. 93 func sortSummaryStore[T any](store map[string]*T) ([]string, []*T) { 94 keys := make([]string, len(store)) 95 i := 0 96 97 for k := range store { 98 keys[i] = k 99 i++ 100 } 101 102 sort.Strings(keys) 103 104 s := make([]*T, len(store)) 105 106 for i, k := range keys { 107 s[i] = store[k] 108 } 109 110 return keys, s 111 } 112 113 // groupStore is a sortable map of gid to dirStore. 114 type groupStore map[uint32]dirStore 115 116 // getDirStore auto-vivifies a dirStore for the given gid and returns it. 117 func (store groupStore) getDirStore(gid uint32) dirStore { 118 dStore, ok := store[gid] 119 if !ok { 120 dStore = make(dirStore) 121 store[gid] = dStore 122 } 123 124 return dStore 125 } 126 127 // sort returns a slice of our dirStore values, sorted by our gid keys converted 128 // to group names, which are also returned. 129 // 130 // If a gid is invalid, the name will be id[gid]. 131 // 132 // If you will be sorting multiple different groupStores, supply them all the 133 // same gidLookupCache which is used to minimise gid to name lookups. 134 func (store groupStore) sort(gidLookupCache map[uint32]string) ([]string, []dirStore) { 135 byGroupName := make(map[string]dirStore) 136 137 for gid, dStore := range store { 138 byGroupName[gidToName(gid, gidLookupCache)] = dStore 139 } 140 141 keys := make([]string, len(byGroupName)) 142 i := 0 143 144 for k := range byGroupName { 145 keys[i] = k 146 i++ 147 } 148 149 sort.Strings(keys) 150 151 s := make([]dirStore, len(byGroupName)) 152 153 for i, k := range keys { 154 s[i] = byGroupName[k] 155 } 156 157 return keys, s 158 } 159 160 // gidToName converts gid to group name, using the given cache to avoid lookups. 161 func gidToName(gid uint32, cache map[uint32]string) string { 162 return cachedIDToName(gid, cache, getGroupName) 163 } 164 165 func cachedIDToName(id uint32, cache map[uint32]string, lookup func(uint32) string) string { 166 if name, ok := cache[id]; ok { 167 return name 168 } 169 170 name := lookup(id) 171 172 cache[id] = name 173 174 return name 175 } 176 177 // getGroupName returns the name of the group given gid. If the lookup fails, 178 // returns "idxxx", where xxx is the given id as a string. 179 func getGroupName(id uint32) string { 180 sid := strconv.Itoa(int(id)) 181 182 g, err := user.LookupGroupId(sid) 183 if err != nil { 184 return "id" + sid 185 } 186 187 return g.Name 188 } 189 190 // userStore is a sortable map of uid to groupStore. 191 type userStore map[uint32]groupStore 192 193 // DirStore auto-vivifies an entry in our store for the given uid and gid and 194 // returns it. 195 func (store userStore) DirStore(uid, gid uint32) dirStore { 196 return store.getGroupStore(uid).getDirStore(gid) 197 } 198 199 // getGroupStore auto-vivifies a groupStore for the given uid and returns it. 200 func (store userStore) getGroupStore(uid uint32) groupStore { 201 gStore, ok := store[uid] 202 if !ok { 203 gStore = make(groupStore) 204 store[uid] = gStore 205 } 206 207 return gStore 208 } 209 210 // sort returns a slice of our groupStore values, sorted by our uid keys 211 // converted to user names, which are also returned. If uid has no user name, 212 // user name will be id[uid]. 213 func (store userStore) sort() ([]string, []groupStore) { 214 byUserName := make(map[string]groupStore) 215 216 for uid, gids := range store { 217 byUserName[getUserName(uid)] = gids 218 } 219 220 keys := make([]string, len(byUserName)) 221 i := 0 222 223 for k := range byUserName { 224 keys[i] = k 225 i++ 226 } 227 228 sort.Strings(keys) 229 230 s := make([]groupStore, len(byUserName)) 231 232 for i, k := range keys { 233 s[i] = byUserName[k] 234 } 235 236 return keys, s 237 } 238 239 // getUserName returns the username of the given uid. If the lookup fails, 240 // returns "idxxx", where xxx is the given id as a string. 241 func getUserName(id uint32) string { 242 sid := strconv.Itoa(int(id)) 243 244 u, err := user.LookupId(sid) 245 if err != nil { 246 return "id" + sid 247 } 248 249 return u.Username 250 } 251 252 // Usergroup is used to summarise file stats by user and group. 253 type Usergroup struct { 254 store userStore 255 } 256 257 // NewByUserGroup returns a Usergroup. 258 func NewByUserGroup() *Usergroup { 259 return &Usergroup{ 260 store: make(userStore), 261 } 262 } 263 264 // Add is a github.com/wtsi-ssg/wrstat/stat Operation. It will break path in to 265 // its directories and add the file size and increment the file count to each, 266 // summed for the info's user and group. If path is a directory, it is ignored. 267 func (u *Usergroup) Add(path string, info fs.FileInfo) error { 268 if info.IsDir() { 269 return nil 270 } 271 272 stat, ok := info.Sys().(*syscall.Stat_t) 273 if !ok { 274 return errNotUnix 275 } 276 277 dStore := u.store.DirStore(stat.Uid, stat.Gid) 278 279 dStore.addForEachDir(path, info.Size()) 280 281 return nil 282 } 283 284 // Output will write summary information for all the paths previously added. The 285 // format is (tab separated): 286 // 287 // username group directory filecount filesize 288 // 289 // usernames, groups and directories are sorted. 290 // 291 // Returns an error on failure to write, or if username or group can't be 292 // determined from the uids and gids in the added file info. output is closed 293 // on completion. 294 func (u *Usergroup) Output(output StringCloser) error { 295 users, gStores := u.store.sort() 296 297 gidLookupCache := make(map[uint32]string) 298 299 for i, username := range users { 300 if err := outputGroupDirectorySummariesForUser(output, username, gStores[i], gidLookupCache); err != nil { 301 return err 302 } 303 } 304 305 return output.Close() 306 } 307 308 // outputGroupDirectorySummariesForUser sortes the groups for this user and 309 // calls outputDirectorySummariesForGroup. 310 func outputGroupDirectorySummariesForUser(output StringCloser, username string, 311 gStore groupStore, gidLookupCache map[uint32]string) error { 312 groupnames, dStores := gStore.sort(gidLookupCache) 313 314 for i, groupname := range groupnames { 315 if err := outputDirectorySummariesForGroup(output, username, groupname, dStores[i]); err != nil { 316 return err 317 } 318 } 319 320 return nil 321 } 322 323 // outputDirectorySummariesForGroup sorts the directories for this group and 324 // does the actual output of all the summary information. 325 func outputDirectorySummariesForGroup(output StringCloser, username, groupname string, dStore dirStore) error { 326 dirs, summaries := dStore.sort() 327 328 for i, s := range summaries { 329 _, errw := output.WriteString(fmt.Sprintf("%s\t%s\t%s\t%d\t%d\n", username, groupname, dirs[i], s.count, s.size)) 330 if errw != nil { 331 return errw 332 } 333 } 334 335 return nil 336 }