github.com/wtsi-ssg/wrstat/v4@v4.5.1/summary/usergroup.go (about)

     1  /*******************************************************************************
     2   * Copyright (c) 2021 Genome Research Ltd.
     3   *
     4   * Author: Sendu Bala <sb10@sanger.ac.uk>
     5   *
     6   * Permission is hereby granted, free of charge, to any person obtaining
     7   * a copy of this software and associated documentation files (the
     8   * "Software"), to deal in the Software without restriction, including
     9   * without limitation the rights to use, copy, modify, merge, publish,
    10   * distribute, sublicense, and/or sell copies of the Software, and to
    11   * permit persons to whom the Software is furnished to do so, subject to
    12   * the following conditions:
    13   *
    14   * The above copyright notice and this permission notice shall be included
    15   * in all copies or substantial portions of the Software.
    16   *
    17   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
    18   * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
    19   * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
    20   * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
    21   * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
    22   * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
    23   * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
    24   ******************************************************************************/
    25  
    26  package summary
    27  
    28  import (
    29  	"fmt"
    30  	"io/fs"
    31  	"os/user"
    32  	"path/filepath"
    33  	"sort"
    34  	"strconv"
    35  	"syscall"
    36  )
    37  
    38  type Error string
    39  
    40  func (e Error) Error() string { return string(e) }
    41  
    42  const errNotUnix = Error("file info Sys() was not a *syscall.Stat_t; only unix is supported")
    43  
// dirStore maps directory paths to the summary accumulated for each of them.
// Its sort method returns the paths and their summaries ordered by path.
type dirStore map[string]*summary
    47  
    48  // addForEachDir breaks path into each directory and calls add() on it.
    49  func (store dirStore) addForEachDir(path string, size int64) {
    50  	cb := func(dir string) {
    51  		store.add(dir, size)
    52  	}
    53  
    54  	doForEachDir(path, cb)
    55  }
    56  
    57  // doForEachDir breaks path into each sub-directory, and passes each to the
    58  // given callback.
    59  func doForEachDir(path string, cb func(dir string)) {
    60  	dir := filepath.Dir(path)
    61  
    62  	for {
    63  		cb(dir)
    64  
    65  		if dir == "/" || dir == "." {
    66  			return
    67  		}
    68  
    69  		dir = filepath.Dir(dir)
    70  	}
    71  }
    72  
    73  // add will auto-vivify a summary for the given directory path and call
    74  // add(size) on it.
    75  func (store dirStore) add(path string, size int64) {
    76  	s, ok := store[path]
    77  	if !ok {
    78  		s = &summary{}
    79  		store[path] = s
    80  	}
    81  
    82  	s.add(size)
    83  }
    84  
    85  // sort returns a slice of our summary values, sorted by our directory path
    86  // keys which are also returned.
    87  func (store dirStore) sort() ([]string, []*summary) {
    88  	return sortSummaryStore(store)
    89  }
    90  
    91  // sortSummaryStore returns a slice of the store's values, sorted by the store's
    92  // keys which are also returned.
    93  func sortSummaryStore[T any](store map[string]*T) ([]string, []*T) {
    94  	keys := make([]string, len(store))
    95  	i := 0
    96  
    97  	for k := range store {
    98  		keys[i] = k
    99  		i++
   100  	}
   101  
   102  	sort.Strings(keys)
   103  
   104  	s := make([]*T, len(store))
   105  
   106  	for i, k := range keys {
   107  		s[i] = store[k]
   108  	}
   109  
   110  	return keys, s
   111  }
   112  
// groupStore maps a gid to the dirStore holding the per-directory summaries for
// that group. Its sort method orders the dirStores by group name.
type groupStore map[uint32]dirStore
   115  
   116  // getDirStore auto-vivifies a dirStore for the given gid and returns it.
   117  func (store groupStore) getDirStore(gid uint32) dirStore {
   118  	dStore, ok := store[gid]
   119  	if !ok {
   120  		dStore = make(dirStore)
   121  		store[gid] = dStore
   122  	}
   123  
   124  	return dStore
   125  }
   126  
   127  // sort returns a slice of our dirStore values, sorted by our gid keys converted
   128  // to group names, which are also returned.
   129  //
   130  // If a gid is invalid, the name will be id[gid].
   131  //
   132  // If you will be sorting multiple different groupStores, supply them all the
   133  // same gidLookupCache which is used to minimise gid to name lookups.
   134  func (store groupStore) sort(gidLookupCache map[uint32]string) ([]string, []dirStore) {
   135  	byGroupName := make(map[string]dirStore)
   136  
   137  	for gid, dStore := range store {
   138  		byGroupName[gidToName(gid, gidLookupCache)] = dStore
   139  	}
   140  
   141  	keys := make([]string, len(byGroupName))
   142  	i := 0
   143  
   144  	for k := range byGroupName {
   145  		keys[i] = k
   146  		i++
   147  	}
   148  
   149  	sort.Strings(keys)
   150  
   151  	s := make([]dirStore, len(byGroupName))
   152  
   153  	for i, k := range keys {
   154  		s[i] = byGroupName[k]
   155  	}
   156  
   157  	return keys, s
   158  }
   159  
   160  // gidToName converts gid to group name, using the given cache to avoid lookups.
   161  func gidToName(gid uint32, cache map[uint32]string) string {
   162  	return cachedIDToName(gid, cache, getGroupName)
   163  }
   164  
   165  func cachedIDToName(id uint32, cache map[uint32]string, lookup func(uint32) string) string {
   166  	if name, ok := cache[id]; ok {
   167  		return name
   168  	}
   169  
   170  	name := lookup(id)
   171  
   172  	cache[id] = name
   173  
   174  	return name
   175  }
   176  
   177  // getGroupName returns the name of the group given gid. If the lookup fails,
   178  // returns "idxxx", where xxx is the given id as a string.
   179  func getGroupName(id uint32) string {
   180  	sid := strconv.Itoa(int(id))
   181  
   182  	g, err := user.LookupGroupId(sid)
   183  	if err != nil {
   184  		return "id" + sid
   185  	}
   186  
   187  	return g.Name
   188  }
   189  
// userStore maps a uid to the groupStore holding the per-group, per-directory
// summaries for that user. Its sort method orders the groupStores by user name.
type userStore map[uint32]groupStore
   192  
   193  // DirStore auto-vivifies an entry in our store for the given uid and gid and
   194  // returns it.
   195  func (store userStore) DirStore(uid, gid uint32) dirStore {
   196  	return store.getGroupStore(uid).getDirStore(gid)
   197  }
   198  
   199  // getGroupStore auto-vivifies a groupStore for the given uid and returns it.
   200  func (store userStore) getGroupStore(uid uint32) groupStore {
   201  	gStore, ok := store[uid]
   202  	if !ok {
   203  		gStore = make(groupStore)
   204  		store[uid] = gStore
   205  	}
   206  
   207  	return gStore
   208  }
   209  
   210  // sort returns a slice of our groupStore values, sorted by our uid keys
   211  // converted to user names, which are also returned. If uid has no user name,
   212  // user name will be id[uid].
   213  func (store userStore) sort() ([]string, []groupStore) {
   214  	byUserName := make(map[string]groupStore)
   215  
   216  	for uid, gids := range store {
   217  		byUserName[getUserName(uid)] = gids
   218  	}
   219  
   220  	keys := make([]string, len(byUserName))
   221  	i := 0
   222  
   223  	for k := range byUserName {
   224  		keys[i] = k
   225  		i++
   226  	}
   227  
   228  	sort.Strings(keys)
   229  
   230  	s := make([]groupStore, len(byUserName))
   231  
   232  	for i, k := range keys {
   233  		s[i] = byUserName[k]
   234  	}
   235  
   236  	return keys, s
   237  }
   238  
   239  // getUserName returns the username of the given uid. If the lookup fails,
   240  // returns "idxxx", where xxx is the given id as a string.
   241  func getUserName(id uint32) string {
   242  	sid := strconv.Itoa(int(id))
   243  
   244  	u, err := user.LookupId(sid)
   245  	if err != nil {
   246  		return "id" + sid
   247  	}
   248  
   249  	return u.Username
   250  }
   251  
// Usergroup is used to summarise file stats by user and group.
type Usergroup struct {
	// store holds the accumulated summaries, keyed by uid, then gid, then
	// directory path.
	store userStore
}
   256  
   257  // NewByUserGroup returns a Usergroup.
   258  func NewByUserGroup() *Usergroup {
   259  	return &Usergroup{
   260  		store: make(userStore),
   261  	}
   262  }
   263  
   264  // Add is a github.com/wtsi-ssg/wrstat/stat Operation. It will break path in to
   265  // its directories and add the file size and increment the file count to each,
   266  // summed for the info's user and group. If path is a directory, it is ignored.
   267  func (u *Usergroup) Add(path string, info fs.FileInfo) error {
   268  	if info.IsDir() {
   269  		return nil
   270  	}
   271  
   272  	stat, ok := info.Sys().(*syscall.Stat_t)
   273  	if !ok {
   274  		return errNotUnix
   275  	}
   276  
   277  	dStore := u.store.DirStore(stat.Uid, stat.Gid)
   278  
   279  	dStore.addForEachDir(path, info.Size())
   280  
   281  	return nil
   282  }
   283  
   284  // Output will write summary information for all the paths previously added. The
   285  // format is (tab separated):
   286  //
   287  // username group directory filecount filesize
   288  //
   289  // usernames, groups and directories are sorted.
   290  //
   291  // Returns an error on failure to write, or if username or group can't be
   292  // determined from the uids and gids in the added file info. output is closed
   293  // on completion.
   294  func (u *Usergroup) Output(output StringCloser) error {
   295  	users, gStores := u.store.sort()
   296  
   297  	gidLookupCache := make(map[uint32]string)
   298  
   299  	for i, username := range users {
   300  		if err := outputGroupDirectorySummariesForUser(output, username, gStores[i], gidLookupCache); err != nil {
   301  			return err
   302  		}
   303  	}
   304  
   305  	return output.Close()
   306  }
   307  
   308  // outputGroupDirectorySummariesForUser sortes the groups for this user and
   309  // calls outputDirectorySummariesForGroup.
   310  func outputGroupDirectorySummariesForUser(output StringCloser, username string,
   311  	gStore groupStore, gidLookupCache map[uint32]string) error {
   312  	groupnames, dStores := gStore.sort(gidLookupCache)
   313  
   314  	for i, groupname := range groupnames {
   315  		if err := outputDirectorySummariesForGroup(output, username, groupname, dStores[i]); err != nil {
   316  			return err
   317  		}
   318  	}
   319  
   320  	return nil
   321  }
   322  
   323  // outputDirectorySummariesForGroup sorts the directories for this group and
   324  // does the actual output of all the summary information.
   325  func outputDirectorySummariesForGroup(output StringCloser, username, groupname string, dStore dirStore) error {
   326  	dirs, summaries := dStore.sort()
   327  
   328  	for i, s := range summaries {
   329  		_, errw := output.WriteString(fmt.Sprintf("%s\t%s\t%s\t%d\t%d\n", username, groupname, dirs[i], s.count, s.size))
   330  		if errw != nil {
   331  			return errw
   332  		}
   333  	}
   334  
   335  	return nil
   336  }