github.com/wtsi-ssg/wrstat/v4@v4.5.1/basedirs/basedirs.go (about)

     1  /*******************************************************************************
     2   * Copyright (c) 2022, 2023 Genome Research Ltd.
     3   *
     4   * Authors:
     5   *   Sendu Bala <sb10@sanger.ac.uk>
     6   *   Michael Woolnough <mw31@sanger.ac.uk>
     7   *
     8   * Permission is hereby granted, free of charge, to any person obtaining
     9   * a copy of this software and associated documentation files (the
    10   * "Software"), to deal in the Software without restriction, including
    11   * without limitation the rights to use, copy, modify, merge, publish,
    12   * distribute, sublicense, and/or sell copies of the Software, and to
    13   * permit persons to whom the Software is furnished to do so, subject to
    14   * the following conditions:
    15   *
    16   * The above copyright notice and this permission notice shall be included
    17   * in all copies or substantial portions of the Software.
    18   *
    19   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
    20   * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
    21   * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
    22   * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
    23   * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
    24   * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
    25   * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
    26   ******************************************************************************/
    27  
// Package basedirs summarises disk usage information by base directory,
// storing that information in, and retrieving it from, an embedded database.
    30  
    31  package basedirs
    32  
    33  import (
    34  	"regexp"
    35  	"strings"
    36  
    37  	"github.com/ugorji/go/codec"
    38  	"github.com/wtsi-ssg/wrstat/v4/dgut"
    39  )
    40  
const (
	// basedirSplits is the number of splits requested from the tree's Where()
	// call when looking for candidate base directories.
	basedirSplits     = 4
	// basedirMinDirs is the minimum number of "/" characters a path must
	// contain to be considered as a base directory.
	basedirMinDirs    = 4
	// basedirMinDirsMDT replaces basedirMinDirs for paths that contain an mdt
	// directory (see basedirMDTRegexp).
	basedirMinDirsMDT = 5
)

// basedirMDTRegexp matches paths that have a component named "mdt" followed by
// a single digit (eg. "/mdt0"), either in the middle of the path or as its
// final component.
var basedirMDTRegexp = regexp.MustCompile(`\/mdt\d(\/|\z)`)
    48  
// BaseDirs is used to summarise disk usage information by base directory and
// group or user.
//
// Create one with NewCreator(); the zero value has no tree to query.
type BaseDirs struct {
	dbPath      string       // database path, as given to NewCreator()
	tree        *dgut.Tree   // tree queried (via Where()) for candidate base directories
	quotas      *Quotas      // quotas, as given to NewCreator()
	ch          codec.Handle // codec handle; NewCreator() sets this to a BincHandle
	mountPoints mountPoints  // from getMountPoints(), or overridden by SetMountPoints()
}
    58  
    59  // NewCreator returns a BaseDirs that lets you create a database summarising
    60  // usage information by base directory, taken from the given tree and quotas.
    61  func NewCreator(dbPath string, tree *dgut.Tree, quotas *Quotas) (*BaseDirs, error) {
    62  	mp, err := getMountPoints()
    63  	if err != nil {
    64  		return nil, err
    65  	}
    66  
    67  	return &BaseDirs{
    68  		dbPath:      dbPath,
    69  		tree:        tree,
    70  		quotas:      quotas,
    71  		ch:          new(codec.BincHandle),
    72  		mountPoints: mp,
    73  	}, nil
    74  }
    75  
    76  // SetMountPoints can be used to manually set your mountpoints, if the automatic
    77  // discovery of mountpoints on your system doesn't work.
    78  func (b *BaseDirs) SetMountPoints(mountpoints []string) {
    79  	b.mountPoints = mountpoints
    80  }
    81  
    82  // CalculateForGroup calculates all the base directories for the given group.
    83  func (b *BaseDirs) CalculateForGroup(gid uint32) (dgut.DCSs, error) {
    84  	var dcss dgut.DCSs
    85  
    86  	if err := b.filterWhereResults(&dgut.Filter{GIDs: []uint32{gid}}, func(ds *dgut.DirSummary) {
    87  		dcss = append(dcss, ds)
    88  	}); err != nil {
    89  		return nil, err
    90  	}
    91  
    92  	return dcss, nil
    93  }
    94  
    95  func (b *BaseDirs) filterWhereResults(filter *dgut.Filter, cb func(ds *dgut.DirSummary)) error {
    96  	dcss, err := b.tree.Where("/", filter, basedirSplits)
    97  	if err != nil {
    98  		return err
    99  	}
   100  
   101  	dcss.SortByDir()
   102  
   103  	var previous string
   104  
   105  	for _, ds := range dcss {
   106  		if notEnoughDirs(ds.Dir) || childOfPreviousResult(ds.Dir, previous) {
   107  			continue
   108  		}
   109  
   110  		cb(ds)
   111  
   112  		// used to be `dirs = append(dirs, ds.Dir)`
   113  		// then for each dir, `outFile.WriteString(fmt.Sprintf("%d\t%s\n", gid, dir))`
   114  
   115  		previous = ds.Dir
   116  	}
   117  
   118  	return nil
   119  }
   120  
   121  // notEnoughDirs returns true if the given path has fewer than 4 directories.
   122  // If path has an mdt directory in it, then it becomes 5 directories.
   123  func notEnoughDirs(path string) bool {
   124  	numDirs := strings.Count(path, "/")
   125  
   126  	min := basedirMinDirs
   127  	if basedirMDTRegexp.MatchString(path) {
   128  		min = basedirMinDirsMDT
   129  	}
   130  
   131  	return numDirs < min
   132  }
   133  
   134  // childOfPreviousResult returns true if previous is not blank, and dir starts
   135  // with it.
   136  func childOfPreviousResult(dir, previous string) bool {
   137  	return previous != "" && strings.HasPrefix(dir, previous)
   138  }
   139  
   140  // CalculateForUser calculates all the base directories for the given user.
   141  func (b *BaseDirs) CalculateForUser(uid uint32) (dgut.DCSs, error) {
   142  	var dcss dgut.DCSs
   143  
   144  	if err := b.filterWhereResults(&dgut.Filter{UIDs: []uint32{uid}}, func(ds *dgut.DirSummary) {
   145  		dcss = append(dcss, ds)
   146  	}); err != nil {
   147  		return nil, err
   148  	}
   149  
   150  	return dcss, nil
   151  }