github.com/wtsi-ssg/wrstat@v1.1.4-0.20221008232152-3030622a8cf8/dgut/tree.go (about) 1 /******************************************************************************* 2 * Copyright (c) 2022 Genome Research Ltd. 3 * 4 * Author: Sendu Bala <sb10@sanger.ac.uk> 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining 7 * a copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sublicense, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice shall be included 15 * in all copies or substantial portions of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 20 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 21 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 22 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 23 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 24 ******************************************************************************/ 25 26 package dgut 27 28 import ( 29 "sort" 30 "time" 31 32 "github.com/wtsi-ssg/wrstat/summary" 33 ) 34 35 // Tree is used to do high-level queries on DB.Store() database files. 36 type Tree struct { 37 db *DB 38 } 39 40 // NewTree, given the paths to one or more dgut database files (as created by 41 // DB.Store()), returns a *Tree that can be used to do high-level queries on the 42 // stats of a tree of disk folders. You should Close() the tree after use. 43 func NewTree(paths ...string) (*Tree, error) { 44 db := NewDB(paths...) 45 46 if err := db.Open(); err != nil { 47 return nil, err 48 } 49 50 return &Tree{db: db}, nil 51 } 52 53 // DirSummary holds nested file count, size and atime information on a 54 // directory. It also holds which users and groups own files nested under the 55 // directory, and what the file types are. 56 type DirSummary struct { 57 Dir string 58 Count uint64 59 Size uint64 60 Atime time.Time 61 UIDs []uint32 62 GIDs []uint32 63 FTs []summary.DirGUTFileType 64 } 65 66 // DCSs is a Size-sortable slice of DirSummary. 67 type DCSs []*DirSummary 68 69 func (d DCSs) Len() int { 70 return len(d) 71 } 72 func (d DCSs) Swap(i, j int) { 73 d[i], d[j] = d[j], d[i] 74 } 75 func (d DCSs) Less(i, j int) bool { 76 return d[i].Size > d[j].Size 77 } 78 79 // SortByDir sorts by Dir instead of Size. 80 func (d DCSs) SortByDir() { 81 sort.Slice(d, func(i, j int) bool { 82 return d[i].Dir < d[j].Dir 83 }) 84 } 85 86 // DirInfo holds nested file count, size, UID and GID information on a 87 // directory, and also its immediate child directories. 88 type DirInfo struct { 89 Current *DirSummary 90 Children []*DirSummary 91 } 92 93 // IsSameAsChild tells you if this DirInfo has only 1 child, and the child 94 // has the same file count. Ie. our child contains the same files as us. 95 func (d *DirInfo) IsSameAsChild() bool { 96 return len(d.Children) == 1 && d.Children[0].Count == d.Current.Count 97 } 98 99 // DirInfo tells you the total number of files and their total size nested under 100 // the given directory, along with the UIDs and GIDs that own those files. 101 // See GUTs.Summary for an explanation of the filter. 102 // 103 // It also tells you the same information about the immediate child directories 104 // of the given directory (if the children have files in them that pass the 105 // filter). 106 // 107 // Returns an error if dir doesn't exist. 108 func (t *Tree) DirInfo(dir string, filter *Filter) (*DirInfo, error) { 109 dcs, err := t.getSummaryInfo(dir, filter) 110 if err != nil { 111 return nil, err 112 } 113 114 di := &DirInfo{ 115 Current: dcs, 116 } 117 118 children := t.db.Children(di.Current.Dir) 119 err = t.addChildInfo(di, children, filter) 120 121 return di, err 122 } 123 124 // DirHasChildren tells you if the given directory has any child directories 125 // with files in them that pass the filter. See GUTs.Summary for an explanation 126 // of the filter. 127 func (t *Tree) DirHasChildren(dir string, filter *Filter) bool { 128 children := t.db.Children(dir) 129 130 for _, child := range children { 131 ds, _ := t.getSummaryInfo(child, filter) //nolint:errcheck 132 133 if ds.Count > 0 { 134 return true 135 } 136 } 137 138 return false 139 } 140 141 // getSummaryInfo accesses the database to retrieve the count, size and atime 142 // info for a given directory and filter, along with the UIDs and GIDs that own 143 // those files, the file types of those files. 144 func (t *Tree) getSummaryInfo(dir string, filter *Filter) (*DirSummary, error) { 145 c, s, a, u, g, fts, err := t.db.DirInfo(dir, filter) 146 if err != nil { 147 return nil, err 148 } 149 150 return &DirSummary{ 151 Dir: dir, 152 Count: c, 153 Size: s, 154 Atime: time.Unix(a, 0), 155 UIDs: u, 156 GIDs: g, 157 FTs: fts, 158 }, nil 159 } 160 161 // addChildInfo adds DirSummary info of the given child paths to the di's 162 // Children. If a child dir has no files in it, it is ignored. 163 func (t *Tree) addChildInfo(di *DirInfo, children []string, filter *Filter) error { 164 for _, child := range children { 165 dcs, errc := t.getSummaryInfo(child, filter) 166 if errc != nil { 167 return errc 168 } 169 170 if dcs.Count > 0 { 171 di.Children = append(di.Children, dcs) 172 } 173 } 174 175 return nil 176 } 177 178 // Where tells you where files are nested under dir that pass the filter. With a 179 // depth of 0 it only returns the single deepest directory that has all passing 180 // files nested under it. 181 // 182 // With a depth of 1, it also returns the results that calling Where() with a 183 // depth of 0 on each of the deepest directory's children would give. And so on 184 // recursively for higher depths. 185 // 186 // See GUTs.Summary for an explanation of the filter. 187 // 188 // For example, if all user 354's files are in the directories /a/b/c/d (2 189 // files), /a/b/c/d/1 (1 files), /a/b/c/d/2 (2 files) and /a/b/e/f/g (2 files), 190 // Where("/", &Filter{UIDs: []uint32{354}}, 0) would tell you that "/a/b" has 7 191 // files. With a depth of 1 it would tell you that "/a/b" has 7 files, 192 // "/a/b/c/d" has 5 files and "/a/b/e/f/g" has 2 files. With a depth of 2 it 193 // would tell you that "/a/b" has 7 files, "/a/b/c/d" has 5 files, "/a/b/c/d/1" 194 // has 1 file, "/a/b/c/d/2" has 2 files, and "/a/b/e/f/g" has 2 files. 195 // 196 // The returned DirSummarys are sorted by Size, largest first. 197 // 198 // Returns an error if dir doesn't exist. 199 func (t *Tree) Where(dir string, filter *Filter, depth int) (DCSs, error) { 200 var dcss DCSs 201 202 di, err := t.where0(dir, filter) 203 if err != nil { 204 return nil, err 205 } 206 207 dcss = append(dcss, di.Current) 208 209 children := di.Children 210 211 for i := 0; i < depth; i++ { 212 var theseChildren []*DirSummary 213 214 for _, dcs := range children { 215 // where0 can't return an error here, because we're supplying it a 216 // directory name that came from the database. 217 //nolint:errcheck 218 diChild, _ := t.where0(dcs.Dir, filter) 219 dcss = append(dcss, diChild.Current) 220 theseChildren = append(theseChildren, diChild.Children...) 221 } 222 223 children = theseChildren 224 } 225 226 sort.Sort(dcss) 227 228 return dcss, nil 229 } 230 231 // where0 is the implementation of Where() for a depth of 0. 232 func (t *Tree) where0(dir string, filter *Filter) (*DirInfo, error) { 233 di, err := t.DirInfo(dir, filter) 234 if err != nil { 235 return nil, err 236 } 237 238 for di.IsSameAsChild() { 239 // DirInfo can't return an error here, because we're supplying it a 240 // directory name that came from the database. 241 //nolint:errcheck 242 di, _ = t.DirInfo(di.Children[0].Dir, filter) 243 } 244 245 return di, nil 246 } 247 248 // FileLocations, starting from the given dir, finds the first directory that 249 // directly contains filter-passing files along every branch from dir. 250 // 251 // See GUTs.Summary for an explanation of the filter. 252 // 253 // The results are returned sorted by directory. 254 func (t *Tree) FileLocations(dir string, filter *Filter) (DCSs, error) { 255 var dcss DCSs 256 257 di, err := t.DirInfo(dir, filter) 258 if err != nil { 259 return nil, err 260 } 261 262 var childCount uint64 263 264 for _, child := range di.Children { 265 childCount += child.Count 266 } 267 268 if childCount < di.Current.Count { 269 dcss = append(dcss, di.Current) 270 271 return dcss, nil 272 } 273 274 for _, child := range di.Children { 275 // FileLocations can't return an error here, because we're supplying it 276 // a directory name that came from the database. 277 //nolint:errcheck 278 childDCSs, _ := t.FileLocations(child.Dir, filter) 279 dcss = append(dcss, childDCSs...) 280 } 281 282 dcss.SortByDir() 283 284 return dcss, nil 285 } 286 287 // Close should be called after you've finished querying the tree to release its 288 // database locks. 289 func (t *Tree) Close() { 290 if t.db != nil { 291 t.db.Close() 292 } 293 }