github.com/wtsi-ssg/wrstat@v1.1.4-0.20221008232152-3030622a8cf8/dgut/parse.go (about) 1 /******************************************************************************* 2 * Copyright (c) 2022 Genome Research Ltd. 3 * 4 * Author: Sendu Bala <sb10@sanger.ac.uk> 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining 7 * a copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sublicense, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice shall be included 15 * in all copies or substantial portions of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 20 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 21 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 22 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 23 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 24 ******************************************************************************/ 25 26 package dgut 27 28 import ( 29 "bufio" 30 "errors" 31 "io" 32 "strconv" 33 "strings" 34 35 "github.com/wtsi-ssg/wrstat/summary" 36 ) 37 38 type Error string 39 40 func (e Error) Error() string { return string(e) } 41 42 const ErrInvalidFormat = Error("the provided data was not in dgut format") 43 const ErrBlankLine = Error("the provided line had no information") 44 45 const ( 46 gutDataCols = 7 47 gutDataIntCols = 6 48 ) 49 50 type dgutParserCallBack func(*DGUT) 51 52 // parseDGUTLines will parse the given dgut file data (as output by 53 // summary.DirGroupUserType.Output()) and send *DGUT structs to your callback. 54 // 55 // Each *DGUT will correspond to one of the directories in your dgut file data, 56 // and contain all the *GUT information for that directory. Your callback will 57 // receive exactly 1 *DGUT per unique directory. (This relies on the dgut file 58 // data being sorted, as it normally would be.) 59 // 60 // Any issues with parsing the dgut file data will result in this method 61 // returning an error. 62 func parseDGUTLines(data io.Reader, cb dgutParserCallBack) error { 63 dgut, guts := &DGUT{}, []*GUT{} 64 65 scanner := bufio.NewScanner(data) 66 for scanner.Scan() { 67 thisDir, g, err := parseDGUTLine(scanner.Text()) 68 if err != nil { 69 if errors.Is(err, ErrBlankLine) { 70 continue 71 } 72 73 return err 74 } 75 76 if thisDir != dgut.Dir { 77 populateAndEmitDGUT(dgut, guts, cb) 78 dgut, guts = &DGUT{Dir: thisDir}, []*GUT{} 79 } 80 81 guts = append(guts, g) 82 } 83 84 if dgut.Dir != "" { 85 dgut.GUTs = guts 86 cb(dgut) 87 } 88 89 return scanner.Err() 90 } 91 92 // populateAndEmitDGUT adds guts to dguts and sends dgut to cb, but only if 93 // the dgut has a Dir. 94 func populateAndEmitDGUT(dgut *DGUT, guts []*GUT, cb dgutParserCallBack) { 95 if dgut.Dir != "" { 96 dgut.GUTs = guts 97 cb(dgut) 98 } 99 } 100 101 // parseDGUTLine parses a line of summary.DirGroupUserType.Output() into a 102 // directory string and a *dgut for the other information. 103 // 104 // Returns an error if line didn't have the expected format. 105 func parseDGUTLine(line string) (string, *GUT, error) { 106 parts, err := splitDGUTLine(line) 107 if err != nil { 108 return "", nil, err 109 } 110 111 if parts[0] == "" { 112 return "", nil, ErrBlankLine 113 } 114 115 ints, err := gutLinePartsToInts(parts) 116 if err != nil { 117 return "", nil, err 118 } 119 120 return parts[0], &GUT{ 121 GID: uint32(ints[0]), 122 UID: uint32(ints[1]), 123 FT: summary.DirGUTFileType(ints[2]), 124 Count: ints[3], 125 Size: ints[4], 126 Atime: int64(ints[5]), 127 }, nil 128 } 129 130 // splitDGUTLine trims the \n from line and splits it in to 7 columns. 131 func splitDGUTLine(line string) ([]string, error) { 132 line = strings.TrimSuffix(line, "\n") 133 134 parts := strings.Split(line, "\t") 135 if len(parts) != gutDataCols { 136 return nil, ErrInvalidFormat 137 } 138 139 return parts, nil 140 } 141 142 // gutLinePartsToInts takes the output of splitDGUTLine() and returns the last 143 // 6 columns as ints. 144 func gutLinePartsToInts(parts []string) ([]uint64, error) { 145 ints := make([]uint64, gutDataIntCols) 146 147 var err error 148 149 if ints[0], err = strconv.ParseUint(parts[1], 10, 32); err != nil { 150 return nil, ErrInvalidFormat 151 } 152 153 if ints[1], err = strconv.ParseUint(parts[2], 10, 32); err != nil { 154 return nil, ErrInvalidFormat 155 } 156 157 if ints[2], err = strconv.ParseUint(parts[3], 10, 8); err != nil { 158 return nil, ErrInvalidFormat 159 } 160 161 if ints[3], err = strconv.ParseUint(parts[4], 10, 64); err != nil { 162 return nil, ErrInvalidFormat 163 } 164 165 if ints[4], err = strconv.ParseUint(parts[5], 10, 64); err != nil { 166 return nil, ErrInvalidFormat 167 } 168 169 if ints[5], err = strconv.ParseUint(parts[6], 10, 64); err != nil { 170 return nil, ErrInvalidFormat 171 } 172 173 return ints, nil 174 }