// github.com/wtsi-ssg/wrstat@v1.1.4-0.20221008232152-3030622a8cf8/pathsize/parse.go
/*******************************************************************************
 * Copyright (c) 2022 Genome Research Ltd.
 *
 * Author: Sendu Bala <sb10@sanger.ac.uk>
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 ******************************************************************************/ 25 26 package pathsize 27 28 import ( 29 "bufio" 30 "errors" 31 "io" 32 "strconv" 33 "strings" 34 ) 35 36 type ParseError string 37 38 func (e ParseError) Error() string { return "the provided data was not in size format: " + string(e) } 39 40 const ( 41 ErrBlankLine = Error("the provided line had no information") 42 pathsizeDataCols = 2 43 ) 44 45 type pathsizeParserCallBack func(*PathSize) 46 47 // parsePathSizeLines will parse the given size file data (as generated by 48 // stat.SizeOperation()) and send *PathSize structs to your callback. 49 // 50 // Each *PathSize will correspond to a line from the size file. Your callback 51 // will receive exactly 1 *PathSize per line. 52 // 53 // Any issues with parsing the size file data will result in this method 54 // returning an error. 55 func parsePathSizeLines(data io.Reader, cb pathsizeParserCallBack) error { 56 scanner := bufio.NewScanner(data) 57 for scanner.Scan() { 58 path, size, err := parsePathSizeLine(scanner.Text()) 59 if err != nil { 60 if errors.Is(err, ErrBlankLine) { 61 continue 62 } 63 64 return err 65 } 66 67 cb(&PathSize{Path: path, Size: size}) 68 } 69 70 return scanner.Err() 71 } 72 73 // parsePathSizeLine parses a line of stat.SizeOperation() into a path and size. 74 // 75 // Returns an error if line didn't have the expected format. 76 func parsePathSizeLine(line string) (string, uint64, error) { 77 parts, err := splitPathSizeLine(line) 78 if err != nil { 79 return "", 0, err 80 } 81 82 if parts[0] == "" { 83 return "", 0, ErrBlankLine 84 } 85 86 size, err := strconv.ParseUint(parts[1], 10, 64) 87 88 return parts[0], size, err 89 } 90 91 // splitPathSizeLine trims the \n from line and splits it in to 2 columns. 
92 func splitPathSizeLine(line string) ([]string, error) { 93 line = strings.TrimSuffix(line, "\n") 94 95 parts := strings.Split(line, "\t") 96 97 if len(parts) > pathsizeDataCols { 98 // assume the filename had tabs in it 99 size, path := parts[len(parts)-1], parts[:len(parts)-1] 100 parts = []string{strings.Join(path, "\t"), size} 101 } 102 103 if len(parts) != pathsizeDataCols { 104 return nil, ParseError(line) 105 } 106 107 return parts, nil 108 }