github.com/wtsi-ssg/wrstat@v1.1.4-0.20221008232152-3030622a8cf8/pathsize/parse.go (about)

     1  /*******************************************************************************
     2   * Copyright (c) 2022 Genome Research Ltd.
     3   *
     4   * Author: Sendu Bala <sb10@sanger.ac.uk>
     5   *
     6   * Permission is hereby granted, free of charge, to any person obtaining
     7   * a copy of this software and associated documentation files (the
     8   * "Software"), to deal in the Software without restriction, including
     9   * without limitation the rights to use, copy, modify, merge, publish,
    10   * distribute, sublicense, and/or sell copies of the Software, and to
    11   * permit persons to whom the Software is furnished to do so, subject to
    12   * the following conditions:
    13   *
    14   * The above copyright notice and this permission notice shall be included
    15   * in all copies or substantial portions of the Software.
    16   *
    17   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
    18   * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
    19   * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
    20   * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
    21   * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
    22   * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
    23   * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
    24   ******************************************************************************/
    25  
    26  package pathsize
    27  
    28  import (
    29  	"bufio"
    30  	"errors"
    31  	"io"
    32  	"strconv"
    33  	"strings"
    34  )
    35  
    36  type ParseError string
    37  
    38  func (e ParseError) Error() string { return "the provided data was not in size format: " + string(e) }
    39  
    40  const (
    41  	ErrBlankLine     = Error("the provided line had no information")
    42  	pathsizeDataCols = 2
    43  )
    44  
// pathsizeParserCallBack is the signature of the function that
// parsePathSizeLines calls with each *PathSize it parses.
type pathsizeParserCallBack func(*PathSize)
    46  
    47  // parsePathSizeLines will parse the given size file data (as generated by
    48  // stat.SizeOperation()) and send *PathSize structs to your callback.
    49  //
    50  // Each *PathSize will correspond to a line from the size file. Your callback
    51  // will receive exactly 1 *PathSize per line.
    52  //
    53  // Any issues with parsing the size file data will result in this method
    54  // returning an error.
    55  func parsePathSizeLines(data io.Reader, cb pathsizeParserCallBack) error {
    56  	scanner := bufio.NewScanner(data)
    57  	for scanner.Scan() {
    58  		path, size, err := parsePathSizeLine(scanner.Text())
    59  		if err != nil {
    60  			if errors.Is(err, ErrBlankLine) {
    61  				continue
    62  			}
    63  
    64  			return err
    65  		}
    66  
    67  		cb(&PathSize{Path: path, Size: size})
    68  	}
    69  
    70  	return scanner.Err()
    71  }
    72  
    73  // parsePathSizeLine parses a line of stat.SizeOperation() into a path and size.
    74  //
    75  // Returns an error if line didn't have the expected format.
    76  func parsePathSizeLine(line string) (string, uint64, error) {
    77  	parts, err := splitPathSizeLine(line)
    78  	if err != nil {
    79  		return "", 0, err
    80  	}
    81  
    82  	if parts[0] == "" {
    83  		return "", 0, ErrBlankLine
    84  	}
    85  
    86  	size, err := strconv.ParseUint(parts[1], 10, 64)
    87  
    88  	return parts[0], size, err
    89  }
    90  
    91  // splitPathSizeLine trims the \n from line and splits it in to 2 columns.
    92  func splitPathSizeLine(line string) ([]string, error) {
    93  	line = strings.TrimSuffix(line, "\n")
    94  
    95  	parts := strings.Split(line, "\t")
    96  
    97  	if len(parts) > pathsizeDataCols {
    98  		// assume the filename had tabs in it
    99  		size, path := parts[len(parts)-1], parts[:len(parts)-1]
   100  		parts = []string{strings.Join(path, "\t"), size}
   101  	}
   102  
   103  	if len(parts) != pathsizeDataCols {
   104  		return nil, ParseError(line)
   105  	}
   106  
   107  	return parts, nil
   108  }