github.com/wtsi-ssg/wrstat@v1.1.4-0.20221008232152-3030622a8cf8/dgut/parse.go (about)

     1  /*******************************************************************************
     2   * Copyright (c) 2022 Genome Research Ltd.
     3   *
     4   * Author: Sendu Bala <sb10@sanger.ac.uk>
     5   *
     6   * Permission is hereby granted, free of charge, to any person obtaining
     7   * a copy of this software and associated documentation files (the
     8   * "Software"), to deal in the Software without restriction, including
     9   * without limitation the rights to use, copy, modify, merge, publish,
    10   * distribute, sublicense, and/or sell copies of the Software, and to
    11   * permit persons to whom the Software is furnished to do so, subject to
    12   * the following conditions:
    13   *
    14   * The above copyright notice and this permission notice shall be included
    15   * in all copies or substantial portions of the Software.
    16   *
    17   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
    18   * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
    19   * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
    20   * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
    21   * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
    22   * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
    23   * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
    24   ******************************************************************************/
    25  
    26  package dgut
    27  
    28  import (
    29  	"bufio"
    30  	"errors"
    31  	"io"
    32  	"strconv"
    33  	"strings"
    34  
    35  	"github.com/wtsi-ssg/wrstat/summary"
    36  )
    37  
    38  type Error string
    39  
    40  func (e Error) Error() string { return string(e) }
    41  
    42  const ErrInvalidFormat = Error("the provided data was not in dgut format")
    43  const ErrBlankLine = Error("the provided line had no information")
    44  
    45  const (
    46  	gutDataCols    = 7
    47  	gutDataIntCols = 6
    48  )
    49  
    50  type dgutParserCallBack func(*DGUT)
    51  
    52  // parseDGUTLines will parse the given dgut file data (as output by
    53  // summary.DirGroupUserType.Output()) and send *DGUT structs to your callback.
    54  //
    55  // Each *DGUT will correspond to one of the directories in your dgut file data,
    56  // and contain all the *GUT information for that directory. Your callback will
    57  // receive exactly 1 *DGUT per unique directory. (This relies on the dgut file
    58  // data being sorted, as it normally would be.)
    59  //
    60  // Any issues with parsing the dgut file data will result in this method
    61  // returning an error.
    62  func parseDGUTLines(data io.Reader, cb dgutParserCallBack) error {
    63  	dgut, guts := &DGUT{}, []*GUT{}
    64  
    65  	scanner := bufio.NewScanner(data)
    66  	for scanner.Scan() {
    67  		thisDir, g, err := parseDGUTLine(scanner.Text())
    68  		if err != nil {
    69  			if errors.Is(err, ErrBlankLine) {
    70  				continue
    71  			}
    72  
    73  			return err
    74  		}
    75  
    76  		if thisDir != dgut.Dir {
    77  			populateAndEmitDGUT(dgut, guts, cb)
    78  			dgut, guts = &DGUT{Dir: thisDir}, []*GUT{}
    79  		}
    80  
    81  		guts = append(guts, g)
    82  	}
    83  
    84  	if dgut.Dir != "" {
    85  		dgut.GUTs = guts
    86  		cb(dgut)
    87  	}
    88  
    89  	return scanner.Err()
    90  }
    91  
    92  // populateAndEmitDGUT adds guts to dguts and sends dgut to cb, but only if
    93  // the dgut has a Dir.
    94  func populateAndEmitDGUT(dgut *DGUT, guts []*GUT, cb dgutParserCallBack) {
    95  	if dgut.Dir != "" {
    96  		dgut.GUTs = guts
    97  		cb(dgut)
    98  	}
    99  }
   100  
   101  // parseDGUTLine parses a line of summary.DirGroupUserType.Output() into a
   102  // directory string and a *dgut for the other information.
   103  //
   104  // Returns an error if line didn't have the expected format.
   105  func parseDGUTLine(line string) (string, *GUT, error) {
   106  	parts, err := splitDGUTLine(line)
   107  	if err != nil {
   108  		return "", nil, err
   109  	}
   110  
   111  	if parts[0] == "" {
   112  		return "", nil, ErrBlankLine
   113  	}
   114  
   115  	ints, err := gutLinePartsToInts(parts)
   116  	if err != nil {
   117  		return "", nil, err
   118  	}
   119  
   120  	return parts[0], &GUT{
   121  		GID:   uint32(ints[0]),
   122  		UID:   uint32(ints[1]),
   123  		FT:    summary.DirGUTFileType(ints[2]),
   124  		Count: ints[3],
   125  		Size:  ints[4],
   126  		Atime: int64(ints[5]),
   127  	}, nil
   128  }
   129  
   130  // splitDGUTLine trims the \n from line and splits it in to 7 columns.
   131  func splitDGUTLine(line string) ([]string, error) {
   132  	line = strings.TrimSuffix(line, "\n")
   133  
   134  	parts := strings.Split(line, "\t")
   135  	if len(parts) != gutDataCols {
   136  		return nil, ErrInvalidFormat
   137  	}
   138  
   139  	return parts, nil
   140  }
   141  
   142  // gutLinePartsToInts takes the output of splitDGUTLine() and returns the last
   143  // 6 columns as ints.
   144  func gutLinePartsToInts(parts []string) ([]uint64, error) {
   145  	ints := make([]uint64, gutDataIntCols)
   146  
   147  	var err error
   148  
   149  	if ints[0], err = strconv.ParseUint(parts[1], 10, 32); err != nil {
   150  		return nil, ErrInvalidFormat
   151  	}
   152  
   153  	if ints[1], err = strconv.ParseUint(parts[2], 10, 32); err != nil {
   154  		return nil, ErrInvalidFormat
   155  	}
   156  
   157  	if ints[2], err = strconv.ParseUint(parts[3], 10, 8); err != nil {
   158  		return nil, ErrInvalidFormat
   159  	}
   160  
   161  	if ints[3], err = strconv.ParseUint(parts[4], 10, 64); err != nil {
   162  		return nil, ErrInvalidFormat
   163  	}
   164  
   165  	if ints[4], err = strconv.ParseUint(parts[5], 10, 64); err != nil {
   166  		return nil, ErrInvalidFormat
   167  	}
   168  
   169  	if ints[5], err = strconv.ParseUint(parts[6], 10, 64); err != nil {
   170  		return nil, ErrInvalidFormat
   171  	}
   172  
   173  	return ints, nil
   174  }