github.com/wtsi-ssg/wrstat/v3@v3.2.3/combine/usergroup.go (about)

     1  /*******************************************************************************
     2   * Copyright (c) 2022 Genome Research Ltd.
     3   *
     4   * Author: Sendu Bala <sb10@sanger.ac.uk>
     5   * 		   Kyle Mace  <km34@sanger.ac.uk>
     6   *
     7   * Permission is hereby granted, free of charge, to any person obtaining
     8   * a copy of this software and associated documentation files (the
     9   * "Software"), to deal in the Software without restriction, including
    10   * without limitation the rights to use, copy, modify, merge, publish,
    11   * distribute, sublicense, and/or sell copies of the Software, and to
    12   * permit persons to whom the Software is furnished to do so, subject to
    13   * the following conditions:
    14   *
    15   * The above copyright notice and this permission notice shall be included
    16   * in all copies or substantial portions of the Software.
    17   *
    18   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
    19   * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
    20   * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
    21   * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
    22   * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
    23   * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
    24   * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
    25   ******************************************************************************/
    26  
    27  package combine
    28  
    29  import (
    30  	"io"
    31  	"os"
    32  	"strconv"
    33  )
    34  
    35  const userGroupSumCols = 3
    36  const intBase = 10
    37  
    38  // UserGroupFiles combines pre-sorted .byusergroup file data, by merging lines
    39  // that share the same first 3 column values in to a single line, and
    40  // compressing the output.
    41  func UserGroupFiles(inputs []*os.File, output *os.File) error {
    42  	return MergeAndCompress(inputs, output, mergeUserGroupStream)
    43  }
    44  
    45  // mergeUserGroupStream is a Merger that sums when the first 3 columns match.
    46  func mergeUserGroupStream(data io.ReadCloser, output io.Writer) error {
    47  	return MergeSummaryLines(data, userGroupSumCols, numSummaryColumns, sumCountAndSize, output)
    48  }
    49  
    50  // sumCountAndSize is a matchingSummaryLineMerger that, given cols 2,  will sum
    51  // the second to last element of a and b and store the result in a[penultimate],
    52  // and likewise for the last element in a[last]. This corresponds to summing the
    53  // file count and size columns of 2 lines in a by* file.
    54  func sumCountAndSize(cols int, a, b []string) {
    55  	last := len(a) - (cols - 1)
    56  	penultimate := last - 1
    57  
    58  	a[penultimate] = addNumberStrings(a[penultimate], b[penultimate])
    59  	a[last] = addNumberStrings(a[last], b[last])
    60  }
    61  
    62  // addNumberStrings treats a and b as ints, adds them together, and returns the
    63  // resulting int64 as a string.
    64  func addNumberStrings(a, b string) string {
    65  	return strconv.FormatInt(atoi(a)+atoi(b), intBase)
    66  }
    67  
    68  // atoi is like strconv.Atoi but returns an int64 and dies on error.
    69  func atoi(n string) int64 {
    70  	i, err := strconv.ParseInt(n, intBase, 0)
    71  	if err != nil {
    72  		panic("Was not able to convert string.")
    73  	}
    74  
    75  	return i
    76  }