github.com/wtsi-ssg/wrstat/v3@v3.2.3/combine/usergroup.go (about) 1 /******************************************************************************* 2 * Copyright (c) 2022 Genome Research Ltd. 3 * 4 * Author: Sendu Bala <sb10@sanger.ac.uk> 5 * Kyle Mace <km34@sanger.ac.uk> 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining 8 * a copy of this software and associated documentation files (the 9 * "Software"), to deal in the Software without restriction, including 10 * without limitation the rights to use, copy, modify, merge, publish, 11 * distribute, sublicense, and/or sell copies of the Software, and to 12 * permit persons to whom the Software is furnished to do so, subject to 13 * the following conditions: 14 * 15 * The above copyright notice and this permission notice shall be included 16 * in all copies or substantial portions of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 21 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 22 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 ******************************************************************************/ 26 27 package combine 28 29 import ( 30 "io" 31 "os" 32 "strconv" 33 ) 34 35 const userGroupSumCols = 3 36 const intBase = 10 37 38 // UserGroupFiles combines pre-sorted .byusergroup file data, by merging lines 39 // that share the same first 3 column values in to a single line, and 40 // compressing the output. 41 func UserGroupFiles(inputs []*os.File, output *os.File) error { 42 return MergeAndCompress(inputs, output, mergeUserGroupStream) 43 } 44 45 // mergeUserGroupStream is a Merger that sums when the first 3 columns match. 46 func mergeUserGroupStream(data io.ReadCloser, output io.Writer) error { 47 return MergeSummaryLines(data, userGroupSumCols, numSummaryColumns, sumCountAndSize, output) 48 } 49 50 // sumCountAndSize is a matchingSummaryLineMerger that, given cols 2, will sum 51 // the second to last element of a and b and store the result in a[penultimate], 52 // and likewise for the last element in a[last]. This corresponds to summing the 53 // file count and size columns of 2 lines in a by* file. 54 func sumCountAndSize(cols int, a, b []string) { 55 last := len(a) - (cols - 1) 56 penultimate := last - 1 57 58 a[penultimate] = addNumberStrings(a[penultimate], b[penultimate]) 59 a[last] = addNumberStrings(a[last], b[last]) 60 } 61 62 // addNumberStrings treats a and b as ints, adds them together, and returns the 63 // resulting int64 as a string. 64 func addNumberStrings(a, b string) string { 65 return strconv.FormatInt(atoi(a)+atoi(b), intBase) 66 } 67 68 // atoi is like strconv.Atoi but returns an int64 and dies on error. 69 func atoi(n string) int64 { 70 i, err := strconv.ParseInt(n, intBase, 0) 71 if err != nil { 72 panic("Was not able to convert string.") 73 } 74 75 return i 76 }