github.com/wtsi-ssg/wrstat/v4@v4.5.1/cmd/combine.go (about) 1 /******************************************************************************* 2 * Copyright (c) 2021-2022 Genome Research Ltd. 3 * 4 * Author: Sendu Bala <sb10@sanger.ac.uk> 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining 7 * a copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sublicense, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice shall be included 15 * in all copies or substantial portions of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 20 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 21 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 22 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 23 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 24 ******************************************************************************/ 25 26 package cmd 27 28 import ( 29 "path/filepath" 30 "sync" 31 32 "github.com/spf13/cobra" 33 "github.com/wtsi-ssg/wrstat/v4/combine" 34 "github.com/wtsi-ssg/wrstat/v4/fs" 35 ) 36 37 const combineStatsOutputFileBasename = "combine.stats.gz" 38 const combineUserGroupOutputFileBasename = "combine.byusergroup.gz" 39 const combineGroupOutputFileBasename = "combine.bygroup" 40 const combineDGUTOutputFileBasename = "combine.dgut.db" 41 const combineLogOutputFileBasename = "combine.log.gz" 42 43 // combineCmd represents the combine command. 44 var combineCmd = &cobra.Command{ 45 Use: "combine", 46 Short: "Combine the .stats files produced by 'wrstat walk'", 47 Long: `Combine the .stats files produced by 'wrstat walk'. 48 49 Within the given output directory, all the 'wrstat stat' *.stats files produced 50 following an invocation of 'wrstat walk' will be concatenated, compressed and 51 placed at the root of the output directory in a file called 'combine.stats.gz'. 52 53 Likewise, all the 'wrstat stat' *.byusergroup files will be merged, 54 compressed and placed at the root of the output directory in a file called 55 'combine.byusergroup.gz'. 56 57 The same applies to the *.log files, being called 'combine.log.gz'. 58 59 The *.dugt files will be turned in to databases in a directory 60 'combine.dgut.db'. 61 62 The *.bygroup files are merged but not compressed and called 'combine.bygroup'. 63 64 NB: only call this by adding it to wr with a dependency on the dependency group 65 you supplied 'wrstat walk'.`, 66 Run: func(cmd *cobra.Command, args []string) { 67 if len(args) != 1 { 68 die("exactly 1 'wrstat walk' output directory must be supplied") 69 } 70 71 sourceDir, err := filepath.Abs(args[0]) 72 if err != nil { 73 die("could not get the absolute path to [%s]: %s", args[0], err) 74 } 75 76 var wg sync.WaitGroup 77 78 wg.Add(1) 79 go func() { 80 defer wg.Done() 81 concatenateAndCompressStatsFiles(sourceDir) 82 }() 83 84 wg.Add(1) 85 go func() { 86 defer wg.Done() 87 mergeAndCompressUserGroupFiles(sourceDir) 88 }() 89 90 wg.Add(1) 91 go func() { 92 defer wg.Done() 93 mergeGroupFiles(sourceDir) 94 }() 95 96 wg.Add(1) 97 go func() { 98 defer wg.Done() 99 mergeDGUTFilesToDB(sourceDir) 100 }() 101 102 wg.Add(1) 103 go func() { 104 defer wg.Done() 105 concatenateAndCompressLogFiles(sourceDir) 106 }() 107 108 wg.Wait() 109 }, 110 } 111 112 func init() { 113 RootCmd.AddCommand(combineCmd) 114 } 115 116 // concatenateAndCompressStatsFiles finds and concatenates the stats files and 117 // compresses the output. 118 func concatenateAndCompressStatsFiles(sourceDir string) { 119 inputFiles, outputFile, err := fs.FindOpenAndCreate(sourceDir, sourceDir, statOutputFileSuffix, 120 combineStatsOutputFileBasename) 121 if err != nil { 122 die("failed to find, open or create stats files: %s", err) 123 } 124 125 if err = combine.StatFiles(inputFiles, outputFile); err != nil { 126 die("failed to concatenate and compress stats files (err: %s)", err) 127 } 128 } 129 130 // mergeAndCompressUserGroupFiles finds and merges the byusergroup files and 131 // compresses the output. 132 func mergeAndCompressUserGroupFiles(sourceDir string) { 133 inputFiles, outputFile, err := fs.FindOpenAndCreate(sourceDir, 134 sourceDir, statUserGroupSummaryOutputFileSuffix, combineUserGroupOutputFileBasename) 135 if err != nil { 136 die("failed to find, open or create usergroup files: %s", err) 137 } 138 139 if err = combine.UserGroupFiles(inputFiles, outputFile); err != nil { 140 die("failed to merge the user group files: %s", err) 141 } 142 } 143 144 // mergeGroupFiles finds and merges the bygroup files. 145 func mergeGroupFiles(sourceDir string) { 146 inputFiles, outputFile, err := fs.FindOpenAndCreate(sourceDir, sourceDir, 147 statGroupSummaryOutputFileSuffix, combineGroupOutputFileBasename) 148 if err != nil { 149 die("failed to find, open or create group files: %s", err) 150 } 151 152 if err = combine.GroupFiles(inputFiles, outputFile); err != nil { 153 die("failed to merge the group files: %s", err) 154 } 155 } 156 157 // concatenateAndCompressLogFiles finds and merges the log files and compresses the 158 // output. 159 func concatenateAndCompressLogFiles(sourceDir string) { 160 inputFiles, outputFile, err := fs.FindOpenAndCreate(sourceDir, 161 sourceDir, statLogOutputFileSuffix, combineLogOutputFileBasename) 162 if err != nil { 163 die("failed to find, open or create log files: %s", err) 164 } 165 166 if err := combine.LogFiles(inputFiles, outputFile); err != nil { 167 die("failed to merge the log files: %s", err) 168 } 169 } 170 171 // mergeDGUTFilesToDB finds and merges the dgut files and then stores the 172 // information in a database. 173 func mergeDGUTFilesToDB(sourceDir string) { 174 paths, err := fs.FindFilePathsInDir(sourceDir, statDGUTSummaryOutputFileSuffix) 175 if err != nil { 176 die("failed to find the dgut files: %s", err) 177 } 178 179 outputDir := filepath.Join(sourceDir, combineDGUTOutputFileBasename) 180 181 if err = fs.RemoveAndCreateDir(outputDir); err != nil { 182 die("failed to remove or create the dgut directory: %s", err) 183 } 184 185 if err = combine.DgutFiles(paths, outputDir); err != nil { 186 die("failed to merge the dgut files: %s", err) 187 } 188 }