github.com/wtsi-ssg/wrstat/v4@v4.5.1/cmd/combine.go (about)

     1  /*******************************************************************************
     2   * Copyright (c) 2021-2022 Genome Research Ltd.
     3   *
     4   * Author: Sendu Bala <sb10@sanger.ac.uk>
     5   *
     6   * Permission is hereby granted, free of charge, to any person obtaining
     7   * a copy of this software and associated documentation files (the
     8   * "Software"), to deal in the Software without restriction, including
     9   * without limitation the rights to use, copy, modify, merge, publish,
    10   * distribute, sublicense, and/or sell copies of the Software, and to
    11   * permit persons to whom the Software is furnished to do so, subject to
    12   * the following conditions:
    13   *
    14   * The above copyright notice and this permission notice shall be included
    15   * in all copies or substantial portions of the Software.
    16   *
    17   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
    18   * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
    19   * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
    20   * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
    21   * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
    22   * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
    23   * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
    24   ******************************************************************************/
    25  
    26  package cmd
    27  
    28  import (
    29  	"path/filepath"
    30  	"sync"
    31  
    32  	"github.com/spf13/cobra"
    33  	"github.com/wtsi-ssg/wrstat/v4/combine"
    34  	"github.com/wtsi-ssg/wrstat/v4/fs"
    35  )
    36  
    37  const combineStatsOutputFileBasename = "combine.stats.gz"
    38  const combineUserGroupOutputFileBasename = "combine.byusergroup.gz"
    39  const combineGroupOutputFileBasename = "combine.bygroup"
    40  const combineDGUTOutputFileBasename = "combine.dgut.db"
    41  const combineLogOutputFileBasename = "combine.log.gz"
    42  
    43  // combineCmd represents the combine command.
    44  var combineCmd = &cobra.Command{
    45  	Use:   "combine",
    46  	Short: "Combine the .stats files produced by 'wrstat walk'",
    47  	Long: `Combine the .stats files produced by 'wrstat walk'.
    48  	
    49  Within the given output directory, all the 'wrstat stat' *.stats files produced
    50  following an invocation of 'wrstat walk' will be concatenated, compressed and
    51  placed at the root of the output directory in a file called 'combine.stats.gz'.
    52  
    53  Likewise, all the 'wrstat stat' *.byusergroup files will be merged,
    54  compressed and placed at the root of the output directory in a file called
    55  'combine.byusergroup.gz'.
    56  
    57  The same applies to the *.log files, being called 'combine.log.gz'.
    58  
    59  The *.dugt files will be turned in to databases in a directory
    60  'combine.dgut.db'.
    61  
    62  The *.bygroup files are merged but not compressed and called 'combine.bygroup'.
    63  
    64  NB: only call this by adding it to wr with a dependency on the dependency group
    65  you supplied 'wrstat walk'.`,
    66  	Run: func(cmd *cobra.Command, args []string) {
    67  		if len(args) != 1 {
    68  			die("exactly 1 'wrstat walk' output directory must be supplied")
    69  		}
    70  
    71  		sourceDir, err := filepath.Abs(args[0])
    72  		if err != nil {
    73  			die("could not get the absolute path to [%s]: %s", args[0], err)
    74  		}
    75  
    76  		var wg sync.WaitGroup
    77  
    78  		wg.Add(1)
    79  		go func() {
    80  			defer wg.Done()
    81  			concatenateAndCompressStatsFiles(sourceDir)
    82  		}()
    83  
    84  		wg.Add(1)
    85  		go func() {
    86  			defer wg.Done()
    87  			mergeAndCompressUserGroupFiles(sourceDir)
    88  		}()
    89  
    90  		wg.Add(1)
    91  		go func() {
    92  			defer wg.Done()
    93  			mergeGroupFiles(sourceDir)
    94  		}()
    95  
    96  		wg.Add(1)
    97  		go func() {
    98  			defer wg.Done()
    99  			mergeDGUTFilesToDB(sourceDir)
   100  		}()
   101  
   102  		wg.Add(1)
   103  		go func() {
   104  			defer wg.Done()
   105  			concatenateAndCompressLogFiles(sourceDir)
   106  		}()
   107  
   108  		wg.Wait()
   109  	},
   110  }
   111  
   112  func init() {
   113  	RootCmd.AddCommand(combineCmd)
   114  }
   115  
   116  // concatenateAndCompressStatsFiles finds and concatenates the stats files and
   117  // compresses the output.
   118  func concatenateAndCompressStatsFiles(sourceDir string) {
   119  	inputFiles, outputFile, err := fs.FindOpenAndCreate(sourceDir, sourceDir, statOutputFileSuffix,
   120  		combineStatsOutputFileBasename)
   121  	if err != nil {
   122  		die("failed to find, open or create stats files: %s", err)
   123  	}
   124  
   125  	if err = combine.StatFiles(inputFiles, outputFile); err != nil {
   126  		die("failed to concatenate and compress stats files (err: %s)", err)
   127  	}
   128  }
   129  
   130  // mergeAndCompressUserGroupFiles finds and merges the byusergroup files and
   131  // compresses the output.
   132  func mergeAndCompressUserGroupFiles(sourceDir string) {
   133  	inputFiles, outputFile, err := fs.FindOpenAndCreate(sourceDir,
   134  		sourceDir, statUserGroupSummaryOutputFileSuffix, combineUserGroupOutputFileBasename)
   135  	if err != nil {
   136  		die("failed to find, open or create usergroup files: %s", err)
   137  	}
   138  
   139  	if err = combine.UserGroupFiles(inputFiles, outputFile); err != nil {
   140  		die("failed to merge the user group files: %s", err)
   141  	}
   142  }
   143  
   144  // mergeGroupFiles finds and merges the bygroup files.
   145  func mergeGroupFiles(sourceDir string) {
   146  	inputFiles, outputFile, err := fs.FindOpenAndCreate(sourceDir, sourceDir,
   147  		statGroupSummaryOutputFileSuffix, combineGroupOutputFileBasename)
   148  	if err != nil {
   149  		die("failed to find, open or create group files: %s", err)
   150  	}
   151  
   152  	if err = combine.GroupFiles(inputFiles, outputFile); err != nil {
   153  		die("failed to merge the group files: %s", err)
   154  	}
   155  }
   156  
   157  // concatenateAndCompressLogFiles finds and merges the log files and compresses the
   158  // output.
   159  func concatenateAndCompressLogFiles(sourceDir string) {
   160  	inputFiles, outputFile, err := fs.FindOpenAndCreate(sourceDir,
   161  		sourceDir, statLogOutputFileSuffix, combineLogOutputFileBasename)
   162  	if err != nil {
   163  		die("failed to find, open or create log files: %s", err)
   164  	}
   165  
   166  	if err := combine.LogFiles(inputFiles, outputFile); err != nil {
   167  		die("failed to merge the log files: %s", err)
   168  	}
   169  }
   170  
   171  // mergeDGUTFilesToDB finds and merges the dgut files and then stores the
   172  // information in a database.
   173  func mergeDGUTFilesToDB(sourceDir string) {
   174  	paths, err := fs.FindFilePathsInDir(sourceDir, statDGUTSummaryOutputFileSuffix)
   175  	if err != nil {
   176  		die("failed to find the dgut files: %s", err)
   177  	}
   178  
   179  	outputDir := filepath.Join(sourceDir, combineDGUTOutputFileBasename)
   180  
   181  	if err = fs.RemoveAndCreateDir(outputDir); err != nil {
   182  		die("failed to remove or create the dgut directory: %s", err)
   183  	}
   184  
   185  	if err = combine.DgutFiles(paths, outputDir); err != nil {
   186  		die("failed to merge the dgut files: %s", err)
   187  	}
   188  }