kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/go/platform/tools/kzip/mergecmd/mergecmd.go (about)

     1  /*
     2   * Copyright 2019 The Kythe Authors. All rights reserved.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *   http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  // Package mergecmd provides the kzip command for merging archives.
    18  package mergecmd // import "kythe.io/kythe/go/platform/tools/kzip/mergecmd"
    19  
    20  import (
    21  	"bufio"
    22  	"context"
    23  	"flag"
    24  	"fmt"
    25  	"io"
    26  	"os"
    27  	"path/filepath"
    28  	"strings"
    29  
    30  	"kythe.io/kythe/go/platform/kzip"
    31  	"kythe.io/kythe/go/platform/tools/kzip/flags"
    32  	"kythe.io/kythe/go/platform/vfs"
    33  	"kythe.io/kythe/go/util/cmdutil"
    34  	"kythe.io/kythe/go/util/log"
    35  
    36  	"bitbucket.org/creachadair/stringset"
    37  	"github.com/google/subcommands"
    38  )
    39  
    40  type mergeCommand struct {
    41  	cmdutil.Info
    42  
    43  	output             string
    44  	inputFileList      string
    45  	append             bool
    46  	encoding           flags.EncodingFlag
    47  	recursive          bool
    48  	ignoreDuplicateCUs bool
    49  	rules              vnameRules
    50  
    51  	unitsBeforeFiles bool
    52  }
    53  
    54  // New creates a new subcommand for merging kzip files.
    55  func New() subcommands.Command {
    56  	return &mergeCommand{
    57  		Info:     cmdutil.NewInfo("merge", "merge kzip files", "--output path kzip-file*"),
    58  		encoding: flags.EncodingFlag{Encoding: kzip.DefaultEncoding()},
    59  	}
    60  }
    61  
    62  // SetFlags implements the subcommands interface and provides command-specific flags
    63  // for merging kzip files.
    64  func (c *mergeCommand) SetFlags(fs *flag.FlagSet) {
    65  	fs.StringVar(&c.output, "output", "", "Path to output kzip file")
    66  	fs.StringVar(&c.inputFileList, "input_file_list", "", "Path to a newline-delimited text file containing a list of input kzip files. If '-' is specified, the file list is read from stdin")
    67  	fs.BoolVar(&c.append, "append", false, "Whether to additionally merge the contents of the existing output file, if it exists")
    68  	fs.Var(&c.encoding, "encoding", "Encoding to use on output, one of JSON, PROTO, or ALL")
    69  	fs.BoolVar(&c.recursive, "recursive", false, "Recurisvely merge .kzip files from directories")
    70  	fs.Var(&c.rules, "rules", "VName rules to apply while merging (optional)")
    71  	fs.BoolVar(&c.ignoreDuplicateCUs, "ignore_duplicate_cus", false, "Do not fail if we try to add the same CU twice")
    72  	fs.BoolVar(&c.unitsBeforeFiles, "experimental_write_units_first", false, "When writing the kzip file, puts CU entries before files")
    73  }
    74  
    75  // Execute implements the subcommands interface and merges the provided files.
    76  func (c *mergeCommand) Execute(ctx context.Context, fs *flag.FlagSet, _ ...any) subcommands.ExitStatus {
    77  	if c.output == "" {
    78  		return c.Fail("Required --output path missing")
    79  	}
    80  	opt := kzip.WithEncoding(c.encoding.Encoding)
    81  	dir, file := filepath.Split(c.output)
    82  	if dir == "" {
    83  		dir = "."
    84  	}
    85  	tmpOut, err := vfs.CreateTempFile(ctx, dir, file)
    86  	if err != nil {
    87  		return c.Fail("Error creating temp output: %v", err)
    88  	}
    89  	tmpName := tmpOut.Name()
    90  	defer func() {
    91  		if tmpOut != nil {
    92  			tmpOut.Close()
    93  			vfs.Remove(ctx, tmpName)
    94  		}
    95  	}()
    96  
    97  	var archives []string
    98  	if c.inputFileList != "" && len(fs.Args()) > 0 {
    99  		return c.Fail("Specify *either* --input_file_list or positional arguments, but not both")
   100  	}
   101  	if c.inputFileList != "" {
   102  		archives, err = fileListFromTextFile(c.inputFileList)
   103  		if err != nil {
   104  			return c.Fail("Error reading input file list: %v", err)
   105  		}
   106  	} else {
   107  		archives = fs.Args()
   108  	}
   109  
   110  	if c.recursive {
   111  		archives, err = recurseDirectories(ctx, archives)
   112  		if err != nil {
   113  			return c.Fail("Error reading archives: %s", err)
   114  		}
   115  	}
   116  	if c.append {
   117  		orig, err := vfs.Open(ctx, c.output)
   118  		if err == nil {
   119  			archives = append([]string{c.output}, archives...)
   120  			if err := orig.Close(); err != nil {
   121  				return c.Fail("Error closing original: %v", err)
   122  			}
   123  		}
   124  	}
   125  	if err := c.mergeArchives(ctx, tmpOut, archives, opt); err != nil {
   126  		return c.Fail("Error merging archives: %v", err)
   127  	}
   128  	if err := vfs.Rename(ctx, tmpName, c.output); err != nil {
   129  		return c.Fail("Error renaming tmp to output: %v", err)
   130  	}
   131  	return subcommands.ExitSuccess
   132  }
   133  
   134  func (c *mergeCommand) mergeArchives(ctx context.Context, out io.WriteCloser, archives []string, opts ...kzip.WriterOption) error {
   135  	wr, err := kzip.NewWriteCloser(out, opts...)
   136  	if err != nil {
   137  		out.Close()
   138  		return fmt.Errorf("error creating writer: %v", err)
   139  	}
   140  
   141  	filesAdded := stringset.New()
   142  	for _, path := range archives {
   143  		if err := c.mergeInto(ctx, wr, path, filesAdded); err != nil {
   144  			wr.Close()
   145  			return err
   146  		}
   147  	}
   148  
   149  	if err := wr.Close(); err != nil {
   150  		return fmt.Errorf("error closing writer: %v", err)
   151  	}
   152  	return nil
   153  }
   154  
   155  func (c *mergeCommand) mergeInto(ctx context.Context, wr *kzip.Writer, path string, filesAdded stringset.Set) error {
   156  	f, err := vfs.Open(ctx, path)
   157  	if err != nil {
   158  		return fmt.Errorf("error opening archive: %v", err)
   159  	}
   160  	defer f.Close()
   161  
   162  	stat, err := vfs.Stat(ctx, path)
   163  	if err != nil {
   164  		return err
   165  	}
   166  	size := stat.Size()
   167  	if size == 0 {
   168  		log.InfoContextf(ctx, "Skipping empty .kzip: %s", path)
   169  		return nil
   170  	}
   171  
   172  	rd, err := kzip.NewReader(f, size)
   173  	if err != nil {
   174  		return fmt.Errorf("error creating reader: %v", err)
   175  	}
   176  
   177  	if c.unitsBeforeFiles {
   178  		var requiredDigests []string
   179  		if err := c.mergeUnitsInto(ctx, wr, rd, func(digest string) error {
   180  			requiredDigests = append(requiredDigests, digest)
   181  			return nil
   182  		}); err != nil {
   183  			return err
   184  		}
   185  		for _, digest := range requiredDigests {
   186  			if err := copyFileInto(wr, rd, digest, filesAdded); err != nil {
   187  				return err
   188  			}
   189  		}
   190  		return nil
   191  	}
   192  	return c.mergeUnitsInto(ctx, wr, rd, func(digest string) error {
   193  		return copyFileInto(wr, rd, digest, filesAdded)
   194  	})
   195  }
   196  
   197  func (c *mergeCommand) mergeUnitsInto(ctx context.Context, wr *kzip.Writer, rd *kzip.Reader, f func(digest string) error) error {
   198  	return rd.Scan(func(u *kzip.Unit) error {
   199  		for _, ri := range u.Proto.RequiredInput {
   200  			if err := f(ri.Info.Digest); err != nil {
   201  				return err
   202  			}
   203  			if vname, match := c.rules.Apply(ri.Info.Path); match {
   204  				ri.VName = vname
   205  			}
   206  		}
   207  		// TODO(schroederc): duplicate compilations with different revisions
   208  		_, err := wr.AddUnit(u.Proto, u.Index)
   209  		if c.ignoreDuplicateCUs && err == kzip.ErrUnitExists {
   210  			log.InfoContextf(ctx, "Found duplicate CU: %v", u.Proto.GetDetails())
   211  			return nil
   212  		}
   213  		return err
   214  	})
   215  }
   216  
   217  func copyFileInto(wr *kzip.Writer, rd *kzip.Reader, digest string, filesAdded stringset.Set) error {
   218  	if filesAdded.Add(digest) {
   219  		r, err := rd.Open(digest)
   220  		if err != nil {
   221  			return fmt.Errorf("error opening file: %v", err)
   222  		}
   223  		if _, err := wr.AddFile(r); err != nil {
   224  			r.Close()
   225  			return fmt.Errorf("error adding file: %v", err)
   226  		} else if err := r.Close(); err != nil {
   227  			return fmt.Errorf("error closing file: %v", err)
   228  		}
   229  	}
   230  	return nil
   231  }
   232  
   233  func recurseDirectories(ctx context.Context, archives []string) ([]string, error) {
   234  	var files []string
   235  	for _, path := range archives {
   236  		err := vfs.Walk(ctx, path, func(file string, info os.FileInfo, err error) error {
   237  			if err != nil || info.IsDir() {
   238  				return err
   239  			}
   240  
   241  			// Include the file if it was directly specified or ends in .kzip.
   242  			if file == path || strings.HasSuffix(file, ".kzip") {
   243  				files = append(files, file)
   244  			}
   245  
   246  			return err
   247  		})
   248  		if err != nil {
   249  			return files, err
   250  		}
   251  	}
   252  	return files, nil
   253  
   254  }
   255  
   256  // fileListFromTextFile returns a list of entries from a newline-delimited text
   257  // file
   258  func fileListFromTextFile(filePath string) ([]string, error) {
   259  	var f *os.File
   260  	if filePath == "-" {
   261  		f = os.Stdin
   262  	} else {
   263  		var err error
   264  		f, err = os.Open(filePath)
   265  		if err != nil {
   266  			return nil, err
   267  		}
   268  		defer f.Close()
   269  	}
   270  	scanner := bufio.NewScanner(f)
   271  	scanner.Split(bufio.ScanLines)
   272  
   273  	var kzipPaths []string
   274  	for scanner.Scan() {
   275  		if scanner.Text() != "" {
   276  			kzipPaths = append(kzipPaths, scanner.Text())
   277  		}
   278  	}
   279  
   280  	return kzipPaths, nil
   281  }