kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/go/platform/tools/kzip/mergecmd/mergecmd.go (about) 1 /* 2 * Copyright 2019 The Kythe Authors. All rights reserved. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 // Package mergecmd provides the kzip command for merging archives. 18 package mergecmd // import "kythe.io/kythe/go/platform/tools/kzip/mergecmd" 19 20 import ( 21 "bufio" 22 "context" 23 "flag" 24 "fmt" 25 "io" 26 "os" 27 "path/filepath" 28 "strings" 29 30 "kythe.io/kythe/go/platform/kzip" 31 "kythe.io/kythe/go/platform/tools/kzip/flags" 32 "kythe.io/kythe/go/platform/vfs" 33 "kythe.io/kythe/go/util/cmdutil" 34 "kythe.io/kythe/go/util/log" 35 36 "bitbucket.org/creachadair/stringset" 37 "github.com/google/subcommands" 38 ) 39 40 type mergeCommand struct { 41 cmdutil.Info 42 43 output string 44 inputFileList string 45 append bool 46 encoding flags.EncodingFlag 47 recursive bool 48 ignoreDuplicateCUs bool 49 rules vnameRules 50 51 unitsBeforeFiles bool 52 } 53 54 // New creates a new subcommand for merging kzip files. 55 func New() subcommands.Command { 56 return &mergeCommand{ 57 Info: cmdutil.NewInfo("merge", "merge kzip files", "--output path kzip-file*"), 58 encoding: flags.EncodingFlag{Encoding: kzip.DefaultEncoding()}, 59 } 60 } 61 62 // SetFlags implements the subcommands interface and provides command-specific flags 63 // for merging kzip files. 64 func (c *mergeCommand) SetFlags(fs *flag.FlagSet) { 65 fs.StringVar(&c.output, "output", "", "Path to output kzip file") 66 fs.StringVar(&c.inputFileList, "input_file_list", "", "Path to a newline-delimited text file containing a list of input kzip files. If '-' is specified, the file list is read from stdin") 67 fs.BoolVar(&c.append, "append", false, "Whether to additionally merge the contents of the existing output file, if it exists") 68 fs.Var(&c.encoding, "encoding", "Encoding to use on output, one of JSON, PROTO, or ALL") 69 fs.BoolVar(&c.recursive, "recursive", false, "Recurisvely merge .kzip files from directories") 70 fs.Var(&c.rules, "rules", "VName rules to apply while merging (optional)") 71 fs.BoolVar(&c.ignoreDuplicateCUs, "ignore_duplicate_cus", false, "Do not fail if we try to add the same CU twice") 72 fs.BoolVar(&c.unitsBeforeFiles, "experimental_write_units_first", false, "When writing the kzip file, puts CU entries before files") 73 } 74 75 // Execute implements the subcommands interface and merges the provided files. 76 func (c *mergeCommand) Execute(ctx context.Context, fs *flag.FlagSet, _ ...any) subcommands.ExitStatus { 77 if c.output == "" { 78 return c.Fail("Required --output path missing") 79 } 80 opt := kzip.WithEncoding(c.encoding.Encoding) 81 dir, file := filepath.Split(c.output) 82 if dir == "" { 83 dir = "." 84 } 85 tmpOut, err := vfs.CreateTempFile(ctx, dir, file) 86 if err != nil { 87 return c.Fail("Error creating temp output: %v", err) 88 } 89 tmpName := tmpOut.Name() 90 defer func() { 91 if tmpOut != nil { 92 tmpOut.Close() 93 vfs.Remove(ctx, tmpName) 94 } 95 }() 96 97 var archives []string 98 if c.inputFileList != "" && len(fs.Args()) > 0 { 99 return c.Fail("Specify *either* --input_file_list or positional arguments, but not both") 100 } 101 if c.inputFileList != "" { 102 archives, err = fileListFromTextFile(c.inputFileList) 103 if err != nil { 104 return c.Fail("Error reading input file list: %v", err) 105 } 106 } else { 107 archives = fs.Args() 108 } 109 110 if c.recursive { 111 archives, err = recurseDirectories(ctx, archives) 112 if err != nil { 113 return c.Fail("Error reading archives: %s", err) 114 } 115 } 116 if c.append { 117 orig, err := vfs.Open(ctx, c.output) 118 if err == nil { 119 archives = append([]string{c.output}, archives...) 120 if err := orig.Close(); err != nil { 121 return c.Fail("Error closing original: %v", err) 122 } 123 } 124 } 125 if err := c.mergeArchives(ctx, tmpOut, archives, opt); err != nil { 126 return c.Fail("Error merging archives: %v", err) 127 } 128 if err := vfs.Rename(ctx, tmpName, c.output); err != nil { 129 return c.Fail("Error renaming tmp to output: %v", err) 130 } 131 return subcommands.ExitSuccess 132 } 133 134 func (c *mergeCommand) mergeArchives(ctx context.Context, out io.WriteCloser, archives []string, opts ...kzip.WriterOption) error { 135 wr, err := kzip.NewWriteCloser(out, opts...) 136 if err != nil { 137 out.Close() 138 return fmt.Errorf("error creating writer: %v", err) 139 } 140 141 filesAdded := stringset.New() 142 for _, path := range archives { 143 if err := c.mergeInto(ctx, wr, path, filesAdded); err != nil { 144 wr.Close() 145 return err 146 } 147 } 148 149 if err := wr.Close(); err != nil { 150 return fmt.Errorf("error closing writer: %v", err) 151 } 152 return nil 153 } 154 155 func (c *mergeCommand) mergeInto(ctx context.Context, wr *kzip.Writer, path string, filesAdded stringset.Set) error { 156 f, err := vfs.Open(ctx, path) 157 if err != nil { 158 return fmt.Errorf("error opening archive: %v", err) 159 } 160 defer f.Close() 161 162 stat, err := vfs.Stat(ctx, path) 163 if err != nil { 164 return err 165 } 166 size := stat.Size() 167 if size == 0 { 168 log.InfoContextf(ctx, "Skipping empty .kzip: %s", path) 169 return nil 170 } 171 172 rd, err := kzip.NewReader(f, size) 173 if err != nil { 174 return fmt.Errorf("error creating reader: %v", err) 175 } 176 177 if c.unitsBeforeFiles { 178 var requiredDigests []string 179 if err := c.mergeUnitsInto(ctx, wr, rd, func(digest string) error { 180 requiredDigests = append(requiredDigests, digest) 181 return nil 182 }); err != nil { 183 return err 184 } 185 for _, digest := range requiredDigests { 186 if err := copyFileInto(wr, rd, digest, filesAdded); err != nil { 187 return err 188 } 189 } 190 return nil 191 } 192 return c.mergeUnitsInto(ctx, wr, rd, func(digest string) error { 193 return copyFileInto(wr, rd, digest, filesAdded) 194 }) 195 } 196 197 func (c *mergeCommand) mergeUnitsInto(ctx context.Context, wr *kzip.Writer, rd *kzip.Reader, f func(digest string) error) error { 198 return rd.Scan(func(u *kzip.Unit) error { 199 for _, ri := range u.Proto.RequiredInput { 200 if err := f(ri.Info.Digest); err != nil { 201 return err 202 } 203 if vname, match := c.rules.Apply(ri.Info.Path); match { 204 ri.VName = vname 205 } 206 } 207 // TODO(schroederc): duplicate compilations with different revisions 208 _, err := wr.AddUnit(u.Proto, u.Index) 209 if c.ignoreDuplicateCUs && err == kzip.ErrUnitExists { 210 log.InfoContextf(ctx, "Found duplicate CU: %v", u.Proto.GetDetails()) 211 return nil 212 } 213 return err 214 }) 215 } 216 217 func copyFileInto(wr *kzip.Writer, rd *kzip.Reader, digest string, filesAdded stringset.Set) error { 218 if filesAdded.Add(digest) { 219 r, err := rd.Open(digest) 220 if err != nil { 221 return fmt.Errorf("error opening file: %v", err) 222 } 223 if _, err := wr.AddFile(r); err != nil { 224 r.Close() 225 return fmt.Errorf("error adding file: %v", err) 226 } else if err := r.Close(); err != nil { 227 return fmt.Errorf("error closing file: %v", err) 228 } 229 } 230 return nil 231 } 232 233 func recurseDirectories(ctx context.Context, archives []string) ([]string, error) { 234 var files []string 235 for _, path := range archives { 236 err := vfs.Walk(ctx, path, func(file string, info os.FileInfo, err error) error { 237 if err != nil || info.IsDir() { 238 return err 239 } 240 241 // Include the file if it was directly specified or ends in .kzip. 242 if file == path || strings.HasSuffix(file, ".kzip") { 243 files = append(files, file) 244 } 245 246 return err 247 }) 248 if err != nil { 249 return files, err 250 } 251 } 252 return files, nil 253 254 } 255 256 // fileListFromTextFile returns a list of entries from a newline-delimited text 257 // file 258 func fileListFromTextFile(filePath string) ([]string, error) { 259 var f *os.File 260 if filePath == "-" { 261 f = os.Stdin 262 } else { 263 var err error 264 f, err = os.Open(filePath) 265 if err != nil { 266 return nil, err 267 } 268 defer f.Close() 269 } 270 scanner := bufio.NewScanner(f) 271 scanner.Split(bufio.ScanLines) 272 273 var kzipPaths []string 274 for scanner.Scan() { 275 if scanner.Text() != "" { 276 kzipPaths = append(kzipPaths, scanner.Text()) 277 } 278 } 279 280 return kzipPaths, nil 281 }