// Source: kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/go/util/archive/reader.go

/*
 * Copyright 2016 The Kythe Authors. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Package archive provides support for reading the contents of archives such
// as .zip and .tar files.
package archive // import "kythe.io/kythe/go/util/archive"

import (
	"archive/tar"
	"archive/zip"
	"compress/bzip2"
	"compress/gzip"
	"errors"
	"fmt"
	"io"
	"path/filepath"
	"strings"
)

// File defines the input capabilities needed to scan an archive file.
//
// Scan uses Seek to measure the size of ZIP inputs, ReadAt (through
// zip.NewReader) for random access to ZIP entries, and Read to stream tar
// inputs. Nothing in this file calls Close; presumably the caller owns the
// lifetime of the file -- confirm against callers.
type File interface {
	io.Closer
	io.Reader
	io.ReaderAt
	io.Seeker
}

// ErrNotArchive is returned by Scan when passed a file it does not recognize
// as a readable archive. Recognition is based solely on the extension of the
// path given to Scan; the file's contents are not inspected.
var ErrNotArchive = errors.New("not a supported archive file")

// A ScanFunc is invoked by the Scan function for each file found in the
// specified archive. The arguments are the filename as encoded in the archive,
// and either a non-nil error (with a nil reader) or a reader positioned at the
// beginning of the file's contents.
//
// Any error returned by the ScanFunc is propagated to the caller of Scan,
// terminating the traversal of the archive. The callback may choose to ignore
// err by returning nil, in which case the error is dropped and scanning
// continues.
53 type ScanFunc func(filename string, err error, r io.Reader) error 54 55 // Scan sequentially scans the contents of an archive and invokes f for each 56 // file found. If f returns an error, scanning stops and that error is returned 57 // to the caller of Scan. The path is used to determine what type of archive is 58 // referred to by file. If the type is not known, it returns ErrNotArchive. 59 // 60 // The supported archive formats are: 61 // 62 // .zip -- ZIP archive (also .ZIP, .jar) 63 // .tar -- uncompressed tar 64 // .tar.gz -- gzip-compressed tar (also .tgz) 65 // .tar.bz2 -- bzip2-compressed tar 66 // 67 // Scan only invokes f for file entries; directories are not included. 68 func Scan(file File, path string, f ScanFunc) error { 69 format, compression := parsePath(path) 70 switch format { 71 case ".zip": 72 size, err := file.Seek(0, io.SeekEnd) 73 if err != nil { 74 return fmt.Errorf("archive: finding ZIP file size: %v", err) 75 } 76 archive, err := zip.NewReader(file, size) 77 if err != nil { 78 return fmt.Errorf("archive: opening ZIP reader: %v", err) 79 } 80 81 for _, entry := range archive.File { 82 rc, err := entry.Open() 83 err = f(entry.Name, err, rc) 84 rc.Close() 85 if err != nil { 86 return err 87 } 88 } 89 90 case ".tar": 91 r := io.Reader(file) 92 switch compression { 93 case ".gz": 94 gz, err := gzip.NewReader(file) 95 if err != nil { 96 return fmt.Errorf("archive: opening gzip reader: %v", err) 97 } 98 r = gz 99 case ".bz2": 100 r = bzip2.NewReader(file) 101 case "": 102 default: 103 } 104 archive := tar.NewReader(r) 105 106 for { 107 entry, err := archive.Next() 108 if err == io.EOF { 109 break 110 } 111 isFile := entry != nil && entry.FileInfo().Mode().IsRegular() 112 113 // If we got an entry of any kind, invoke the callback whether or 114 // not we have an error. If we didn't get an entry, treat an error 115 // here as fatal. 
116 if err == nil { 117 err = f(entry.Name, nil, archive) 118 } else if isFile { 119 err = f(entry.Name, err, nil) 120 } 121 if err != nil { 122 return err 123 } 124 } 125 126 default: 127 return ErrNotArchive 128 } 129 return nil 130 } 131 132 // parsePath determines which file format is represented by path, returning the 133 // base file format (.zip or .tar) and the additional compression format 134 // extension (.gz or .bz2), or "" if there is no additional compression. 135 // Returns "", "" if the format could not be determined. 136 func parsePath(path string) (format, compression string) { 137 switch ext := filepath.Ext(path); ext { 138 case ".zip", ".ZIP", ".jar": 139 return ".zip", "" 140 case ".tar": 141 return ext, "" 142 case ".tgz": 143 return ".tar", ".gz" 144 case ".gz", ".bz2": 145 base := filepath.Ext(strings.TrimSuffix(path, ext)) 146 if base == ".tar" { 147 return base, ext 148 } 149 } 150 return "", "" // format unknown 151 }