kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/go/util/archive/reader.go (about)

     1  /*
     2   * Copyright 2016 The Kythe Authors. All rights reserved.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *   http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  // Package archive provides support for reading the contents of archives such
    18  // as .zip and .tar files.
    19  package archive // import "kythe.io/kythe/go/util/archive"
    20  
    21  import (
    22  	"archive/tar"
    23  	"archive/zip"
    24  	"compress/bzip2"
    25  	"compress/gzip"
    26  	"errors"
    27  	"fmt"
    28  	"io"
    29  	"path/filepath"
    30  	"strings"
    31  )
    32  
// File defines the input capabilities needed to scan an archive file.
//
// Scan uses Seek to discover the total size of a ZIP archive, ReadAt (via
// zip.NewReader) to read ZIP entries, and Read to stream tar archives. Scan
// itself never calls Close; the method is part of the interface so that
// whoever owns the file can release it.
type File interface {
	io.Closer
	io.Reader   // sequential reads, used for tar streams
	io.ReaderAt // random access, used for ZIP archives
	io.Seeker   // used to find the size of a ZIP archive
}
    40  
// ErrNotArchive is returned by Scan when the extension of the path it is
// given does not identify a supported archive format. The decision is made
// from the path alone; the contents of the file are not inspected.
var ErrNotArchive = errors.New("not a supported archive file")
    44  
// A ScanFunc is invoked by the Scan function for each entry found in the
// specified archive. The arguments are the filename as encoded in the archive,
// and either an error or a reader positioned at the beginning of the entry's
// contents. When err is non-nil, r is nil and must not be read.
//
// For tar archives r is the shared tar reader, so it is only valid until the
// callback returns; the next entry replaces its contents.
//
// Any error returned by the ScanFunc is propagated to the caller of Scan,
// terminating the traversal of the archive.  The callback may choose to ignore
// err by returning nil, in which case scanning continues with the next entry.
type ScanFunc func(filename string, err error, r io.Reader) error
    54  
    55  // Scan sequentially scans the contents of an archive and invokes f for each
    56  // file found. If f returns an error, scanning stops and that error is returned
    57  // to the caller of Scan. The path is used to determine what type of archive is
    58  // referred to by file. If the type is not known, it returns ErrNotArchive.
    59  //
    60  // The supported archive formats are:
    61  //
    62  //	.zip     -- ZIP archive (also .ZIP, .jar)
    63  //	.tar     -- uncompressed tar
    64  //	.tar.gz  -- gzip-compressed tar (also .tgz)
    65  //	.tar.bz2 -- bzip2-compressed tar
    66  //
    67  // Scan only invokes f for file entries; directories are not included.
    68  func Scan(file File, path string, f ScanFunc) error {
    69  	format, compression := parsePath(path)
    70  	switch format {
    71  	case ".zip":
    72  		size, err := file.Seek(0, io.SeekEnd)
    73  		if err != nil {
    74  			return fmt.Errorf("archive: finding ZIP file size: %v", err)
    75  		}
    76  		archive, err := zip.NewReader(file, size)
    77  		if err != nil {
    78  			return fmt.Errorf("archive: opening ZIP reader: %v", err)
    79  		}
    80  
    81  		for _, entry := range archive.File {
    82  			rc, err := entry.Open()
    83  			err = f(entry.Name, err, rc)
    84  			rc.Close()
    85  			if err != nil {
    86  				return err
    87  			}
    88  		}
    89  
    90  	case ".tar":
    91  		r := io.Reader(file)
    92  		switch compression {
    93  		case ".gz":
    94  			gz, err := gzip.NewReader(file)
    95  			if err != nil {
    96  				return fmt.Errorf("archive: opening gzip reader: %v", err)
    97  			}
    98  			r = gz
    99  		case ".bz2":
   100  			r = bzip2.NewReader(file)
   101  		case "":
   102  		default:
   103  		}
   104  		archive := tar.NewReader(r)
   105  
   106  		for {
   107  			entry, err := archive.Next()
   108  			if err == io.EOF {
   109  				break
   110  			}
   111  			isFile := entry != nil && entry.FileInfo().Mode().IsRegular()
   112  
   113  			// If we got an entry of any kind, invoke the callback whether or
   114  			// not we have an error. If we didn't get an entry, treat an error
   115  			// here as fatal.
   116  			if err == nil {
   117  				err = f(entry.Name, nil, archive)
   118  			} else if isFile {
   119  				err = f(entry.Name, err, nil)
   120  			}
   121  			if err != nil {
   122  				return err
   123  			}
   124  		}
   125  
   126  	default:
   127  		return ErrNotArchive
   128  	}
   129  	return nil
   130  }
   131  
   132  // parsePath determines which file format is represented by path, returning the
   133  // base file format (.zip or .tar) and the additional compression format
   134  // extension (.gz or .bz2), or "" if there is no additional compression.
   135  // Returns "", "" if the format could not be determined.
   136  func parsePath(path string) (format, compression string) {
   137  	switch ext := filepath.Ext(path); ext {
   138  	case ".zip", ".ZIP", ".jar":
   139  		return ".zip", ""
   140  	case ".tar":
   141  		return ext, ""
   142  	case ".tgz":
   143  		return ".tar", ".gz"
   144  	case ".gz", ".bz2":
   145  		base := filepath.Ext(strings.TrimSuffix(path, ext))
   146  		if base == ".tar" {
   147  			return base, ext
   148  		}
   149  	}
   150  	return "", "" // format unknown
   151  }