github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/cmn/archive/list.go (about)

     1  // Package archive: write, read, copy, append, list primitives
     2  // across all supported formats
     3  /*
     4   * Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved.
     5   */
     6  package archive
     7  
     8  import (
     9  	"archive/tar"
    10  	"archive/zip"
    11  	"compress/gzip"
    12  	"io"
    13  	"os"
    14  	"sort"
    15  
    16  	"github.com/NVIDIA/aistore/cmn/cos"
    17  	"github.com/NVIDIA/aistore/cmn/debug"
    18  	"github.com/pierrec/lz4/v3"
    19  )
    20  
    21  // TODO (feature): support non-standard file extensions (see NOTE below)
    22  
// Entry describes a single file stored inside an archive, as reported by
// the format-specific listers in this file (directories are not listed).
type Entry struct {
	Name string // entry name exactly as recorded in the archive header
	Size int64  // uncompressed size
}
    28  
    29  func List(fqn string) ([]*Entry, error) {
    30  	var (
    31  		lst   []*Entry
    32  		finfo os.FileInfo
    33  	)
    34  	fh, err := os.Open(fqn)
    35  	if err != nil {
    36  		return nil, err
    37  	}
    38  	mime, err := MimeFile(fh, nil /*NOTE: not reading file magic*/, "", fqn)
    39  	if err != nil {
    40  		return nil, err
    41  	}
    42  	switch mime {
    43  	case ExtTar:
    44  		lst, err = lsTar(fh)
    45  	case ExtTgz, ExtTarGz:
    46  		lst, err = lsTgz(fh)
    47  	case ExtZip:
    48  		finfo, err = os.Stat(fqn)
    49  		if err == nil {
    50  			lst, err = lsZip(fh, finfo.Size())
    51  		}
    52  	case ExtTarLz4:
    53  		lst, err = lsLz4(fh)
    54  	default:
    55  		debug.Assert(false, mime)
    56  	}
    57  	cos.Close(fh)
    58  	if err != nil {
    59  		return nil, err
    60  	}
    61  	// paging requires them sorted
    62  	sort.Slice(lst, func(i, j int) bool { return lst[i].Name < lst[j].Name })
    63  	return lst, nil
    64  }
    65  
    66  // list: tar, tgz, zip, msgpack
    67  func lsTar(reader io.Reader) (lst []*Entry, _ error) {
    68  	tr := tar.NewReader(reader)
    69  	for {
    70  		hdr, err := tr.Next()
    71  		if err != nil {
    72  			if err == io.EOF {
    73  				return lst, nil // ok
    74  			}
    75  			return nil, err
    76  		}
    77  		if hdr.FileInfo().IsDir() {
    78  			continue
    79  		}
    80  		e := &Entry{Name: hdr.Name, Size: hdr.Size}
    81  		lst = append(lst, e)
    82  	}
    83  }
    84  
    85  func lsTgz(reader io.Reader) ([]*Entry, error) {
    86  	gzr, err := gzip.NewReader(reader)
    87  	if err != nil {
    88  		return nil, err
    89  	}
    90  	return lsTar(gzr)
    91  }
    92  
    93  func lsZip(readerAt cos.ReadReaderAt, size int64) (lst []*Entry, err error) {
    94  	var zr *zip.Reader
    95  	if zr, err = zip.NewReader(readerAt, size); err != nil {
    96  		return
    97  	}
    98  	for _, f := range zr.File {
    99  		finfo := f.FileInfo()
   100  		if finfo.IsDir() {
   101  			continue
   102  		}
   103  		e := &Entry{
   104  			Name: f.FileHeader.Name,
   105  			Size: int64(f.FileHeader.UncompressedSize64),
   106  		}
   107  		lst = append(lst, e)
   108  	}
   109  	return
   110  }
   111  
   112  func lsLz4(reader io.Reader) ([]*Entry, error) {
   113  	lzr := lz4.NewReader(reader)
   114  	return lsTar(lzr)
   115  }