github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/ext/dsort/shard/targz.go (about)

     1  // Package shard provides Extract(shard), Create(shard), and associated methods
     2  // across all supported archival formats (see cmn/archive/mime.go)
     3  /*
     4   * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.
     5   */
     6  package shard
     7  
     8  import (
     9  	"archive/tar"
    10  	"compress/gzip"
    11  	"io"
    12  
    13  	"github.com/NVIDIA/aistore/cmn/archive"
    14  	"github.com/NVIDIA/aistore/cmn/cos"
    15  	"github.com/NVIDIA/aistore/core"
    16  )
    17  
// tgzRW is the RW implementation for gzip-compressed tar shards.
type tgzRW struct {
	// ext is the archive extension handed to archive.NewReader when extracting
	ext string
}
    21  
// interface guard: compile-time check that *tgzRW satisfies RW
var _ RW = (*tgzRW)(nil)
    24  
    25  func NewTargzRW(ext string) RW { return &tgzRW{ext: ext} }
    26  
    27  func (*tgzRW) IsCompressed() bool   { return true }
    28  func (*tgzRW) SupportsOffset() bool { return true }
    29  func (*tgzRW) MetadataSize() int64  { return archive.TarBlockSize } // size of tar header with padding
    30  
    31  // Extract reads the tarball f and extracts its metadata.
    32  // Writes work tar
    33  func (trw *tgzRW) Extract(lom *core.LOM, r cos.ReadReaderAt, extractor RecordExtractor, toDisk bool) (int64, int, error) {
    34  	ar, err := archive.NewReader(trw.ext, r)
    35  	if err != nil {
    36  		return 0, 0, err
    37  	}
    38  	c := &rcbCtx{parent: trw, extractor: extractor, shardName: lom.ObjName, toDisk: toDisk, fromTar: true}
    39  	err = c.extract(lom, ar)
    40  
    41  	return c.extractedSize, c.extractedCount, err
    42  }
    43  
    44  // create local shard based on Shard
    45  func (*tgzRW) Create(s *Shard, tarball io.Writer, loader ContentLoader) (written int64, err error) {
    46  	var (
    47  		gzw, _   = gzip.NewWriterLevel(tarball, gzip.BestSpeed)
    48  		tw       = tar.NewWriter(gzw)
    49  		rdReader = newTarRecordDataReader()
    50  	)
    51  	written, err = writeCompressedTar(s, tw, gzw, loader, rdReader)
    52  
    53  	// note the order of closing: tw, gzw, and eventually tarball (by the caller)
    54  	rdReader.free()
    55  	cos.Close(tw)
    56  	cos.Close(gzw)
    57  	return written, err
    58  }