github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/ext/dsort/shard/tarlz4.go (about)

     1  // Package shard provides Extract(shard), Create(shard), and associated methods
     2  // across all supported archival formats (see cmn/archive/mime.go)
     3  /*
     4   * Copyright (c) 2023-2024, NVIDIA CORPORATION. All rights reserved.
     5   */
     6  package shard
     7  
     8  import (
     9  	"archive/tar"
    10  	"io"
    11  
    12  	"github.com/NVIDIA/aistore/cmn/archive"
    13  	"github.com/NVIDIA/aistore/cmn/cos"
    14  	"github.com/NVIDIA/aistore/core"
    15  	"github.com/pierrec/lz4/v3"
    16  )
    17  
    18  type tlz4RW struct {
    19  	ext string
    20  }
    21  
// interface guard: compile-time check that *tlz4RW satisfies RW
var _ RW = (*tlz4RW)(nil)
    24  
    25  func NewTarlz4RW() RW { return &tlz4RW{ext: archive.ExtTarLz4} }
    26  
    27  func (*tlz4RW) IsCompressed() bool   { return true }
    28  func (*tlz4RW) SupportsOffset() bool { return true }
    29  func (*tlz4RW) MetadataSize() int64  { return archive.TarBlockSize } // size of tar header with padding
    30  
    31  // Extract  the tarball f and extracts its metadata.
    32  func (trw *tlz4RW) Extract(lom *core.LOM, r cos.ReadReaderAt, extractor RecordExtractor, toDisk bool) (int64, int, error) {
    33  	ar, err := archive.NewReader(trw.ext, r)
    34  	if err != nil {
    35  		return 0, 0, err
    36  	}
    37  	c := &rcbCtx{parent: trw, extractor: extractor, shardName: lom.ObjName, toDisk: toDisk, fromTar: true}
    38  	err = c.extract(lom, ar)
    39  
    40  	return c.extractedSize, c.extractedCount, err
    41  }
    42  
    43  // create local shard based on Shard
    44  func (*tlz4RW) Create(s *Shard, tarball io.Writer, loader ContentLoader) (written int64, err error) {
    45  	var (
    46  		lzw      = lz4.NewWriter(tarball)
    47  		tw       = tar.NewWriter(lzw)
    48  		rdReader = newTarRecordDataReader()
    49  	)
    50  	written, err = writeCompressedTar(s, tw, lzw, loader, rdReader)
    51  
    52  	// note the order of closing: tw, gzw, and eventually tarball (by the caller)
    53  	rdReader.free()
    54  	cos.Close(tw)
    55  	cos.Close(lzw)
    56  	return written, err
    57  }