github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/cmn/archive/write.go (about)

     1  // Package archive: write, read, copy, append, list primitives
     2  // across all supported formats
     3  /*
     4   * Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved.
     5   */
     6  package archive
     7  
import (
	"archive/tar"
	"archive/zip"
	"compress/gzip"
	"errors"
	"io"
	"os"
	"sync"
	"time"

	"github.com/NVIDIA/aistore/cmn"
	"github.com/NVIDIA/aistore/cmn/cos"
	"github.com/NVIDIA/aistore/cmn/debug"
	"github.com/NVIDIA/aistore/cmn/feat"
	"github.com/NVIDIA/aistore/memsys"
	"github.com/pierrec/lz4/v3"
)
    24  
    25  type (
    26  	HeaderCallback func(any)
    27  	Opts           struct {
    28  		CB        HeaderCallback
    29  		TarFormat tar.Format
    30  		Serialize bool
    31  	}
    32  )
    33  
    34  type (
    35  	Writer interface {
    36  		// Init specific writer
    37  		Write(nameInArch string, oah cos.OAH, reader io.Reader) error
    38  		// Close, cleanup
    39  		Fini()
    40  		// Copy arch, with potential subsequent APPEND
    41  		Copy(src io.Reader, size ...int64) error
    42  
    43  		// private
    44  		init(w io.Writer, cksum *cos.CksumHashSize, opts *Opts)
    45  	}
    46  	baseW struct {
    47  		wmul io.Writer
    48  		lck  sync.Locker // serialize: (multi-object => single shard)
    49  		buf  []byte
    50  		cb   HeaderCallback
    51  		slab *memsys.Slab
    52  	}
    53  	tarWriter struct {
    54  		baseW
    55  		format tar.Format
    56  		tw     *tar.Writer
    57  	}
    58  	tgzWriter struct {
    59  		tw  tarWriter
    60  		gzw *gzip.Writer
    61  	}
    62  	zipWriter struct {
    63  		baseW
    64  		zw *zip.Writer
    65  	}
    66  	lz4Writer struct {
    67  		tw  tarWriter
    68  		lzw *lz4.Writer
    69  	}
    70  )
    71  
    72  // interface guard
    73  var (
    74  	_ Writer = (*tarWriter)(nil)
    75  	_ Writer = (*tgzWriter)(nil)
    76  	_ Writer = (*zipWriter)(nil)
    77  	_ Writer = (*lz4Writer)(nil)
    78  )
    79  
    80  // calls init() -> open(),alloc()
    81  func NewWriter(mime string, w io.Writer, cksum *cos.CksumHashSize, opts *Opts) (aw Writer) {
    82  	switch mime {
    83  	case ExtTar:
    84  		aw = &tarWriter{}
    85  	case ExtTgz, ExtTarGz:
    86  		aw = &tgzWriter{}
    87  	case ExtZip:
    88  		aw = &zipWriter{}
    89  	case ExtTarLz4:
    90  		aw = &lz4Writer{}
    91  	default:
    92  		debug.Assert(false, mime)
    93  	}
    94  	aw.init(w, cksum, opts)
    95  	return
    96  }
    97  
    98  // baseW
    99  
   100  func (bw *baseW) init(w io.Writer, cksum *cos.CksumHashSize, opts *Opts) {
   101  	bw.buf, bw.slab = memsys.PageMM().Alloc()
   102  
   103  	bw.lck = cos.NopLocker{}
   104  	bw.cb = nopTarHeader
   105  	if opts != nil {
   106  		if opts.CB != nil {
   107  			bw.cb = opts.CB
   108  		}
   109  		if opts.Serialize {
   110  			bw.lck = &sync.Mutex{}
   111  		}
   112  	}
   113  	bw.wmul = w
   114  	if cksum != nil {
   115  		bw.wmul = cos.NewWriterMulti(w, cksum)
   116  	}
   117  }
   118  
   119  // tarWriter
   120  
   121  func (tw *tarWriter) init(w io.Writer, cksum *cos.CksumHashSize, opts *Opts) {
   122  	tw.baseW.init(w, cksum, opts)
   123  
   124  	tw.format = tar.FormatUnknown // default
   125  	if opts != nil {
   126  		tw.format = opts.TarFormat
   127  	}
   128  	debug.Assert(tw.format == tar.FormatUnknown || tw.format == tar.FormatUSTAR ||
   129  		tw.format == tar.FormatPAX || tw.format == tar.FormatGNU, tw.format.String())
   130  
   131  	tw.tw = tar.NewWriter(tw.wmul)
   132  }
   133  
   134  func (tw *tarWriter) Fini() {
   135  	tw.slab.Free(tw.buf)
   136  	tw.tw.Close()
   137  }
   138  
   139  func (tw *tarWriter) Write(fullname string, oah cos.OAH, reader io.Reader) (err error) {
   140  	hdr := tar.Header{
   141  		Typeflag: tar.TypeReg,
   142  		Name:     fullname,
   143  		Size:     oah.SizeBytes(),
   144  		ModTime:  time.Unix(0, oah.AtimeUnix()),
   145  		Mode:     int64(cos.PermRWRR),
   146  		Format:   tw.format,
   147  	}
   148  	tw.cb(&hdr)
   149  	tw.lck.Lock()
   150  	if err = tw.tw.WriteHeader(&hdr); err == nil {
   151  		_, err = io.CopyBuffer(tw.tw, reader, tw.buf)
   152  	}
   153  	tw.lck.Unlock()
   154  	return err
   155  }
   156  
   157  func (tw *tarWriter) Copy(src io.Reader, _ ...int64) error {
   158  	return cpTar(src, tw.tw, tw.buf)
   159  }
   160  
   161  // set Uid/Gid bits in TAR header
   162  // - note: cos.PermRWRR default
   163  // - not calling standard tar.FileInfoHeader
   164  
   165  func nopTarHeader(any) {}
   166  
   167  func SetTarHeader(hdr any) {
   168  	thdr := hdr.(*tar.Header)
   169  	{
   170  		thdr.Uid = os.Getuid()
   171  		thdr.Gid = os.Getgid()
   172  	}
   173  }
   174  
   175  // tgzWriter
   176  
   177  func (tzw *tgzWriter) init(w io.Writer, cksum *cos.CksumHashSize, opts *Opts) {
   178  	tzw.tw.baseW.init(w, cksum, opts)
   179  	tzw.gzw = gzip.NewWriter(tzw.tw.wmul)
   180  	tzw.tw.tw = tar.NewWriter(tzw.gzw)
   181  }
   182  
   183  func (tzw *tgzWriter) Fini() {
   184  	tzw.tw.Fini()
   185  	tzw.gzw.Close()
   186  }
   187  
   188  func (tzw *tgzWriter) Write(fullname string, oah cos.OAH, reader io.Reader) error {
   189  	return tzw.tw.Write(fullname, oah, reader)
   190  }
   191  
   192  func (tzw *tgzWriter) Copy(src io.Reader, _ ...int64) error {
   193  	gzr, err := gzip.NewReader(src)
   194  	if err != nil {
   195  		return err
   196  	}
   197  	err = cpTar(gzr, tzw.tw.tw, tzw.tw.buf)
   198  	cos.Close(gzr)
   199  	return err
   200  }
   201  
   202  // zipWriter
   203  
   204  func (zw *zipWriter) init(w io.Writer, cksum *cos.CksumHashSize, opts *Opts) {
   205  	zw.baseW.init(w, cksum, opts)
   206  	zw.zw = zip.NewWriter(zw.wmul)
   207  }
   208  
   209  func (zw *zipWriter) Fini() {
   210  	zw.slab.Free(zw.buf)
   211  	zw.zw.Close()
   212  }
   213  
   214  func (zw *zipWriter) Write(fullname string, oah cos.OAH, reader io.Reader) error {
   215  	ziphdr := zip.FileHeader{
   216  		Name:               fullname,
   217  		Comment:            fullname,
   218  		UncompressedSize64: uint64(oah.SizeBytes()),
   219  		Modified:           time.Unix(0, oah.AtimeUnix()),
   220  	}
   221  	zw.cb(&ziphdr)
   222  	zw.lck.Lock()
   223  	zipw, err := zw.zw.CreateHeader(&ziphdr)
   224  	if err == nil {
   225  		_, err = io.CopyBuffer(zipw, reader, zw.buf)
   226  	}
   227  	zw.lck.Unlock()
   228  	return err
   229  }
   230  
   231  func (zw *zipWriter) Copy(src io.Reader, size ...int64) error {
   232  	r, ok := src.(io.ReaderAt)
   233  	debug.Assert(ok && len(size) == 1)
   234  	return cpZip(r, size[0], zw.zw, zw.buf)
   235  }
   236  
   237  // lz4Writer
   238  
   239  func (lzw *lz4Writer) init(w io.Writer, cksum *cos.CksumHashSize, opts *Opts) {
   240  	lzw.tw.baseW.init(w, cksum, opts)
   241  	lzw.lzw = lz4.NewWriter(lzw.tw.wmul)
   242  
   243  	lzw.lzw.Header.BlockChecksum = false
   244  	lzw.lzw.Header.NoChecksum = !cmn.Rom.Features().IsSet(feat.LZ4FrameChecksum)
   245  	lzw.lzw.Header.BlockMaxSize = 256 * cos.KiB
   246  	if cmn.Rom.Features().IsSet(feat.LZ4Block1MB) {
   247  		lzw.lzw.Header.BlockMaxSize = cos.MiB
   248  	}
   249  
   250  	lzw.tw.tw = tar.NewWriter(lzw.lzw)
   251  }
   252  
   253  func (lzw *lz4Writer) Fini() {
   254  	lzw.tw.Fini()
   255  	lzw.lzw.Close()
   256  }
   257  
   258  func (lzw *lz4Writer) Write(fullname string, oah cos.OAH, reader io.Reader) error {
   259  	return lzw.tw.Write(fullname, oah, reader)
   260  }
   261  
   262  func (lzw *lz4Writer) Copy(src io.Reader, _ ...int64) error {
   263  	lzr := lz4.NewReader(src)
   264  	return cpTar(lzr, lzw.tw.tw, lzw.tw.buf)
   265  }