github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/tools/tarch/archive.go (about)

// Package tarch provides common low-level utilities for testing archives
     2  /*
     3   * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.
     4   */
     5  package tarch
     6  
     7  import (
     8  	"archive/tar"
     9  	"bytes"
    10  	"fmt"
    11  	"io"
    12  	"math/rand"
    13  	"os"
    14  	"strconv"
    15  	"sync"
    16  	"time"
    17  
    18  	"github.com/NVIDIA/aistore/cmn/archive"
    19  	"github.com/NVIDIA/aistore/cmn/cos"
    20  	"github.com/NVIDIA/aistore/cmn/debug"
    21  	"github.com/NVIDIA/aistore/ext/dsort/shard"
    22  	"github.com/NVIDIA/aistore/tools/cryptorand"
    23  )
    24  
// Pools of reusable byte buffers, one pool per fixed buffer size
// (1MiB, 128KiB, and 32KiB respectively); see newBuf and freeBuf.
var pool1m, pool128k, pool32k sync.Pool

// Compile-time checks: both callback types must satisfy archive.ArchRCB.
var (
	_ archive.ArchRCB = (*rcbCtx)(nil)
	_ archive.ArchRCB = (*rcbDummy)(nil)
)
    31  
type (
	// FileContent describes one archived file as collected by (*rcbCtx).Call.
	FileContent struct {
		Name    string // file name as passed to the read callback
		Ext     string // extension of Name, computed via cos.Ext
		Content []byte // payload; read only when Ext equals the requested extension
	}
	// dummyFile is a minimal os.FileInfo implementation that carries
	// a name and a size only.
	dummyFile struct {
		name string
		size int64
	}
	// rcbCtx is an archive.ArchRCB callback context that accumulates
	// FileContent entries; `ext` selects which entries get their payload read.
	rcbCtx struct {
		files []FileContent
		ext   string
	}
	// rcbDummy is an archive.ArchRCB callback context that accumulates
	// os.FileInfo entries (name and size) without reading any payload.
	rcbDummy struct {
		files []os.FileInfo
	}
)
    50  
    51  func addBufferToArch(aw archive.Writer, path string, l int, buf []byte) error {
    52  	if buf == nil {
    53  		buf = newBuf(l)
    54  		defer freeBuf(buf)
    55  		buf = buf[:l]
    56  		_, err := cryptorand.Read(buf[:l/3])
    57  		debug.AssertNoErr(err)
    58  		copy(buf[2*l/3:], buf)
    59  	}
    60  	reader := bytes.NewBuffer(buf)
    61  	oah := cos.SimpleOAH{Size: int64(l)}
    62  	return aw.Write(path, oah, reader)
    63  }
    64  
    65  func CreateArchRandomFiles(shardName string, tarFormat tar.Format, ext string, fileCnt, fileSize int,
    66  	dup bool, recExts, randNames []string) error {
    67  	wfh, err := cos.CreateFile(shardName)
    68  	if err != nil {
    69  		return err
    70  	}
    71  
    72  	aw := archive.NewWriter(ext, wfh, nil, &archive.Opts{TarFormat: tarFormat})
    73  	defer func() {
    74  		aw.Fini()
    75  		wfh.Close()
    76  	}()
    77  
    78  	var (
    79  		prevFileName string
    80  		dupIndex     = rand.Intn(fileCnt-1) + 1
    81  	)
    82  	if len(recExts) == 0 {
    83  		recExts = []string{".txt"}
    84  	}
    85  	for i := range fileCnt {
    86  		var randomName int
    87  		if randNames == nil {
    88  			randomName = rand.Int()
    89  		}
    90  		for _, ext := range recExts {
    91  			var fileName string
    92  			if randNames == nil {
    93  				fileName = fmt.Sprintf("%d%s", randomName, ext) // generate random names
    94  				if dupIndex == i && dup {
    95  					fileName = prevFileName
    96  				}
    97  			} else {
    98  				fileName = randNames[i]
    99  			}
   100  			if err := addBufferToArch(aw, fileName, fileSize, nil); err != nil {
   101  				return err
   102  			}
   103  			prevFileName = fileName
   104  		}
   105  	}
   106  	return nil
   107  }
   108  
   109  func CreateArchCustomFilesToW(w io.Writer, tarFormat tar.Format, ext string, fileCnt, fileSize int,
   110  	customFileType, customFileExt string, missingKeys bool) error {
   111  	aw := archive.NewWriter(ext, w, nil, &archive.Opts{TarFormat: tarFormat})
   112  	defer aw.Fini()
   113  	for range fileCnt {
   114  		fileName := strconv.Itoa(rand.Int()) // generate random names
   115  		if err := addBufferToArch(aw, fileName+".txt", fileSize, nil); err != nil {
   116  			return err
   117  		}
   118  		// If missingKeys enabled we should only add keys randomly
   119  		if !missingKeys || (missingKeys && rand.Intn(2) == 0) {
   120  			var buf []byte
   121  			// random content
   122  			if err := shard.ValidateContentKeyTy(customFileType); err != nil {
   123  				return err
   124  			}
   125  			switch customFileType {
   126  			case shard.ContentKeyInt:
   127  				buf = []byte(strconv.Itoa(rand.Int()))
   128  			case shard.ContentKeyString:
   129  				buf = []byte(fmt.Sprintf("%d-%d", rand.Int(), rand.Int()))
   130  			case shard.ContentKeyFloat:
   131  				buf = []byte(fmt.Sprintf("%d.%d", rand.Int(), rand.Int()))
   132  			default:
   133  				debug.Assert(false, customFileType) // validated above
   134  			}
   135  			if err := addBufferToArch(aw, fileName+customFileExt, len(buf), buf); err != nil {
   136  				return err
   137  			}
   138  		}
   139  	}
   140  	return nil
   141  }
   142  
   143  func CreateArchCustomFiles(shardName string, tarFormat tar.Format, ext string, fileCnt, fileSize int,
   144  	customFileType, customFileExt string, missingKeys bool) error {
   145  	wfh, err := cos.CreateFile(shardName)
   146  	if err != nil {
   147  		return err
   148  	}
   149  	defer wfh.Close()
   150  	return CreateArchCustomFilesToW(wfh, tarFormat, ext, fileCnt, fileSize, customFileType, customFileExt, missingKeys)
   151  }
   152  
   153  func newArchReader(mime string, buffer *bytes.Buffer) (ar archive.Reader, err error) {
   154  	if mime == archive.ExtZip {
   155  		// zip is special
   156  		readerAt := bytes.NewReader(buffer.Bytes())
   157  		ar, err = archive.NewReader(mime, readerAt, int64(buffer.Len()))
   158  	} else {
   159  		ar, err = archive.NewReader(mime, buffer)
   160  	}
   161  	return
   162  }
   163  
   164  func (rcb *rcbCtx) Call(filename string, reader cos.ReadCloseSizer, _ any) (bool, error) {
   165  	var (
   166  		buf bytes.Buffer
   167  		ext = cos.Ext(filename)
   168  	)
   169  	defer reader.Close()
   170  	if rcb.ext == ext {
   171  		if _, err := io.Copy(&buf, reader); err != nil {
   172  			return true, err
   173  		}
   174  	}
   175  	rcb.files = append(rcb.files, FileContent{Name: filename, Ext: ext, Content: buf.Bytes()})
   176  	return false, nil
   177  }
   178  
   179  func GetFilesFromArchBuffer(mime string, buffer bytes.Buffer, extension string) ([]FileContent, error) {
   180  	var (
   181  		rcb = rcbCtx{
   182  			files: make([]FileContent, 0, 10),
   183  			ext:   extension,
   184  		}
   185  		ar, err = newArchReader(mime, &buffer)
   186  	)
   187  	if err != nil {
   188  		return nil, err
   189  	}
   190  	err = ar.ReadUntil(&rcb, cos.EmptyMatchAll, "")
   191  	return rcb.files, err
   192  }
   193  
   194  func (rcb *rcbDummy) Call(filename string, reader cos.ReadCloseSizer, _ any) (bool, error) {
   195  	rcb.files = append(rcb.files, newDummyFile(filename, reader.Size()))
   196  	reader.Close()
   197  	return false, nil
   198  }
   199  
   200  func GetFileInfosFromArchBuffer(buffer bytes.Buffer, mime string) ([]os.FileInfo, error) {
   201  	var (
   202  		rcb = rcbDummy{
   203  			files: make([]os.FileInfo, 0, 10),
   204  		}
   205  		ar, err = newArchReader(mime, &buffer)
   206  	)
   207  	if err != nil {
   208  		return nil, err
   209  	}
   210  	err = ar.ReadUntil(&rcb, cos.EmptyMatchAll, "")
   211  	return rcb.files, err
   212  }
   213  
   214  ///////////////
   215  // dummyFile //
   216  ///////////////
   217  
   218  func newDummyFile(name string, size int64) *dummyFile {
   219  	return &dummyFile{
   220  		name: name,
   221  		size: size,
   222  	}
   223  }
   224  
   225  func (f *dummyFile) Name() string     { return f.name }
   226  func (f *dummyFile) Size() int64      { return f.size }
   227  func (*dummyFile) Mode() os.FileMode  { return 0 }
   228  func (*dummyFile) ModTime() time.Time { return time.Now() }
   229  func (*dummyFile) IsDir() bool        { return false }
   230  func (*dummyFile) Sys() any           { return nil }
   231  
   232  //
   233  // assorted buf pools
   234  //
   235  
   236  func newBuf(l int) (buf []byte) {
   237  	switch {
   238  	case l > cos.MiB:
   239  		debug.Assertf(false, "buf size exceeds 1MB: %d", l)
   240  	case l > 128*cos.KiB:
   241  		return newBuf1m()
   242  	case l > 32*cos.KiB:
   243  		return newBuf128k()
   244  	}
   245  	return newBuf32k()
   246  }
   247  
   248  func freeBuf(buf []byte) {
   249  	c := cap(buf)
   250  	buf = buf[:c]
   251  	switch c {
   252  	case cos.MiB:
   253  		freeBuf1m(buf)
   254  	case 128 * cos.KiB:
   255  		freeBuf128k(buf)
   256  	case 32 * cos.KiB:
   257  		freeBuf32k(buf)
   258  	default:
   259  		debug.Assertf(false, "unexpected buf size: %d", c)
   260  	}
   261  }
   262  
   263  func newBuf1m() (buf []byte) {
   264  	if v := pool1m.Get(); v != nil {
   265  		pbuf := v.(*[]byte)
   266  		buf = *pbuf
   267  	} else {
   268  		buf = make([]byte, cos.MiB)
   269  	}
   270  	return
   271  }
   272  
   273  func freeBuf1m(buf []byte) {
   274  	pool1m.Put(&buf)
   275  }
   276  
   277  func newBuf128k() (buf []byte) {
   278  	if v := pool128k.Get(); v != nil {
   279  		pbuf := v.(*[]byte)
   280  		buf = *pbuf
   281  	} else {
   282  		buf = make([]byte, 128*cos.KiB)
   283  	}
   284  	return
   285  }
   286  
   287  func freeBuf128k(buf []byte) {
   288  	pool128k.Put(&buf)
   289  }
   290  
   291  func newBuf32k() (buf []byte) {
   292  	if v := pool32k.Get(); v != nil {
   293  		pbuf := v.(*[]byte)
   294  		buf = *pbuf
   295  	} else {
   296  		buf = make([]byte, 32*cos.KiB)
   297  	}
   298  	return
   299  }
   300  
   301  func freeBuf32k(buf []byte) {
   302  	pool32k.Put(&buf)
   303  }