github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/tools/tarch/archive.go (about) 1 // Package archive provides common low-level utilities for testing archives 2 /* 3 * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved. 4 */ 5 package tarch 6 7 import ( 8 "archive/tar" 9 "bytes" 10 "fmt" 11 "io" 12 "math/rand" 13 "os" 14 "strconv" 15 "sync" 16 "time" 17 18 "github.com/NVIDIA/aistore/cmn/archive" 19 "github.com/NVIDIA/aistore/cmn/cos" 20 "github.com/NVIDIA/aistore/cmn/debug" 21 "github.com/NVIDIA/aistore/ext/dsort/shard" 22 "github.com/NVIDIA/aistore/tools/cryptorand" 23 ) 24 25 var pool1m, pool128k, pool32k sync.Pool 26 27 var ( 28 _ archive.ArchRCB = (*rcbCtx)(nil) 29 _ archive.ArchRCB = (*rcbDummy)(nil) 30 ) 31 32 type ( 33 FileContent struct { 34 Name string 35 Ext string 36 Content []byte 37 } 38 dummyFile struct { 39 name string 40 size int64 41 } 42 rcbCtx struct { 43 files []FileContent 44 ext string 45 } 46 rcbDummy struct { 47 files []os.FileInfo 48 } 49 ) 50 51 func addBufferToArch(aw archive.Writer, path string, l int, buf []byte) error { 52 if buf == nil { 53 buf = newBuf(l) 54 defer freeBuf(buf) 55 buf = buf[:l] 56 _, err := cryptorand.Read(buf[:l/3]) 57 debug.AssertNoErr(err) 58 copy(buf[2*l/3:], buf) 59 } 60 reader := bytes.NewBuffer(buf) 61 oah := cos.SimpleOAH{Size: int64(l)} 62 return aw.Write(path, oah, reader) 63 } 64 65 func CreateArchRandomFiles(shardName string, tarFormat tar.Format, ext string, fileCnt, fileSize int, 66 dup bool, recExts, randNames []string) error { 67 wfh, err := cos.CreateFile(shardName) 68 if err != nil { 69 return err 70 } 71 72 aw := archive.NewWriter(ext, wfh, nil, &archive.Opts{TarFormat: tarFormat}) 73 defer func() { 74 aw.Fini() 75 wfh.Close() 76 }() 77 78 var ( 79 prevFileName string 80 dupIndex = rand.Intn(fileCnt-1) + 1 81 ) 82 if len(recExts) == 0 { 83 recExts = []string{".txt"} 84 } 85 for i := range fileCnt { 86 var randomName int 87 if randNames == nil { 88 randomName = rand.Int() 89 } 90 for _, ext := range recExts { 91 var fileName string 92 if randNames == nil { 93 fileName = fmt.Sprintf("%d%s", randomName, ext) // generate random names 94 if dupIndex == i && dup { 95 fileName = prevFileName 96 } 97 } else { 98 fileName = randNames[i] 99 } 100 if err := addBufferToArch(aw, fileName, fileSize, nil); err != nil { 101 return err 102 } 103 prevFileName = fileName 104 } 105 } 106 return nil 107 } 108 109 func CreateArchCustomFilesToW(w io.Writer, tarFormat tar.Format, ext string, fileCnt, fileSize int, 110 customFileType, customFileExt string, missingKeys bool) error { 111 aw := archive.NewWriter(ext, w, nil, &archive.Opts{TarFormat: tarFormat}) 112 defer aw.Fini() 113 for range fileCnt { 114 fileName := strconv.Itoa(rand.Int()) // generate random names 115 if err := addBufferToArch(aw, fileName+".txt", fileSize, nil); err != nil { 116 return err 117 } 118 // If missingKeys enabled we should only add keys randomly 119 if !missingKeys || (missingKeys && rand.Intn(2) == 0) { 120 var buf []byte 121 // random content 122 if err := shard.ValidateContentKeyTy(customFileType); err != nil { 123 return err 124 } 125 switch customFileType { 126 case shard.ContentKeyInt: 127 buf = []byte(strconv.Itoa(rand.Int())) 128 case shard.ContentKeyString: 129 buf = []byte(fmt.Sprintf("%d-%d", rand.Int(), rand.Int())) 130 case shard.ContentKeyFloat: 131 buf = []byte(fmt.Sprintf("%d.%d", rand.Int(), rand.Int())) 132 default: 133 debug.Assert(false, customFileType) // validated above 134 } 135 if err := addBufferToArch(aw, fileName+customFileExt, len(buf), buf); err != nil { 136 return err 137 } 138 } 139 } 140 return nil 141 } 142 143 func CreateArchCustomFiles(shardName string, tarFormat tar.Format, ext string, fileCnt, fileSize int, 144 customFileType, customFileExt string, missingKeys bool) error { 145 wfh, err := cos.CreateFile(shardName) 146 if err != nil { 147 return err 148 } 149 defer wfh.Close() 150 return CreateArchCustomFilesToW(wfh, tarFormat, ext, fileCnt, fileSize, customFileType, customFileExt, missingKeys) 151 } 152 153 func newArchReader(mime string, buffer *bytes.Buffer) (ar archive.Reader, err error) { 154 if mime == archive.ExtZip { 155 // zip is special 156 readerAt := bytes.NewReader(buffer.Bytes()) 157 ar, err = archive.NewReader(mime, readerAt, int64(buffer.Len())) 158 } else { 159 ar, err = archive.NewReader(mime, buffer) 160 } 161 return 162 } 163 164 func (rcb *rcbCtx) Call(filename string, reader cos.ReadCloseSizer, _ any) (bool, error) { 165 var ( 166 buf bytes.Buffer 167 ext = cos.Ext(filename) 168 ) 169 defer reader.Close() 170 if rcb.ext == ext { 171 if _, err := io.Copy(&buf, reader); err != nil { 172 return true, err 173 } 174 } 175 rcb.files = append(rcb.files, FileContent{Name: filename, Ext: ext, Content: buf.Bytes()}) 176 return false, nil 177 } 178 179 func GetFilesFromArchBuffer(mime string, buffer bytes.Buffer, extension string) ([]FileContent, error) { 180 var ( 181 rcb = rcbCtx{ 182 files: make([]FileContent, 0, 10), 183 ext: extension, 184 } 185 ar, err = newArchReader(mime, &buffer) 186 ) 187 if err != nil { 188 return nil, err 189 } 190 err = ar.ReadUntil(&rcb, cos.EmptyMatchAll, "") 191 return rcb.files, err 192 } 193 194 func (rcb *rcbDummy) Call(filename string, reader cos.ReadCloseSizer, _ any) (bool, error) { 195 rcb.files = append(rcb.files, newDummyFile(filename, reader.Size())) 196 reader.Close() 197 return false, nil 198 } 199 200 func GetFileInfosFromArchBuffer(buffer bytes.Buffer, mime string) ([]os.FileInfo, error) { 201 var ( 202 rcb = rcbDummy{ 203 files: make([]os.FileInfo, 0, 10), 204 } 205 ar, err = newArchReader(mime, &buffer) 206 ) 207 if err != nil { 208 return nil, err 209 } 210 err = ar.ReadUntil(&rcb, cos.EmptyMatchAll, "") 211 return rcb.files, err 212 } 213 214 /////////////// 215 // dummyFile // 216 /////////////// 217 218 func newDummyFile(name string, size int64) *dummyFile { 219 return &dummyFile{ 220 name: name, 221 size: size, 222 } 223 } 224 225 func (f *dummyFile) Name() string { return f.name } 226 func (f *dummyFile) Size() int64 { return f.size } 227 func (*dummyFile) Mode() os.FileMode { return 0 } 228 func (*dummyFile) ModTime() time.Time { return time.Now() } 229 func (*dummyFile) IsDir() bool { return false } 230 func (*dummyFile) Sys() any { return nil } 231 232 // 233 // assorted buf pools 234 // 235 236 func newBuf(l int) (buf []byte) { 237 switch { 238 case l > cos.MiB: 239 debug.Assertf(false, "buf size exceeds 1MB: %d", l) 240 case l > 128*cos.KiB: 241 return newBuf1m() 242 case l > 32*cos.KiB: 243 return newBuf128k() 244 } 245 return newBuf32k() 246 } 247 248 func freeBuf(buf []byte) { 249 c := cap(buf) 250 buf = buf[:c] 251 switch c { 252 case cos.MiB: 253 freeBuf1m(buf) 254 case 128 * cos.KiB: 255 freeBuf128k(buf) 256 case 32 * cos.KiB: 257 freeBuf32k(buf) 258 default: 259 debug.Assertf(false, "unexpected buf size: %d", c) 260 } 261 } 262 263 func newBuf1m() (buf []byte) { 264 if v := pool1m.Get(); v != nil { 265 pbuf := v.(*[]byte) 266 buf = *pbuf 267 } else { 268 buf = make([]byte, cos.MiB) 269 } 270 return 271 } 272 273 func freeBuf1m(buf []byte) { 274 pool1m.Put(&buf) 275 } 276 277 func newBuf128k() (buf []byte) { 278 if v := pool128k.Get(); v != nil { 279 pbuf := v.(*[]byte) 280 buf = *pbuf 281 } else { 282 buf = make([]byte, 128*cos.KiB) 283 } 284 return 285 } 286 287 func freeBuf128k(buf []byte) { 288 pool128k.Put(&buf) 289 } 290 291 func newBuf32k() (buf []byte) { 292 if v := pool32k.Get(); v != nil { 293 pbuf := v.(*[]byte) 294 buf = *pbuf 295 } else { 296 buf = make([]byte, 32*cos.KiB) 297 } 298 return 299 } 300 301 func freeBuf32k(buf []byte) { 302 pool32k.Put(&buf) 303 }