github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/tools/readers/readers.go (about) 1 // Package readers provides implementation for common reader types 2 /* 3 * Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved. 4 */ 5 package readers 6 7 import ( 8 "archive/tar" 9 "bytes" 10 "errors" 11 "fmt" 12 "io" 13 "math/rand" 14 "os" 15 "path" 16 17 "github.com/NVIDIA/aistore/cmn/archive" 18 "github.com/NVIDIA/aistore/cmn/cos" 19 "github.com/NVIDIA/aistore/cmn/debug" 20 "github.com/NVIDIA/aistore/cmn/mono" 21 "github.com/NVIDIA/aistore/ext/dsort/shard" 22 "github.com/NVIDIA/aistore/memsys" 23 "github.com/NVIDIA/aistore/tools/tarch" 24 ) 25 26 const ( 27 // TypeFile defines the name for file reader 28 TypeFile = "file" 29 // TypeSG defines the name for sg reader 30 TypeSG = "sg" 31 // TypeRand defines the name for rand reader 32 TypeRand = "rand" 33 // TypeTar defines the name for random TAR reader 34 TypeTar = "tar" 35 ) 36 37 type ( 38 Reader interface { 39 cos.ReadOpenCloser 40 io.Seeker 41 Cksum() *cos.Cksum 42 } 43 randReader struct { 44 seed int64 45 rnd *rand.Rand 46 size int64 47 offset int64 48 cksum *cos.Cksum 49 } 50 tarReader struct { 51 b []byte 52 bytes.Reader 53 cksum *cos.Cksum 54 } 55 rrLimited struct { 56 random *rand.Rand 57 size int64 58 off int64 59 } 60 fileReader struct { 61 *os.File 62 filePath string // Example: "/dir/ais/" 63 name string // Example: "smoke/bGzhWKWoxHDSePnELftx" 64 cksum *cos.Cksum 65 } 66 sgReader struct { 67 memsys.Reader 68 cksum *cos.Cksum 69 } 70 bytesReader struct { 71 *bytes.Buffer 72 buf []byte 73 } 74 75 // (aisloader only) 76 Params struct { 77 Type string // file | sg | inmem | rand 78 SGL *memsys.SGL // When Type == sg 79 Path, Name string // When Type == file; path and name of file to be created (if not already existing) 80 Size int64 81 } 82 ) 83 84 // interface guard 85 var ( 86 _ Reader = (*randReader)(nil) 87 _ Reader = (*tarReader)(nil) 88 _ Reader = (*fileReader)(nil) 89 _ Reader = (*sgReader)(nil) 90 ) 91 92 //////////////// 93 // randReader // 94 //////////////// 95 96 func NewRand(size int64, cksumType string) (Reader, error) { 97 var ( 98 cksum *cos.Cksum 99 seed = mono.NanoTime() 100 ) 101 rand1 := rand.New(rand.NewSource(seed)) 102 if cksumType != cos.ChecksumNone { 103 rr := &rrLimited{rand1, size, 0} 104 _, cksumHash, err := cos.CopyAndChecksum(io.Discard, rr, nil, cksumType) 105 if err != nil { 106 return nil, err 107 } 108 cksum = cksumHash.Clone() 109 } 110 rand1dup := rand.New(rand.NewSource(seed)) 111 return &randReader{ 112 seed: seed, 113 rnd: rand1dup, 114 size: size, 115 cksum: cksum, 116 }, nil 117 } 118 119 func (r *randReader) Read(buf []byte) (int, error) { 120 available := r.size - r.offset 121 if available == 0 { 122 return 0, io.EOF 123 } 124 125 want := int64(len(buf)) 126 n := min(want, available) 127 actual, err := r.rnd.Read(buf[:n]) 128 if err != nil { 129 return 0, nil 130 } 131 132 r.offset += int64(actual) 133 return actual, nil 134 } 135 136 // Open implements the Reader interface. 137 // Returns a new rand reader using the same seed. 138 func (r *randReader) Open() (cos.ReadOpenCloser, error) { 139 return &randReader{ 140 seed: r.seed, 141 rnd: rand.New(rand.NewSource(r.seed)), 142 size: r.size, 143 cksum: r.cksum, 144 }, nil 145 } 146 147 // Close implements the Reader interface. 148 func (*randReader) Close() error { return nil } 149 150 // Seek implements the Reader interface. 151 func (r *randReader) Seek(offset int64, whence int) (int64, error) { 152 var abs int64 153 154 switch whence { 155 case io.SeekStart: 156 abs = offset 157 case io.SeekCurrent: 158 abs = r.offset + offset 159 case io.SeekEnd: 160 abs = r.size + offset 161 default: 162 return 0, errors.New("invalid whence") 163 } 164 165 if abs < 0 { 166 return 0, errors.New("negative offset position") 167 } 168 169 if abs >= r.size { 170 r.offset = r.size 171 return r.offset, nil 172 } 173 174 r.rnd = rand.New(rand.NewSource(r.seed)) 175 r.offset = 0 176 actual, err := io.CopyN(io.Discard, r, abs) 177 if err != nil { 178 return 0, err 179 } 180 181 if actual != abs { 182 err := fmt.Errorf("failed to seek to %d, seeked to %d instead", offset, actual) 183 return 0, err 184 } 185 186 return abs, nil 187 } 188 189 // XXHash implements the Reader interface. 190 func (r *randReader) Cksum() *cos.Cksum { 191 return r.cksum 192 } 193 194 func (rr *rrLimited) Read(p []byte) (n int, err error) { 195 rem := int(min(rr.size-rr.off, int64(len(p)))) 196 n, _ = rr.random.Read(p[:rem]) // never fails 197 rr.off += int64(n) 198 if rem < len(p) { 199 err = io.EOF 200 } 201 return 202 } 203 204 //////////////// 205 // fileReader // 206 //////////////// 207 208 // creates/opens the file, populates it with random data, and returns a new fileReader 209 // NOTE: Caller is responsible for closing. 210 func NewRandFile(filepath, name string, size int64, cksumType string) (Reader, error) { 211 var ( 212 cksum *cos.Cksum 213 cksumHash *cos.CksumHash 214 fn = path.Join(filepath, name) 215 f, err = os.OpenFile(fn, os.O_RDWR|os.O_CREATE, cos.PermRWR) 216 exists bool 217 ) 218 if err != nil { 219 return nil, err 220 } 221 if size == -1 { 222 // checksum existing file 223 exists = true 224 if cksumType != cos.ChecksumNone { 225 debug.Assert(cksumType != "") 226 _, cksumHash, err = cos.CopyAndChecksum(io.Discard, f, nil, cksumType) 227 } 228 } else { 229 // Write random file 230 cksumHash, err = copyRandWithHash(f, size, cksumType, cos.NowRand()) 231 } 232 if err == nil { 233 _, err = f.Seek(0, io.SeekStart) 234 } 235 236 if err != nil { 237 // cleanup and ret 238 f.Close() 239 if !exists { 240 os.Remove(fn) 241 } 242 return nil, err 243 } 244 245 if cksumType != cos.ChecksumNone { 246 cksum = cksumHash.Clone() 247 } 248 return &fileReader{f, filepath, name, cksum}, nil 249 } 250 251 // NewExistingFile opens an existing file, reads it to compute checksum, and returns a new reader. 252 // NOTE: Caller responsible for closing. 253 func NewExistingFile(fn, cksumType string) (Reader, error) { 254 return NewRandFile(fn, "", -1, cksumType) 255 } 256 257 func (r *fileReader) Open() (cos.ReadOpenCloser, error) { 258 cksumType := cos.ChecksumNone 259 if r.cksum != nil { 260 cksumType = r.cksum.Type() 261 } 262 return NewRandFile(r.filePath, r.name, -1, cksumType) 263 } 264 265 // XXHash implements the Reader interface. 266 func (r *fileReader) Cksum() *cos.Cksum { 267 return r.cksum 268 } 269 270 ////////////// 271 // sgReader // 272 ////////////// 273 274 func NewSG(sgl *memsys.SGL, size int64, cksumType string) (Reader, error) { 275 var cksum *cos.Cksum 276 if size > 0 { 277 cksumHash, err := copyRandWithHash(sgl, size, cksumType, cos.NowRand()) 278 if err != nil { 279 return nil, err 280 } 281 if cksumType != cos.ChecksumNone { 282 cksum = cksumHash.Clone() 283 } 284 } 285 286 r := memsys.NewReader(sgl) 287 return &sgReader{*r, cksum}, nil 288 } 289 290 func (r *sgReader) Cksum() *cos.Cksum { 291 return r.cksum 292 } 293 294 ///////////////// 295 // bytesReader // 296 ///////////////// 297 298 func NewBytes(buf []byte) Reader { return &bytesReader{bytes.NewBuffer(buf), buf} } 299 func (*bytesReader) Close() error { return nil } 300 func (*bytesReader) Cksum() *cos.Cksum { return nil } 301 func (*bytesReader) Seek(int64, int) (int64, error) { return 0, nil } 302 303 func (r *bytesReader) Open() (cos.ReadOpenCloser, error) { 304 return &bytesReader{bytes.NewBuffer(r.buf), r.buf}, nil 305 } 306 307 /////////////// 308 // tarReader // 309 /////////////// 310 311 func newTarReader(size int64, cksumType string) (r Reader, err error) { 312 var ( 313 singleFileSize = min(size, int64(cos.KiB)) 314 buff = bytes.NewBuffer(nil) 315 ) 316 err = tarch.CreateArchCustomFilesToW(buff, tar.FormatUnknown, archive.ExtTar, max(int(size/singleFileSize), 1), 317 int(singleFileSize), shard.ContentKeyInt, ".cls", true) 318 if err != nil { 319 return nil, err 320 } 321 cksum, err := cos.ChecksumBytes(buff.Bytes(), cksumType) 322 if err != nil { 323 return nil, err 324 } 325 return &tarReader{ 326 b: buff.Bytes(), 327 Reader: *bytes.NewReader(buff.Bytes()), 328 cksum: cksum, 329 }, err 330 } 331 332 func (*tarReader) Close() error { return nil } 333 func (r *tarReader) Cksum() *cos.Cksum { return r.cksum } 334 335 func (r *tarReader) Open() (cos.ReadOpenCloser, error) { 336 return &tarReader{ 337 Reader: *bytes.NewReader(r.b), 338 cksum: r.cksum, 339 b: r.b, 340 }, nil 341 } 342 343 // 344 // for convenience 345 // 346 347 func New(p Params, cksumType string) (Reader, error) { 348 switch p.Type { 349 case TypeSG: 350 debug.Assert(p.SGL != nil) 351 return NewSG(p.SGL, p.Size, cksumType) 352 case TypeRand: 353 return NewRand(p.Size, cksumType) 354 case TypeFile: 355 return NewRandFile(p.Path, p.Name, p.Size, cksumType) 356 case TypeTar: 357 return newTarReader(p.Size, cksumType) 358 default: 359 return nil, errors.New("unknown memory type for creating inmem reader") 360 } 361 } 362 363 // copyRandWithHash reads data from random source and writes it to a writer while 364 // optionally computing xxhash 365 // See related: memsys_test.copyRand 366 func copyRandWithHash(w io.Writer, size int64, cksumType string, rnd *rand.Rand) (*cos.CksumHash, error) { 367 var ( 368 cksum *cos.CksumHash 369 rem = size 370 buf, s = memsys.PageMM().Alloc() 371 blkSize = int64(len(buf)) 372 ) 373 defer s.Free(buf) 374 375 if cksumType != cos.ChecksumNone { 376 cksum = cos.NewCksumHash(cksumType) 377 } 378 for i := int64(0); i <= size/blkSize; i++ { 379 n := int(min(blkSize, rem)) 380 rnd.Read(buf[:n]) 381 m, err := w.Write(buf[:n]) 382 if err != nil { 383 return nil, err 384 } 385 if cksumType != cos.ChecksumNone { 386 cksum.H.Write(buf[:m]) 387 } 388 debug.Assert(m == n) 389 rem -= int64(m) 390 } 391 if cksumType != cos.ChecksumNone { 392 cksum.Finalize() 393 } 394 return cksum, nil 395 }