github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/ext/dsort/shard/zip.go (about) 1 // Package shard provides Extract(shard), Create(shard), and associated methods 2 // across all suppported archival formats (see cmn/archive/mime.go) 3 /* 4 * Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved. 5 */ 6 package shard 7 8 import ( 9 "archive/zip" 10 "io" 11 12 "github.com/NVIDIA/aistore/cmn/archive" 13 "github.com/NVIDIA/aistore/cmn/cos" 14 "github.com/NVIDIA/aistore/cmn/debug" 15 "github.com/NVIDIA/aistore/core" 16 "github.com/NVIDIA/aistore/memsys" 17 jsoniter "github.com/json-iterator/go" 18 ) 19 20 type ( 21 zipRW struct { 22 ext string 23 } 24 25 zipFileHeader struct { 26 Name string `json:"name"` 27 Comment string `json:"comment"` 28 } 29 30 // zipRecordDataReader is used for writing metadata as well as data to the buffer. 31 zipRecordDataReader struct { 32 slab *memsys.Slab 33 34 metadataSize int64 35 size int64 36 written int64 37 metadataBuf []byte 38 header zipFileHeader 39 zipWriter *zip.Writer 40 41 writer io.Writer 42 } 43 ) 44 45 // interface guard 46 var _ RW = (*zipRW)(nil) 47 48 /////////// 49 // zipRW // 50 /////////// 51 52 func NewZipRW() RW { return &zipRW{ext: archive.ExtZip} } 53 54 func (*zipRW) IsCompressed() bool { return true } 55 func (*zipRW) SupportsOffset() bool { return false } 56 func (*zipRW) MetadataSize() int64 { return 0 } // zip does not have header size 57 58 // Extract reads the tarball f and extracts its metadata. 59 func (zrw *zipRW) Extract(lom *core.LOM, r cos.ReadReaderAt, extractor RecordExtractor, toDisk bool) (int64, int, error) { 60 ar, err := archive.NewReader(zrw.ext, r, lom.SizeBytes()) 61 if err != nil { 62 return 0, 0, err 63 } 64 c := &rcbCtx{parent: zrw, extractor: extractor, shardName: lom.ObjName, toDisk: toDisk, fromTar: false} 65 buf, slab := core.T.PageMM().AllocSize(lom.SizeBytes()) 66 c.buf = buf 67 68 err = ar.ReadUntil(c, cos.EmptyMatchAll, "") 69 70 slab.Free(buf) 71 return c.extractedSize, c.extractedCount, err 72 } 73 74 // Create creates a new shard locally based on the Shard. 75 // Note that the order of closing must be trw, gzw, then finally tarball. 76 func (*zipRW) Create(s *Shard, w io.Writer, loader ContentLoader) (written int64, err error) { 77 var n int64 78 zw := zip.NewWriter(w) 79 defer cos.Close(zw) 80 81 rdReader := newZipRecordDataReader() 82 for _, rec := range s.Records.All() { 83 for _, obj := range rec.Objects { 84 rdReader.reinit(zw, obj.Size, obj.MetadataSize) 85 if n, err = loader.Load(rdReader, rec, obj); err != nil { 86 return written + n, err 87 } 88 89 written += n 90 } 91 } 92 rdReader.free() 93 return written, nil 94 } 95 96 ///////////////////////// 97 // zipRecordDataReader // 98 ///////////////////////// 99 100 func newZipRecordDataReader() *zipRecordDataReader { 101 rd := &zipRecordDataReader{} 102 rd.metadataBuf, rd.slab = core.T.ByteMM().Alloc() 103 return rd 104 } 105 106 func (rd *zipRecordDataReader) reinit(zw *zip.Writer, size, metadataSize int64) { 107 rd.zipWriter = zw 108 rd.written = 0 109 rd.size = size 110 rd.metadataSize = metadataSize 111 } 112 113 func (rd *zipRecordDataReader) free() { 114 rd.slab.Free(rd.metadataBuf) 115 } 116 117 func (rd *zipRecordDataReader) Write(p []byte) (int, error) { 118 // Read header and initialize file writer 119 remainingMetadataSize := rd.metadataSize - rd.written 120 if remainingMetadataSize > 0 { 121 writeN := int64(len(p)) 122 if writeN < remainingMetadataSize { 123 debug.Assert(int64(len(rd.metadataBuf))-rd.written >= writeN) 124 copy(rd.metadataBuf[rd.written:], p) 125 rd.written += writeN 126 return len(p), nil 127 } 128 debug.Assert(int64(len(rd.metadataBuf))-rd.written >= remainingMetadataSize) 129 130 copy(rd.metadataBuf[rd.written:], p[:remainingMetadataSize]) 131 rd.written += remainingMetadataSize 132 p = p[remainingMetadataSize:] 133 var metadata zipFileHeader 134 if err := jsoniter.Unmarshal(rd.metadataBuf[:rd.metadataSize], &metadata); err != nil { 135 return int(remainingMetadataSize), err 136 } 137 138 rd.header = metadata 139 writer, err := rd.zipWriter.Create(rd.header.Name) 140 if err != nil { 141 return int(remainingMetadataSize), err 142 } 143 if err := rd.zipWriter.SetComment(rd.header.Comment); err != nil { 144 return int(remainingMetadataSize), err 145 } 146 rd.writer = writer 147 } else { 148 remainingMetadataSize = 0 149 } 150 151 n, err := rd.writer.Write(p) 152 rd.written += int64(n) 153 return n + int(remainingMetadataSize), err 154 }