github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/cmn/archive/write.go (about) 1 // Package archive: write, read, copy, append, list primitives 2 // across all supported formats 3 /* 4 * Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved. 5 */ 6 package archive 7 8 import ( 9 "archive/tar" 10 "archive/zip" 11 "compress/gzip" 12 "io" 13 "os" 14 "sync" 15 "time" 16 17 "github.com/NVIDIA/aistore/cmn" 18 "github.com/NVIDIA/aistore/cmn/cos" 19 "github.com/NVIDIA/aistore/cmn/debug" 20 "github.com/NVIDIA/aistore/cmn/feat" 21 "github.com/NVIDIA/aistore/memsys" 22 "github.com/pierrec/lz4/v3" 23 ) 24 25 type ( 26 HeaderCallback func(any) 27 Opts struct { 28 CB HeaderCallback 29 TarFormat tar.Format 30 Serialize bool 31 } 32 ) 33 34 type ( 35 Writer interface { 36 // Init specific writer 37 Write(nameInArch string, oah cos.OAH, reader io.Reader) error 38 // Close, cleanup 39 Fini() 40 // Copy arch, with potential subsequent APPEND 41 Copy(src io.Reader, size ...int64) error 42 43 // private 44 init(w io.Writer, cksum *cos.CksumHashSize, opts *Opts) 45 } 46 baseW struct { 47 wmul io.Writer 48 lck sync.Locker // serialize: (multi-object => single shard) 49 buf []byte 50 cb HeaderCallback 51 slab *memsys.Slab 52 } 53 tarWriter struct { 54 baseW 55 format tar.Format 56 tw *tar.Writer 57 } 58 tgzWriter struct { 59 tw tarWriter 60 gzw *gzip.Writer 61 } 62 zipWriter struct { 63 baseW 64 zw *zip.Writer 65 } 66 lz4Writer struct { 67 tw tarWriter 68 lzw *lz4.Writer 69 } 70 ) 71 72 // interface guard 73 var ( 74 _ Writer = (*tarWriter)(nil) 75 _ Writer = (*tgzWriter)(nil) 76 _ Writer = (*zipWriter)(nil) 77 _ Writer = (*lz4Writer)(nil) 78 ) 79 80 // calls init() -> open(),alloc() 81 func NewWriter(mime string, w io.Writer, cksum *cos.CksumHashSize, opts *Opts) (aw Writer) { 82 switch mime { 83 case ExtTar: 84 aw = &tarWriter{} 85 case ExtTgz, ExtTarGz: 86 aw = &tgzWriter{} 87 case ExtZip: 88 aw = &zipWriter{} 89 case ExtTarLz4: 90 aw = &lz4Writer{} 91 default: 92 debug.Assert(false, mime) 93 } 94 aw.init(w, cksum, opts) 95 return 96 } 97 98 // baseW 99 100 func (bw *baseW) init(w io.Writer, cksum *cos.CksumHashSize, opts *Opts) { 101 bw.buf, bw.slab = memsys.PageMM().Alloc() 102 103 bw.lck = cos.NopLocker{} 104 bw.cb = nopTarHeader 105 if opts != nil { 106 if opts.CB != nil { 107 bw.cb = opts.CB 108 } 109 if opts.Serialize { 110 bw.lck = &sync.Mutex{} 111 } 112 } 113 bw.wmul = w 114 if cksum != nil { 115 bw.wmul = cos.NewWriterMulti(w, cksum) 116 } 117 } 118 119 // tarWriter 120 121 func (tw *tarWriter) init(w io.Writer, cksum *cos.CksumHashSize, opts *Opts) { 122 tw.baseW.init(w, cksum, opts) 123 124 tw.format = tar.FormatUnknown // default 125 if opts != nil { 126 tw.format = opts.TarFormat 127 } 128 debug.Assert(tw.format == tar.FormatUnknown || tw.format == tar.FormatUSTAR || 129 tw.format == tar.FormatPAX || tw.format == tar.FormatGNU, tw.format.String()) 130 131 tw.tw = tar.NewWriter(tw.wmul) 132 } 133 134 func (tw *tarWriter) Fini() { 135 tw.slab.Free(tw.buf) 136 tw.tw.Close() 137 } 138 139 func (tw *tarWriter) Write(fullname string, oah cos.OAH, reader io.Reader) (err error) { 140 hdr := tar.Header{ 141 Typeflag: tar.TypeReg, 142 Name: fullname, 143 Size: oah.SizeBytes(), 144 ModTime: time.Unix(0, oah.AtimeUnix()), 145 Mode: int64(cos.PermRWRR), 146 Format: tw.format, 147 } 148 tw.cb(&hdr) 149 tw.lck.Lock() 150 if err = tw.tw.WriteHeader(&hdr); err == nil { 151 _, err = io.CopyBuffer(tw.tw, reader, tw.buf) 152 } 153 tw.lck.Unlock() 154 return err 155 } 156 157 func (tw *tarWriter) Copy(src io.Reader, _ ...int64) error { 158 return cpTar(src, tw.tw, tw.buf) 159 } 160 161 // set Uid/Gid bits in TAR header 162 // - note: cos.PermRWRR default 163 // - not calling standard tar.FileInfoHeader 164 165 func nopTarHeader(any) {} 166 167 func SetTarHeader(hdr any) { 168 thdr := hdr.(*tar.Header) 169 { 170 thdr.Uid = os.Getuid() 171 thdr.Gid = os.Getgid() 172 } 173 } 174 175 // tgzWriter 176 177 func (tzw *tgzWriter) init(w io.Writer, cksum *cos.CksumHashSize, opts *Opts) { 178 tzw.tw.baseW.init(w, cksum, opts) 179 tzw.gzw = gzip.NewWriter(tzw.tw.wmul) 180 tzw.tw.tw = tar.NewWriter(tzw.gzw) 181 } 182 183 func (tzw *tgzWriter) Fini() { 184 tzw.tw.Fini() 185 tzw.gzw.Close() 186 } 187 188 func (tzw *tgzWriter) Write(fullname string, oah cos.OAH, reader io.Reader) error { 189 return tzw.tw.Write(fullname, oah, reader) 190 } 191 192 func (tzw *tgzWriter) Copy(src io.Reader, _ ...int64) error { 193 gzr, err := gzip.NewReader(src) 194 if err != nil { 195 return err 196 } 197 err = cpTar(gzr, tzw.tw.tw, tzw.tw.buf) 198 cos.Close(gzr) 199 return err 200 } 201 202 // zipWriter 203 204 func (zw *zipWriter) init(w io.Writer, cksum *cos.CksumHashSize, opts *Opts) { 205 zw.baseW.init(w, cksum, opts) 206 zw.zw = zip.NewWriter(zw.wmul) 207 } 208 209 func (zw *zipWriter) Fini() { 210 zw.slab.Free(zw.buf) 211 zw.zw.Close() 212 } 213 214 func (zw *zipWriter) Write(fullname string, oah cos.OAH, reader io.Reader) error { 215 ziphdr := zip.FileHeader{ 216 Name: fullname, 217 Comment: fullname, 218 UncompressedSize64: uint64(oah.SizeBytes()), 219 Modified: time.Unix(0, oah.AtimeUnix()), 220 } 221 zw.cb(&ziphdr) 222 zw.lck.Lock() 223 zipw, err := zw.zw.CreateHeader(&ziphdr) 224 if err == nil { 225 _, err = io.CopyBuffer(zipw, reader, zw.buf) 226 } 227 zw.lck.Unlock() 228 return err 229 } 230 231 func (zw *zipWriter) Copy(src io.Reader, size ...int64) error { 232 r, ok := src.(io.ReaderAt) 233 debug.Assert(ok && len(size) == 1) 234 return cpZip(r, size[0], zw.zw, zw.buf) 235 } 236 237 // lz4Writer 238 239 func (lzw *lz4Writer) init(w io.Writer, cksum *cos.CksumHashSize, opts *Opts) { 240 lzw.tw.baseW.init(w, cksum, opts) 241 lzw.lzw = lz4.NewWriter(lzw.tw.wmul) 242 243 lzw.lzw.Header.BlockChecksum = false 244 lzw.lzw.Header.NoChecksum = !cmn.Rom.Features().IsSet(feat.LZ4FrameChecksum) 245 lzw.lzw.Header.BlockMaxSize = 256 * cos.KiB 246 if cmn.Rom.Features().IsSet(feat.LZ4Block1MB) { 247 lzw.lzw.Header.BlockMaxSize = cos.MiB 248 } 249 250 lzw.tw.tw = tar.NewWriter(lzw.lzw) 251 } 252 253 func (lzw *lz4Writer) Fini() { 254 lzw.tw.Fini() 255 lzw.lzw.Close() 256 } 257 258 func (lzw *lz4Writer) Write(fullname string, oah cos.OAH, reader io.Reader) error { 259 return lzw.tw.Write(fullname, oah, reader) 260 } 261 262 func (lzw *lz4Writer) Copy(src io.Reader, _ ...int64) error { 263 lzr := lz4.NewReader(src) 264 return cpTar(lzr, lzw.tw.tw, lzw.tw.buf) 265 }