github.com/pachyderm/pachyderm@v1.13.4/src/server/pkg/storage/fileset/unordered_writer.go (about) 1 package fileset 2 3 import ( 4 "bytes" 5 "context" 6 "io" 7 "path" 8 "sort" 9 "time" 10 11 "github.com/pachyderm/pachyderm/src/client/pkg/errors" 12 "github.com/pachyderm/pachyderm/src/server/pkg/storage/renew" 13 "github.com/pachyderm/pachyderm/src/server/pkg/tar" 14 ) 15 16 type memFile struct { 17 path string 18 parts map[string]*memPart 19 } 20 21 type memPart struct { 22 tag string 23 buf *bytes.Buffer 24 } 25 26 func (mp *memPart) Write(data []byte) (int, error) { 27 return mp.buf.Write(data) 28 } 29 30 type memFileSet struct { 31 additive map[string]*memFile 32 deletive map[string]*memFile 33 } 34 35 func newMemFileSet() *memFileSet { 36 return &memFileSet{ 37 additive: make(map[string]*memFile), 38 deletive: make(map[string]*memFile), 39 } 40 } 41 42 func (mfs *memFileSet) appendFile(p string, tag string) io.Writer { 43 return mfs.createMemPart(p, tag) 44 } 45 46 func (mfs *memFileSet) createMemPart(p string, tag string) *memPart { 47 if _, ok := mfs.additive[p]; !ok { 48 mfs.additive[p] = &memFile{ 49 path: p, 50 parts: make(map[string]*memPart), 51 } 52 } 53 mf := mfs.additive[p] 54 if _, ok := mf.parts[tag]; !ok { 55 mf.parts[tag] = &memPart{ 56 tag: tag, 57 buf: &bytes.Buffer{}, 58 } 59 } 60 return mf.parts[tag] 61 } 62 63 func (mfs *memFileSet) deleteFile(p, tag string) { 64 if tag == "" { 65 delete(mfs.additive, p) 66 mfs.deletive[p] = &memFile{path: p} 67 return 68 } 69 if mf, ok := mfs.additive[p]; ok { 70 delete(mf.parts, tag) 71 } 72 if _, ok := mfs.deletive[p]; !ok { 73 mfs.deletive[p] = &memFile{ 74 path: p, 75 parts: make(map[string]*memPart), 76 } 77 } 78 mf := mfs.deletive[p] 79 mf.parts[tag] = &memPart{tag: tag} 80 } 81 82 func (mfs *memFileSet) serialize(w *Writer) error { 83 if err := mfs.serializeAdditive(w); err != nil { 84 return err 85 } 86 return mfs.serializeDeletive(w) 87 } 88 89 func (mfs *memFileSet) serializeAdditive(w *Writer) error { 90 for _, mf := range sortMemFiles(mfs.additive) { 91 if err := w.Append(mf.path, func(fw *FileWriter) error { 92 return serializeParts(fw, mf) 93 }); err != nil { 94 return err 95 } 96 } 97 return nil 98 } 99 100 func serializeParts(fw *FileWriter, mf *memFile) error { 101 for _, mp := range sortMemParts(mf.parts) { 102 fw.Append(mp.tag) 103 if _, err := fw.Write(mp.buf.Bytes()); err != nil { 104 return err 105 } 106 } 107 return nil 108 } 109 110 func (mfs *memFileSet) serializeDeletive(w *Writer) error { 111 for _, mf := range sortMemFiles(mfs.deletive) { 112 var tags []string 113 for _, mp := range sortMemParts(mf.parts) { 114 tags = append(tags, mp.tag) 115 } 116 w.Delete(mf.path, tags...) 117 } 118 return nil 119 } 120 121 func sortMemFiles(mfs map[string]*memFile) []*memFile { 122 var result []*memFile 123 for _, mf := range mfs { 124 result = append(result, mf) 125 } 126 sort.SliceStable(result, func(i, j int) bool { 127 return result[i].path < result[j].path 128 }) 129 return result 130 } 131 132 func sortMemParts(mps map[string]*memPart) []*memPart { 133 var result []*memPart 134 for _, mp := range mps { 135 result = append(result, mp) 136 } 137 sort.SliceStable(result, func(i, j int) bool { 138 return result[i].tag < result[j].tag 139 }) 140 return result 141 } 142 143 // UnorderedWriter allows writing Files, unordered by path, into multiple ordered filesets. 144 // This may be a full filesystem or a subfilesystem (e.g. datum / datum set / shard). 145 type UnorderedWriter struct { 146 ctx context.Context 147 storage *Storage 148 memAvailable, memThreshold int64 149 name string 150 defaultTag string 151 memFileSet *memFileSet 152 subFileSet int64 153 ttl time.Duration 154 renewer *renew.StringSet 155 } 156 157 func newUnorderedWriter(ctx context.Context, storage *Storage, name string, memThreshold int64, defaultTag string, opts ...UnorderedWriterOption) (*UnorderedWriter, error) { 158 if err := storage.filesetSem.Acquire(ctx, 1); err != nil { 159 return nil, err 160 } 161 uw := &UnorderedWriter{ 162 ctx: ctx, 163 storage: storage, 164 memAvailable: memThreshold, 165 memThreshold: memThreshold, 166 name: name, 167 defaultTag: defaultTag, 168 memFileSet: newMemFileSet(), 169 } 170 for _, opt := range opts { 171 opt(uw) 172 } 173 return uw, nil 174 } 175 176 // Put reads files from a tar stream and adds them to the fileset. 177 // TODO: Make overwrite work with tags. 178 func (uw *UnorderedWriter) Put(r io.Reader, overwrite bool, customTag ...string) error { 179 tag := uw.defaultTag 180 if len(customTag) > 0 && customTag[0] != "" { 181 tag = customTag[0] 182 } 183 tr := tar.NewReader(r) 184 for { 185 hdr, err := tr.Next() 186 if err != nil { 187 if errors.Is(err, io.EOF) { 188 return nil 189 } 190 return err 191 } 192 p := Clean(hdr.Name, hdr.FileInfo().IsDir()) 193 if hdr.Typeflag == tar.TypeDir { 194 continue 195 } 196 // TODO: Tag overwrite? 197 if overwrite { 198 uw.memFileSet.deleteFile(p, "") 199 } 200 w := uw.memFileSet.appendFile(p, tag) 201 for { 202 n, err := io.CopyN(w, tr, uw.memAvailable) 203 uw.memAvailable -= n 204 if err != nil { 205 if errors.Is(err, io.EOF) { 206 break 207 } 208 return err 209 } 210 if uw.memAvailable == 0 { 211 if err := uw.serialize(); err != nil { 212 return err 213 } 214 w = uw.memFileSet.appendFile(p, tag) 215 } 216 } 217 } 218 } 219 220 // Delete deletes a file from the file set. 221 // TODO: Directory deletion needs more invariant checks. 222 // Right now you have to specify the trailing slash explicitly. 223 func (uw *UnorderedWriter) Delete(name string, tags ...string) { 224 name = Clean(name, IsDir(name)) 225 var tag string 226 if len(tag) > 0 { 227 tag = tags[0] 228 } 229 uw.memFileSet.deleteFile(name, tag) 230 } 231 232 // serialize will be called whenever the in-memory file set is past the memory threshold. 233 // A new in-memory file set will be created for the following operations. 234 func (uw *UnorderedWriter) serialize() error { 235 // Serialize file set. 236 var writerOpts []WriterOption 237 if uw.ttl > 0 { 238 writerOpts = append(writerOpts, WithTTL(uw.ttl)) 239 } 240 p := path.Join(uw.name, SubFileSetStr(uw.subFileSet)) 241 w := uw.storage.newWriter(uw.ctx, p, writerOpts...) 242 if err := uw.memFileSet.serialize(w); err != nil { 243 return err 244 } 245 if err := w.Close(); err != nil { 246 return err 247 } 248 if uw.renewer != nil { 249 uw.renewer.Add(p) 250 } 251 // Reset in-memory file set. 252 uw.memFileSet = newMemFileSet() 253 uw.memAvailable = uw.memThreshold 254 uw.subFileSet++ 255 return nil 256 } 257 258 // Close closes the writer. 259 func (uw *UnorderedWriter) Close() error { 260 defer uw.storage.filesetSem.Release(1) 261 return uw.serialize() 262 }