github.com/pachyderm/pachyderm@v1.13.4/src/server/pkg/storage/fileset/unordered_writer.go (about)

     1  package fileset
     2  
     3  import (
     4  	"bytes"
     5  	"context"
     6  	"io"
     7  	"path"
     8  	"sort"
     9  	"time"
    10  
    11  	"github.com/pachyderm/pachyderm/src/client/pkg/errors"
    12  	"github.com/pachyderm/pachyderm/src/server/pkg/storage/renew"
    13  	"github.com/pachyderm/pachyderm/src/server/pkg/tar"
    14  )
    15  
    16  type memFile struct {
    17  	path  string
    18  	parts map[string]*memPart
    19  }
    20  
    21  type memPart struct {
    22  	tag string
    23  	buf *bytes.Buffer
    24  }
    25  
    26  func (mp *memPart) Write(data []byte) (int, error) {
    27  	return mp.buf.Write(data)
    28  }
    29  
    30  type memFileSet struct {
    31  	additive map[string]*memFile
    32  	deletive map[string]*memFile
    33  }
    34  
    35  func newMemFileSet() *memFileSet {
    36  	return &memFileSet{
    37  		additive: make(map[string]*memFile),
    38  		deletive: make(map[string]*memFile),
    39  	}
    40  }
    41  
    42  func (mfs *memFileSet) appendFile(p string, tag string) io.Writer {
    43  	return mfs.createMemPart(p, tag)
    44  }
    45  
    46  func (mfs *memFileSet) createMemPart(p string, tag string) *memPart {
    47  	if _, ok := mfs.additive[p]; !ok {
    48  		mfs.additive[p] = &memFile{
    49  			path:  p,
    50  			parts: make(map[string]*memPart),
    51  		}
    52  	}
    53  	mf := mfs.additive[p]
    54  	if _, ok := mf.parts[tag]; !ok {
    55  		mf.parts[tag] = &memPart{
    56  			tag: tag,
    57  			buf: &bytes.Buffer{},
    58  		}
    59  	}
    60  	return mf.parts[tag]
    61  }
    62  
    63  func (mfs *memFileSet) deleteFile(p, tag string) {
    64  	if tag == "" {
    65  		delete(mfs.additive, p)
    66  		mfs.deletive[p] = &memFile{path: p}
    67  		return
    68  	}
    69  	if mf, ok := mfs.additive[p]; ok {
    70  		delete(mf.parts, tag)
    71  	}
    72  	if _, ok := mfs.deletive[p]; !ok {
    73  		mfs.deletive[p] = &memFile{
    74  			path:  p,
    75  			parts: make(map[string]*memPart),
    76  		}
    77  	}
    78  	mf := mfs.deletive[p]
    79  	mf.parts[tag] = &memPart{tag: tag}
    80  }
    81  
    82  func (mfs *memFileSet) serialize(w *Writer) error {
    83  	if err := mfs.serializeAdditive(w); err != nil {
    84  		return err
    85  	}
    86  	return mfs.serializeDeletive(w)
    87  }
    88  
    89  func (mfs *memFileSet) serializeAdditive(w *Writer) error {
    90  	for _, mf := range sortMemFiles(mfs.additive) {
    91  		if err := w.Append(mf.path, func(fw *FileWriter) error {
    92  			return serializeParts(fw, mf)
    93  		}); err != nil {
    94  			return err
    95  		}
    96  	}
    97  	return nil
    98  }
    99  
   100  func serializeParts(fw *FileWriter, mf *memFile) error {
   101  	for _, mp := range sortMemParts(mf.parts) {
   102  		fw.Append(mp.tag)
   103  		if _, err := fw.Write(mp.buf.Bytes()); err != nil {
   104  			return err
   105  		}
   106  	}
   107  	return nil
   108  }
   109  
   110  func (mfs *memFileSet) serializeDeletive(w *Writer) error {
   111  	for _, mf := range sortMemFiles(mfs.deletive) {
   112  		var tags []string
   113  		for _, mp := range sortMemParts(mf.parts) {
   114  			tags = append(tags, mp.tag)
   115  		}
   116  		w.Delete(mf.path, tags...)
   117  	}
   118  	return nil
   119  }
   120  
   121  func sortMemFiles(mfs map[string]*memFile) []*memFile {
   122  	var result []*memFile
   123  	for _, mf := range mfs {
   124  		result = append(result, mf)
   125  	}
   126  	sort.SliceStable(result, func(i, j int) bool {
   127  		return result[i].path < result[j].path
   128  	})
   129  	return result
   130  }
   131  
   132  func sortMemParts(mps map[string]*memPart) []*memPart {
   133  	var result []*memPart
   134  	for _, mp := range mps {
   135  		result = append(result, mp)
   136  	}
   137  	sort.SliceStable(result, func(i, j int) bool {
   138  		return result[i].tag < result[j].tag
   139  	})
   140  	return result
   141  }
   142  
   143  // UnorderedWriter allows writing Files, unordered by path, into multiple ordered filesets.
   144  // This may be a full filesystem or a subfilesystem (e.g. datum / datum set / shard).
   145  type UnorderedWriter struct {
   146  	ctx                        context.Context
   147  	storage                    *Storage
   148  	memAvailable, memThreshold int64
   149  	name                       string
   150  	defaultTag                 string
   151  	memFileSet                 *memFileSet
   152  	subFileSet                 int64
   153  	ttl                        time.Duration
   154  	renewer                    *renew.StringSet
   155  }
   156  
   157  func newUnorderedWriter(ctx context.Context, storage *Storage, name string, memThreshold int64, defaultTag string, opts ...UnorderedWriterOption) (*UnorderedWriter, error) {
   158  	if err := storage.filesetSem.Acquire(ctx, 1); err != nil {
   159  		return nil, err
   160  	}
   161  	uw := &UnorderedWriter{
   162  		ctx:          ctx,
   163  		storage:      storage,
   164  		memAvailable: memThreshold,
   165  		memThreshold: memThreshold,
   166  		name:         name,
   167  		defaultTag:   defaultTag,
   168  		memFileSet:   newMemFileSet(),
   169  	}
   170  	for _, opt := range opts {
   171  		opt(uw)
   172  	}
   173  	return uw, nil
   174  }
   175  
   176  // Put reads files from a tar stream and adds them to the fileset.
   177  // TODO: Make overwrite work with tags.
   178  func (uw *UnorderedWriter) Put(r io.Reader, overwrite bool, customTag ...string) error {
   179  	tag := uw.defaultTag
   180  	if len(customTag) > 0 && customTag[0] != "" {
   181  		tag = customTag[0]
   182  	}
   183  	tr := tar.NewReader(r)
   184  	for {
   185  		hdr, err := tr.Next()
   186  		if err != nil {
   187  			if errors.Is(err, io.EOF) {
   188  				return nil
   189  			}
   190  			return err
   191  		}
   192  		p := Clean(hdr.Name, hdr.FileInfo().IsDir())
   193  		if hdr.Typeflag == tar.TypeDir {
   194  			continue
   195  		}
   196  		// TODO: Tag overwrite?
   197  		if overwrite {
   198  			uw.memFileSet.deleteFile(p, "")
   199  		}
   200  		w := uw.memFileSet.appendFile(p, tag)
   201  		for {
   202  			n, err := io.CopyN(w, tr, uw.memAvailable)
   203  			uw.memAvailable -= n
   204  			if err != nil {
   205  				if errors.Is(err, io.EOF) {
   206  					break
   207  				}
   208  				return err
   209  			}
   210  			if uw.memAvailable == 0 {
   211  				if err := uw.serialize(); err != nil {
   212  					return err
   213  				}
   214  				w = uw.memFileSet.appendFile(p, tag)
   215  			}
   216  		}
   217  	}
   218  }
   219  
   220  // Delete deletes a file from the file set.
   221  // TODO: Directory deletion needs more invariant checks.
   222  // Right now you have to specify the trailing slash explicitly.
   223  func (uw *UnorderedWriter) Delete(name string, tags ...string) {
   224  	name = Clean(name, IsDir(name))
   225  	var tag string
   226  	if len(tag) > 0 {
   227  		tag = tags[0]
   228  	}
   229  	uw.memFileSet.deleteFile(name, tag)
   230  }
   231  
   232  // serialize will be called whenever the in-memory file set is past the memory threshold.
   233  // A new in-memory file set will be created for the following operations.
   234  func (uw *UnorderedWriter) serialize() error {
   235  	// Serialize file set.
   236  	var writerOpts []WriterOption
   237  	if uw.ttl > 0 {
   238  		writerOpts = append(writerOpts, WithTTL(uw.ttl))
   239  	}
   240  	p := path.Join(uw.name, SubFileSetStr(uw.subFileSet))
   241  	w := uw.storage.newWriter(uw.ctx, p, writerOpts...)
   242  	if err := uw.memFileSet.serialize(w); err != nil {
   243  		return err
   244  	}
   245  	if err := w.Close(); err != nil {
   246  		return err
   247  	}
   248  	if uw.renewer != nil {
   249  		uw.renewer.Add(p)
   250  	}
   251  	// Reset in-memory file set.
   252  	uw.memFileSet = newMemFileSet()
   253  	uw.memAvailable = uw.memThreshold
   254  	uw.subFileSet++
   255  	return nil
   256  }
   257  
   258  // Close closes the writer.
   259  func (uw *UnorderedWriter) Close() error {
   260  	defer uw.storage.filesetSem.Release(1)
   261  	return uw.serialize()
   262  }