github.com/pachyderm/pachyderm@v1.13.4/src/server/pkg/storage/fileset/writer.go (about)

     1  package fileset
     2  
     3  import (
     4  	"context"
     5  	"time"
     6  
     7  	"github.com/pachyderm/pachyderm/src/client/pkg/errors"
     8  	"github.com/pachyderm/pachyderm/src/server/pkg/storage/chunk"
     9  	"github.com/pachyderm/pachyderm/src/server/pkg/storage/fileset/index"
    10  	"github.com/pachyderm/pachyderm/src/server/pkg/storage/track"
    11  	"github.com/pachyderm/pachyderm/src/server/pkg/uuid"
    12  )
    13  
    14  // TODO: Size zero files need to be addressed now that we are moving away from storing tar headers.
    15  // We can run into the same issue as deletions where a lot of size zero files can cause us to get backed up
    16  // since no chunks will get created. The solution we have in mind is to write a small number of bytes
    17  // for a size zero file, then either not store references to them or ignore them at read time.
    18  
// FileWriter provides functionality for writing a file.
type FileWriter struct {
	w   *Writer        // parent file set writer; accumulates total sizeBytes
	cw  *chunk.Writer  // underlying chunk writer that receives the file's bytes
	idx *index.Index   // index entry being built for this file (path + parts)
}
    25  
    26  // Append sets an append tag for the next set of bytes.
    27  func (fw *FileWriter) Append(tag string) {
    28  	fw.idx.File.Parts = append(fw.idx.File.Parts, &index.Part{Tag: tag})
    29  }
    30  
    31  func (fw *FileWriter) Write(data []byte) (int, error) {
    32  	parts := fw.idx.File.Parts
    33  	part := parts[len(parts)-1]
    34  	part.SizeBytes += int64(len(data))
    35  	fw.w.sizeBytes += int64(len(data))
    36  	return fw.cw.Write(data)
    37  }
    38  
// Writer provides functionality for writing a file set.
type Writer struct {
	ctx                context.Context
	tracker            track.Tracker
	store              Store
	path               string // file set path written to the store on Close
	additive, deletive *index.Writer // index writers for added and deleted files respectively
	sizeBytes          int64  // running total of file bytes written/copied
	cw                 *chunk.Writer
	idx                *index.Index // index of the file currently being written (for path-order checks)
	deletePath         string       // last deleted path (for path-order checks in Delete)
	lastIdx            *index.Index // last index seen by callback; flushed when the path changes
	noUpload           bool         // when set, skip writing indexes and store/tracker metadata
	indexFunc          func(*index.Index) error // optional hook invoked for each completed index
	ttl                time.Duration // time-to-live for the tracker object created on Close
}
    55  
    56  func newWriter(ctx context.Context, store Store, tracker track.Tracker, chunks *chunk.Storage, path string, opts ...WriterOption) *Writer {
    57  	uuidStr := uuid.NewWithoutDashes()
    58  	w := &Writer{
    59  		ctx:     ctx,
    60  		store:   store,
    61  		tracker: tracker,
    62  		path:    path,
    63  	}
    64  	for _, opt := range opts {
    65  		opt(w)
    66  	}
    67  	var chunkWriterOpts []chunk.WriterOption
    68  	if w.noUpload {
    69  		chunkWriterOpts = append(chunkWriterOpts, chunk.WithNoUpload())
    70  	}
    71  	w.additive = index.NewWriter(ctx, chunks, "additive-index-writer-"+uuidStr)
    72  	w.deletive = index.NewWriter(ctx, chunks, "deletive-index-writer-"+uuidStr)
    73  	w.cw = chunks.NewWriter(ctx, "chunk-writer-"+uuidStr, w.callback, chunkWriterOpts...)
    74  	return w
    75  }
    76  
    77  // Append creates an append operation for a file and provides a scoped file writer.
    78  func (w *Writer) Append(p string, cb func(*FileWriter) error) error {
    79  	fw, err := w.newFileWriter(p, w.cw)
    80  	if err != nil {
    81  		return err
    82  	}
    83  	return cb(fw)
    84  }
    85  
    86  func (w *Writer) newFileWriter(p string, cw *chunk.Writer) (*FileWriter, error) {
    87  	idx := &index.Index{
    88  		Path: p,
    89  		File: &index.File{},
    90  	}
    91  	if err := w.nextIdx(idx); err != nil {
    92  		return nil, err
    93  	}
    94  	return &FileWriter{
    95  		w:   w,
    96  		cw:  cw,
    97  		idx: idx,
    98  	}, nil
    99  }
   100  
   101  func (w *Writer) nextIdx(idx *index.Index) error {
   102  	if w.idx != nil {
   103  		if err := w.checkPath(w.idx.Path, idx.Path); err != nil {
   104  			return err
   105  		}
   106  	}
   107  	w.idx = idx
   108  	return w.cw.Annotate(&chunk.Annotation{
   109  		Data: idx,
   110  	})
   111  }
   112  
   113  // Delete creates a delete operation for a file.
   114  // TODO: Check path order.
   115  func (w *Writer) Delete(p string, tags ...string) error {
   116  	if w.deletePath != "" {
   117  		if err := w.checkPath(w.deletePath, p); err != nil {
   118  			return err
   119  		}
   120  	}
   121  	w.deletePath = p
   122  	idx := &index.Index{
   123  		Path: p,
   124  		File: &index.File{},
   125  	}
   126  	for _, tag := range tags {
   127  		idx.File.Parts = append(idx.File.Parts, &index.Part{Tag: tag})
   128  	}
   129  	return w.deletive.WriteIndex(idx)
   130  }
   131  
   132  func (w *Writer) checkPath(prev, p string) error {
   133  	if prev == p {
   134  		return errors.Errorf("cannot write same path (%s) twice", p)
   135  	}
   136  	if prev > p {
   137  		return errors.Errorf("cannot write path (%s) after (%s)", p, prev)
   138  	}
   139  	return nil
   140  }
   141  
   142  // Copy copies a file to the file set writer.
   143  func (w *Writer) Copy(file File) error {
   144  	idx := file.Index()
   145  	copyIdx := &index.Index{
   146  		Path: idx.Path,
   147  		File: &index.File{
   148  			Parts: idx.File.Parts,
   149  		},
   150  	}
   151  	if err := w.nextIdx(copyIdx); err != nil {
   152  		return err
   153  	}
   154  	// Copy the file data refs if they are resolved.
   155  	if idx.File.DataRefs != nil {
   156  		for _, dataRef := range idx.File.DataRefs {
   157  			w.sizeBytes += dataRef.SizeBytes
   158  			if err := w.cw.Copy(dataRef); err != nil {
   159  				return err
   160  			}
   161  		}
   162  		return nil
   163  	}
   164  	// Copy the file part data refs otherwise.
   165  	for _, part := range idx.File.Parts {
   166  		for _, dataRef := range part.DataRefs {
   167  			w.sizeBytes += dataRef.SizeBytes
   168  			if err := w.cw.Copy(dataRef); err != nil {
   169  				return err
   170  			}
   171  		}
   172  	}
   173  	return nil
   174  }
   175  
// callback is invoked by the chunk writer with the annotations (set in
// nextIdx) that landed in a completed chunk. It accumulates data refs into
// the current file's index (lastIdx) and flushes that index to the additive
// index writer (and the optional indexFunc hook) once a new path appears,
// i.e. once the file is known to be complete. The final index is flushed by
// Close, since no later path arrives to trigger it here.
func (w *Writer) callback(annotations []*chunk.Annotation) error {
	for _, annotation := range annotations {
		idx := annotation.Data.(*index.Index)
		// First annotation ever seen: start accumulating into it.
		if w.lastIdx == nil {
			w.lastIdx = idx
		}
		// A new path means the previous file is complete; flush it.
		if idx.Path != w.lastIdx.Path {
			if !w.noUpload {
				if err := w.additive.WriteIndex(w.lastIdx); err != nil {
					return err
				}
			}
			if w.indexFunc != nil {
				if err := w.indexFunc(w.lastIdx); err != nil {
					return err
				}
			}
			w.lastIdx = idx
		}
		// Attach the chunk data ref (if any) to the current file's index.
		if annotation.NextDataRef != nil {
			w.lastIdx.File.DataRefs = append(w.lastIdx.File.DataRefs, annotation.NextDataRef)
		}
	}
	return nil
}
   201  
// Close closes the writer. It flushes the chunk writer (which drives the
// remaining callback invocations), writes out the final pending index, and
// unless noUpload is set, closes both index writers, registers a tracker
// object for the resulting indexes, and records the file set's metadata in
// the store.
func (w *Writer) Close() error {
	// Closing the chunk writer flushes buffered data and triggers callback
	// for any remaining annotations, so lastIdx is final after this.
	if err := w.cw.Close(); err != nil {
		return err
	}
	// Write out the last index.
	if w.lastIdx != nil {
		idx := w.lastIdx
		if !w.noUpload {
			if err := w.additive.WriteIndex(idx); err != nil {
				return err
			}
		}
		if w.indexFunc != nil {
			if err := w.indexFunc(idx); err != nil {
				return err
			}
		}
	}
	// In no-upload mode there is nothing to persist.
	if w.noUpload {
		return nil
	}
	// Close the index writers.
	additiveIdx, err := w.additive.Close()
	if err != nil {
		return err
	}
	deletiveIdx, err := w.deletive.Close()
	if err != nil {
		return err
	}
	// TODO: This should be one transaction.
	if err := createTrackerObject(w.ctx, w.path, []*index.Index{additiveIdx, deletiveIdx}, w.tracker, w.ttl); err != nil {
		return err
	}
	return w.store.Set(w.ctx, w.path, &Metadata{
		Path:      w.path,
		Additive:  additiveIdx,
		Deletive:  deletiveIdx,
		SizeBytes: w.sizeBytes,
	})
}