github.com/pachyderm/pachyderm@v1.13.4/src/server/pkg/storage/fileset/index/writer.go (about)

     1  package index
     2  
     3  import (
     4  	"context"
     5  	"sync"
     6  
     7  	"github.com/pachyderm/pachyderm/src/client/pkg/pbutil"
     8  	"github.com/pachyderm/pachyderm/src/server/pkg/storage/chunk"
     9  	"github.com/pachyderm/pachyderm/src/server/pkg/uuid"
    10  )
    11  
var (
	// averageBits is passed as the first argument to
	// chunk.WithRollingHashConfig for the chunk writer of every index level.
	// NOTE(review): presumably this controls the average chunk size chosen by
	// the rolling hash; confirm against the chunk package.
	averageBits = 20
)
    15  
// levelWriter holds the per-level state of the index writer.
type levelWriter struct {
	// cw is the chunk writer that this level's index entries are written to.
	cw *chunk.Writer
	// pbw wraps cw and is used to write each index entry to it.
	pbw pbutil.Writer
	// lastIdx is the index carried by the last annotation of the most recent
	// callback invocation for this level. The callback compares its path with
	// the first index of the next invocation.
	lastIdx *Index
}
    21  
// data is the payload attached to each chunk annotation by writeIndex and
// read back inside the chunk writer callback.
type data struct {
	// idx is the index entry the annotation was created for.
	idx *Index
	// level is the index level the entry was written at.
	level int
}
    26  
// Writer is used for creating a multilevel index into a serialized file set.
// Each index level is a stream of byte length encoded index entries that are stored in chunk storage.
type Writer struct {
	// ctx is passed to every chunk writer created by this Writer.
	ctx context.Context
	// chunks is the chunk storage used to create the per-level chunk writers.
	chunks *chunk.Storage
	// tmpID is passed to the chunk writer of the first index level.
	tmpID string

	// mu is held by WriteIndex, by the chunk writer callbacks, and by Close
	// while they access the fields below.
	mu sync.Mutex
	// levels holds one writer per index level; level 0 is created lazily on
	// the first WriteIndex call and higher levels are created by callbacks.
	levels []*levelWriter
	// closed is set by Close before the levels are closed.
	closed bool
	// root is assigned by callbacks that run once closed is set, and is the
	// value returned by Close.
	root *Index
}
    39  
    40  // NewWriter create a new Writer.
    41  func NewWriter(ctx context.Context, chunks *chunk.Storage, tmpID string) *Writer {
    42  	return &Writer{
    43  		ctx:    ctx,
    44  		chunks: chunks,
    45  		tmpID:  tmpID,
    46  	}
    47  }
    48  
    49  // WriteIndex writes an index entry.
    50  func (w *Writer) WriteIndex(idx *Index) error {
    51  	w.mu.Lock()
    52  	defer w.mu.Unlock()
    53  	w.setupLevels()
    54  	unresolveParts(idx)
    55  	return w.writeIndex(idx, 0)
    56  }
    57  
    58  func (w *Writer) setupLevels() {
    59  	// Setup the first index level.
    60  	if w.levels == nil {
    61  		cw := w.chunks.NewWriter(w.ctx, w.tmpID, w.callback(0), chunk.WithRollingHashConfig(averageBits, 0))
    62  		w.levels = append(w.levels, &levelWriter{
    63  			cw:  cw,
    64  			pbw: pbutil.NewWriter(cw),
    65  		})
    66  	}
    67  }
    68  
    69  func (w *Writer) writeIndex(idx *Index, level int) error {
    70  	l := w.levels[level]
    71  	var refDataRefs []*chunk.DataRef
    72  	if idx.Range != nil {
    73  		refDataRefs = []*chunk.DataRef{idx.Range.ChunkRef}
    74  	}
    75  	if idx.File != nil {
    76  		refDataRefs = append(refDataRefs, idx.File.DataRefs...)
    77  	}
    78  	// Create an annotation for each index.
    79  	if err := l.cw.Annotate(&chunk.Annotation{
    80  		RefDataRefs: refDataRefs,
    81  		Data: &data{
    82  			idx:   idx,
    83  			level: level,
    84  		},
    85  	}); err != nil {
    86  		return err
    87  	}
    88  	_, err := l.pbw.Write(idx)
    89  	return err
    90  }
    91  
    92  func (w *Writer) callback(level int) chunk.WriterCallback {
    93  	return func(annotations []*chunk.Annotation) error {
    94  		w.mu.Lock()
    95  		defer w.mu.Unlock()
    96  		if len(annotations) == 0 {
    97  			return nil
    98  		}
    99  		lw := w.levels[level]
   100  		// Extract first and last index and setup file range.
   101  		idx := annotations[0].Data.(*data).idx
   102  		dataRef := annotations[0].NextDataRef
   103  		// Edge case handling.
   104  		if len(annotations) > 1 {
   105  			// Skip the first index if it started in the previous chunk.
   106  			if lw.lastIdx != nil && idx.Path == lw.lastIdx.Path {
   107  				idx = annotations[1].Data.(*data).idx
   108  				dataRef = annotations[1].NextDataRef
   109  			}
   110  		}
   111  		lw.lastIdx = annotations[len(annotations)-1].Data.(*data).idx
   112  		// Set standard fields in index.
   113  		lastPath := lw.lastIdx.Path
   114  		if lw.lastIdx.Range != nil {
   115  			lastPath = lw.lastIdx.Range.LastPath
   116  		}
   117  		idx.Range = &Range{
   118  			Offset:   dataRef.OffsetBytes,
   119  			LastPath: lastPath,
   120  			ChunkRef: chunk.Reference(dataRef),
   121  		}
   122  		// Set the root index when the writer is closed and we are at the top index level.
   123  		if w.closed {
   124  			w.root = idx
   125  		}
   126  		// Create next index level if it does not exist.
   127  		if level == len(w.levels)-1 {
   128  			cw := w.chunks.NewWriter(w.ctx, uuid.NewWithoutDashes(), w.callback(level+1), chunk.WithRollingHashConfig(averageBits, int64(level+1)))
   129  			w.levels = append(w.levels, &levelWriter{
   130  				cw:  cw,
   131  				pbw: pbutil.NewWriter(cw),
   132  			})
   133  		}
   134  		// Write index entry in next index level.
   135  		return w.writeIndex(idx, level+1)
   136  	}
   137  }
   138  
   139  // Close finishes the index, and returns the serialized top index level.
   140  func (w *Writer) Close() (ret *Index, retErr error) {
   141  	w.mu.Lock()
   142  	w.closed = true
   143  	w.mu.Unlock()
   144  
   145  	// Note: new levels can be created while closing, so the number of iterations
   146  	// necessary can increase as the levels are being closed. Levels stop getting
   147  	// created when the top level chunk writer has been closed and the number of
   148  	// annotations and chunks it has is one (one annotation in one chunk).
   149  	for i := 0; i < len(w.levels); i++ {
   150  		w.mu.Lock()
   151  		l := w.levels[i]
   152  		w.mu.Unlock()
   153  		if err := l.cw.Close(); err != nil {
   154  			return nil, err
   155  		}
   156  		if l.cw.AnnotationCount() == 1 && l.cw.ChunkCount() == 1 {
   157  			break
   158  		}
   159  	}
   160  	return w.root, nil
   161  }