github.com/pachyderm/pachyderm@v1.13.4/src/server/pkg/storage/fileset/index/writer.go (about) 1 package index 2 3 import ( 4 "context" 5 "sync" 6 7 "github.com/pachyderm/pachyderm/src/client/pkg/pbutil" 8 "github.com/pachyderm/pachyderm/src/server/pkg/storage/chunk" 9 "github.com/pachyderm/pachyderm/src/server/pkg/uuid" 10 ) 11 12 var ( 13 averageBits = 20 14 ) 15 16 type levelWriter struct { 17 cw *chunk.Writer 18 pbw pbutil.Writer 19 lastIdx *Index 20 } 21 22 type data struct { 23 idx *Index 24 level int 25 } 26 27 // Writer is used for creating a multilevel index into a serialized file set. 28 // Each index level is a stream of byte length encoded index entries that are stored in chunk storage. 29 type Writer struct { 30 ctx context.Context 31 chunks *chunk.Storage 32 tmpID string 33 34 mu sync.Mutex 35 levels []*levelWriter 36 closed bool 37 root *Index 38 } 39 40 // NewWriter create a new Writer. 41 func NewWriter(ctx context.Context, chunks *chunk.Storage, tmpID string) *Writer { 42 return &Writer{ 43 ctx: ctx, 44 chunks: chunks, 45 tmpID: tmpID, 46 } 47 } 48 49 // WriteIndex writes an index entry. 50 func (w *Writer) WriteIndex(idx *Index) error { 51 w.mu.Lock() 52 defer w.mu.Unlock() 53 w.setupLevels() 54 unresolveParts(idx) 55 return w.writeIndex(idx, 0) 56 } 57 58 func (w *Writer) setupLevels() { 59 // Setup the first index level. 60 if w.levels == nil { 61 cw := w.chunks.NewWriter(w.ctx, w.tmpID, w.callback(0), chunk.WithRollingHashConfig(averageBits, 0)) 62 w.levels = append(w.levels, &levelWriter{ 63 cw: cw, 64 pbw: pbutil.NewWriter(cw), 65 }) 66 } 67 } 68 69 func (w *Writer) writeIndex(idx *Index, level int) error { 70 l := w.levels[level] 71 var refDataRefs []*chunk.DataRef 72 if idx.Range != nil { 73 refDataRefs = []*chunk.DataRef{idx.Range.ChunkRef} 74 } 75 if idx.File != nil { 76 refDataRefs = append(refDataRefs, idx.File.DataRefs...) 77 } 78 // Create an annotation for each index. 79 if err := l.cw.Annotate(&chunk.Annotation{ 80 RefDataRefs: refDataRefs, 81 Data: &data{ 82 idx: idx, 83 level: level, 84 }, 85 }); err != nil { 86 return err 87 } 88 _, err := l.pbw.Write(idx) 89 return err 90 } 91 92 func (w *Writer) callback(level int) chunk.WriterCallback { 93 return func(annotations []*chunk.Annotation) error { 94 w.mu.Lock() 95 defer w.mu.Unlock() 96 if len(annotations) == 0 { 97 return nil 98 } 99 lw := w.levels[level] 100 // Extract first and last index and setup file range. 101 idx := annotations[0].Data.(*data).idx 102 dataRef := annotations[0].NextDataRef 103 // Edge case handling. 104 if len(annotations) > 1 { 105 // Skip the first index if it started in the previous chunk. 106 if lw.lastIdx != nil && idx.Path == lw.lastIdx.Path { 107 idx = annotations[1].Data.(*data).idx 108 dataRef = annotations[1].NextDataRef 109 } 110 } 111 lw.lastIdx = annotations[len(annotations)-1].Data.(*data).idx 112 // Set standard fields in index. 113 lastPath := lw.lastIdx.Path 114 if lw.lastIdx.Range != nil { 115 lastPath = lw.lastIdx.Range.LastPath 116 } 117 idx.Range = &Range{ 118 Offset: dataRef.OffsetBytes, 119 LastPath: lastPath, 120 ChunkRef: chunk.Reference(dataRef), 121 } 122 // Set the root index when the writer is closed and we are at the top index level. 123 if w.closed { 124 w.root = idx 125 } 126 // Create next index level if it does not exist. 127 if level == len(w.levels)-1 { 128 cw := w.chunks.NewWriter(w.ctx, uuid.NewWithoutDashes(), w.callback(level+1), chunk.WithRollingHashConfig(averageBits, int64(level+1))) 129 w.levels = append(w.levels, &levelWriter{ 130 cw: cw, 131 pbw: pbutil.NewWriter(cw), 132 }) 133 } 134 // Write index entry in next index level. 135 return w.writeIndex(idx, level+1) 136 } 137 } 138 139 // Close finishes the index, and returns the serialized top index level. 140 func (w *Writer) Close() (ret *Index, retErr error) { 141 w.mu.Lock() 142 w.closed = true 143 w.mu.Unlock() 144 145 // Note: new levels can be created while closing, so the number of iterations 146 // necessary can increase as the levels are being closed. Levels stop getting 147 // created when the top level chunk writer has been closed and the number of 148 // annotations and chunks it has is one (one annotation in one chunk). 149 for i := 0; i < len(w.levels); i++ { 150 w.mu.Lock() 151 l := w.levels[i] 152 w.mu.Unlock() 153 if err := l.cw.Close(); err != nil { 154 return nil, err 155 } 156 if l.cw.AnnotationCount() == 1 && l.cw.ChunkCount() == 1 { 157 break 158 } 159 } 160 return w.root, nil 161 }