github.com/treeverse/lakefs@v1.24.1-0.20240520134607-95648127bfb0/pkg/graveler/committed/commit.go (about) 1 package committed 2 3 import ( 4 "bytes" 5 "context" 6 "fmt" 7 8 "github.com/treeverse/lakefs/pkg/graveler" 9 "github.com/treeverse/lakefs/pkg/logging" 10 ) 11 12 type CommitOptions struct { 13 // Set to allow commits that change nothing (otherwise ErrNoChanges) 14 AllowEmpty bool 15 } 16 17 type committer struct { 18 ctx context.Context 19 logger logging.Logger 20 21 writer MetaRangeWriter 22 base Iterator 23 changes graveler.ValueIterator 24 opts *CommitOptions 25 summary graveler.DiffSummary 26 haveChanges, haveBase bool 27 } 28 29 // applyAllBase writes all remaining changes from Base Iterator to writer 30 func (a *committer) applyAllBase(iter Iterator) error { 31 for { 32 select { 33 case <-a.ctx.Done(): 34 return a.ctx.Err() 35 default: 36 } 37 iterValue, iterRange := iter.Value() 38 if iterValue == nil { 39 if a.logger.IsTracing() { 40 a.logger.WithFields(logging.Fields{ 41 "from": string(iterRange.MinKey), 42 "to": string(iterRange.MaxKey), 43 "ID": iterRange.ID, 44 }).Trace("copy entire range at end") 45 } 46 if err := a.writer.WriteRange(*iterRange); err != nil { 47 return fmt.Errorf("copy iter range %s: %w", iterRange.ID, err) 48 } 49 if !iter.NextRange() { 50 break 51 } 52 } else { 53 if a.logger.IsTracing() { 54 a.logger.WithFields(logging.Fields{ 55 "key": string(iterValue.Key), 56 "ID": string(iterValue.Identity), 57 }).Trace("write key from iter at end") 58 } 59 if err := a.writer.WriteRecord(*iterValue); err != nil { 60 return fmt.Errorf("write iter record: %w", err) 61 } 62 if !iter.Next() { 63 break 64 } 65 } 66 } 67 return iter.Err() 68 } 69 70 // applyAllChanges writes all remaining changes from Changes Iterator to writer and returns the number of writes 71 func (a *committer) applyAllChanges(iter graveler.ValueIterator) (int, error) { 72 var count int 73 for { 74 select { 75 case <-a.ctx.Done(): 76 return 0, a.ctx.Err() 77 default: 78 } 79 iterValue := iter.Value() 80 if !iterValue.IsTombstone() { 81 if a.logger.IsTracing() { 82 a.logger.WithFields(logging.Fields{ 83 "key": string(iterValue.Key), 84 "ID": string(iterValue.Identity), 85 }).Trace("write key from iter at end") 86 } 87 if err := a.writer.WriteRecord(*iterValue); err != nil { 88 return 0, fmt.Errorf("write iter record: %w", err) 89 } 90 count++ 91 } 92 if !iter.Next() { 93 break 94 } 95 } 96 return count, iter.Err() 97 } 98 99 func (a *committer) hasChanges(summary graveler.DiffSummary) bool { 100 for _, changes := range summary.Count { 101 if changes > 0 { 102 return true 103 } 104 } 105 return false 106 } 107 108 func (a *committer) addIntoDiffSummary(typ graveler.DiffType, n int) { 109 a.summary.Count[typ] += n 110 } 111 112 func (a *committer) incrementDiffSummary(typ graveler.DiffType) { 113 a.addIntoDiffSummary(typ, 1) 114 } 115 116 func (a *committer) applyBaseRange(baseRange *Range, changeValue *graveler.ValueRecord) error { 117 if bytes.Compare(baseRange.MaxKey, changeValue.Key) < 0 { 118 // Base at start of range which we do not need to scan -- 119 // write and skip that entire range. 120 if a.logger.IsTracing() { 121 a.logger.WithFields(logging.Fields{ 122 "from": string(baseRange.MinKey), 123 "to": string(baseRange.MaxKey), 124 "ID": baseRange.ID, 125 }).Trace("copy entire base range") 126 } 127 128 if err := a.writer.WriteRange(*baseRange); err != nil { 129 return fmt.Errorf("copy base range %s: %w", baseRange.ID, err) 130 } 131 a.haveBase = a.base.NextRange() 132 } else { 133 // Base is at start of range which we need to scan, enter it. 134 a.haveBase = a.base.Next() 135 } 136 return nil 137 } 138 139 func (a *committer) applyNextKey(baseValue *graveler.ValueRecord, changeValue *graveler.ValueRecord) error { 140 var writeRecord *graveler.ValueRecord 141 142 compare := bytes.Compare(baseValue.Key, changeValue.Key) 143 switch { 144 case compare < 0: 145 // base key is smaller than change key - select record from base 146 writeRecord = baseValue 147 case changeValue.IsTombstone(): 148 // base key is equal or bigger - handle tombstone (delete) 149 if compare == 0 { 150 // key is equal - report as deleted 151 a.incrementDiffSummary(graveler.DiffTypeRemoved) 152 } 153 case compare == 0: 154 // base key is equal, no tombstone - handle change 155 if bytes.Equal(baseValue.Identity, changeValue.Identity) { 156 // same identity - just write the base (do not report any change) 157 writeRecord = baseValue 158 } else { 159 a.incrementDiffSummary(graveler.DiffTypeChanged) 160 writeRecord = changeValue 161 } 162 default: 163 // base key is bigger, no tombstone - handle new key 164 a.incrementDiffSummary(graveler.DiffTypeAdded) 165 writeRecord = changeValue 166 } 167 168 // Write record if needed 169 if writeRecord != nil { 170 if a.logger.IsTracing() { 171 a.logger.WithFields(logging.Fields{ 172 "key": string(writeRecord.Key), 173 "identity": string(writeRecord.Identity), 174 }).Trace("write record") 175 } 176 if err := a.writer.WriteRecord(*writeRecord); err != nil { 177 return fmt.Errorf("write record: %w", err) 178 } 179 } 180 181 // Update base and changes iterator to the next element 182 if compare >= 0 { 183 // used up this record from changes 184 a.haveChanges = a.changes.Next() 185 } 186 if compare <= 0 { 187 // used up this record from base 188 a.haveBase = a.base.Next() 189 } 190 return nil 191 } 192 193 func (a *committer) commit() error { 194 a.haveBase, a.haveChanges = a.base.Next(), a.changes.Next() 195 for a.haveBase && a.haveChanges { 196 select { 197 case <-a.ctx.Done(): 198 return a.ctx.Err() 199 default: 200 } 201 baseValue, baseRange := a.base.Value() 202 changeValue := a.changes.Value() 203 var err error 204 if baseValue == nil { 205 err = a.applyBaseRange(baseRange, changeValue) 206 } else { 207 err = a.applyNextKey(baseValue, changeValue) 208 } 209 if err != nil { 210 return err 211 } 212 } 213 if err := a.base.Err(); err != nil { 214 return err 215 } 216 if err := a.changes.Err(); err != nil { 217 return err 218 } 219 if a.haveBase { 220 if err := a.applyAllBase(a.base); err != nil { 221 return err 222 } 223 } 224 225 if a.haveChanges { 226 numAdded, err := a.applyAllChanges(a.changes) 227 if err != nil { 228 return err 229 } 230 if numAdded > 0 { 231 a.addIntoDiffSummary(graveler.DiffTypeAdded, numAdded) 232 } 233 } 234 235 if !a.opts.AllowEmpty && !a.hasChanges(a.summary) { 236 return graveler.ErrNoChanges 237 } 238 return a.changes.Err() 239 } 240 241 func Commit(ctx context.Context, writer MetaRangeWriter, base Iterator, changes graveler.ValueIterator, opts *CommitOptions) (graveler.DiffSummary, error) { 242 c := committer{ 243 ctx: ctx, 244 logger: logging.FromContext(ctx), 245 writer: writer, 246 base: base, 247 changes: changes, 248 opts: opts, 249 summary: graveler.DiffSummary{Count: make(map[graveler.DiffType]int)}, 250 } 251 return c.summary, c.commit() 252 }