github.com/treeverse/lakefs@v1.24.1-0.20240520134607-95648127bfb0/pkg/graveler/committed/commit.go (about)

     1  package committed
     2  
     3  import (
     4  	"bytes"
     5  	"context"
     6  	"fmt"
     7  
     8  	"github.com/treeverse/lakefs/pkg/graveler"
     9  	"github.com/treeverse/lakefs/pkg/logging"
    10  )
    11  
// CommitOptions controls optional behavior of Commit.
type CommitOptions struct {
	// AllowEmpty permits commits that change nothing. When it is false, a
	// commit whose diff summary has no positive count fails with
	// graveler.ErrNoChanges.
	AllowEmpty bool
}
    16  
// committer holds the state of a single commit: the inputs being merged,
// the writer receiving the result, and the running diff summary.
type committer struct {
	// ctx is polled for cancellation on every iteration of the apply loops.
	ctx    context.Context
	// logger emits per-record and per-range trace messages when tracing is on.
	logger logging.Logger

	// writer receives the resulting records and whole ranges.
	writer                MetaRangeWriter
	// base iterates over the existing committed data, by range and by record.
	base                  Iterator
	// changes iterates over the values to apply; tombstones mark deletions.
	changes               graveler.ValueIterator
	// opts holds the caller-supplied commit options.
	opts                  *CommitOptions
	// summary counts the applied differences per diff type.
	summary               graveler.DiffSummary
	// haveChanges and haveBase hold the result of the most recent advance
	// (Next or NextRange) of the changes and base iterators respectively.
	haveChanges, haveBase bool
}
    28  
    29  // applyAllBase writes all remaining changes from Base Iterator to writer
    30  func (a *committer) applyAllBase(iter Iterator) error {
    31  	for {
    32  		select {
    33  		case <-a.ctx.Done():
    34  			return a.ctx.Err()
    35  		default:
    36  		}
    37  		iterValue, iterRange := iter.Value()
    38  		if iterValue == nil {
    39  			if a.logger.IsTracing() {
    40  				a.logger.WithFields(logging.Fields{
    41  					"from": string(iterRange.MinKey),
    42  					"to":   string(iterRange.MaxKey),
    43  					"ID":   iterRange.ID,
    44  				}).Trace("copy entire range at end")
    45  			}
    46  			if err := a.writer.WriteRange(*iterRange); err != nil {
    47  				return fmt.Errorf("copy iter range %s: %w", iterRange.ID, err)
    48  			}
    49  			if !iter.NextRange() {
    50  				break
    51  			}
    52  		} else {
    53  			if a.logger.IsTracing() {
    54  				a.logger.WithFields(logging.Fields{
    55  					"key": string(iterValue.Key),
    56  					"ID":  string(iterValue.Identity),
    57  				}).Trace("write key from iter at end")
    58  			}
    59  			if err := a.writer.WriteRecord(*iterValue); err != nil {
    60  				return fmt.Errorf("write iter record: %w", err)
    61  			}
    62  			if !iter.Next() {
    63  				break
    64  			}
    65  		}
    66  	}
    67  	return iter.Err()
    68  }
    69  
    70  // applyAllChanges writes all remaining changes from Changes Iterator to writer and returns the number of writes
    71  func (a *committer) applyAllChanges(iter graveler.ValueIterator) (int, error) {
    72  	var count int
    73  	for {
    74  		select {
    75  		case <-a.ctx.Done():
    76  			return 0, a.ctx.Err()
    77  		default:
    78  		}
    79  		iterValue := iter.Value()
    80  		if !iterValue.IsTombstone() {
    81  			if a.logger.IsTracing() {
    82  				a.logger.WithFields(logging.Fields{
    83  					"key": string(iterValue.Key),
    84  					"ID":  string(iterValue.Identity),
    85  				}).Trace("write key from iter at end")
    86  			}
    87  			if err := a.writer.WriteRecord(*iterValue); err != nil {
    88  				return 0, fmt.Errorf("write iter record: %w", err)
    89  			}
    90  			count++
    91  		}
    92  		if !iter.Next() {
    93  			break
    94  		}
    95  	}
    96  	return count, iter.Err()
    97  }
    98  
    99  func (a *committer) hasChanges(summary graveler.DiffSummary) bool {
   100  	for _, changes := range summary.Count {
   101  		if changes > 0 {
   102  			return true
   103  		}
   104  	}
   105  	return false
   106  }
   107  
   108  func (a *committer) addIntoDiffSummary(typ graveler.DiffType, n int) {
   109  	a.summary.Count[typ] += n
   110  }
   111  
   112  func (a *committer) incrementDiffSummary(typ graveler.DiffType) {
   113  	a.addIntoDiffSummary(typ, 1)
   114  }
   115  
   116  func (a *committer) applyBaseRange(baseRange *Range, changeValue *graveler.ValueRecord) error {
   117  	if bytes.Compare(baseRange.MaxKey, changeValue.Key) < 0 {
   118  		// Base at start of range which we do not need to scan --
   119  		// write and skip that entire range.
   120  		if a.logger.IsTracing() {
   121  			a.logger.WithFields(logging.Fields{
   122  				"from": string(baseRange.MinKey),
   123  				"to":   string(baseRange.MaxKey),
   124  				"ID":   baseRange.ID,
   125  			}).Trace("copy entire base range")
   126  		}
   127  
   128  		if err := a.writer.WriteRange(*baseRange); err != nil {
   129  			return fmt.Errorf("copy base range %s: %w", baseRange.ID, err)
   130  		}
   131  		a.haveBase = a.base.NextRange()
   132  	} else {
   133  		// Base is at start of range which we need to scan, enter it.
   134  		a.haveBase = a.base.Next()
   135  	}
   136  	return nil
   137  }
   138  
   139  func (a *committer) applyNextKey(baseValue *graveler.ValueRecord, changeValue *graveler.ValueRecord) error {
   140  	var writeRecord *graveler.ValueRecord
   141  
   142  	compare := bytes.Compare(baseValue.Key, changeValue.Key)
   143  	switch {
   144  	case compare < 0:
   145  		// base key is smaller than change key - select record from base
   146  		writeRecord = baseValue
   147  	case changeValue.IsTombstone():
   148  		// base key is equal or bigger - handle tombstone (delete)
   149  		if compare == 0 {
   150  			// key is equal - report as deleted
   151  			a.incrementDiffSummary(graveler.DiffTypeRemoved)
   152  		}
   153  	case compare == 0:
   154  		// base key is equal, no tombstone - handle change
   155  		if bytes.Equal(baseValue.Identity, changeValue.Identity) {
   156  			// same identity - just write the base (do not report any change)
   157  			writeRecord = baseValue
   158  		} else {
   159  			a.incrementDiffSummary(graveler.DiffTypeChanged)
   160  			writeRecord = changeValue
   161  		}
   162  	default:
   163  		// base key is bigger, no tombstone - handle new key
   164  		a.incrementDiffSummary(graveler.DiffTypeAdded)
   165  		writeRecord = changeValue
   166  	}
   167  
   168  	// Write record if needed
   169  	if writeRecord != nil {
   170  		if a.logger.IsTracing() {
   171  			a.logger.WithFields(logging.Fields{
   172  				"key":      string(writeRecord.Key),
   173  				"identity": string(writeRecord.Identity),
   174  			}).Trace("write record")
   175  		}
   176  		if err := a.writer.WriteRecord(*writeRecord); err != nil {
   177  			return fmt.Errorf("write record: %w", err)
   178  		}
   179  	}
   180  
   181  	// Update base and changes iterator to the next element
   182  	if compare >= 0 {
   183  		// used up this record from changes
   184  		a.haveChanges = a.changes.Next()
   185  	}
   186  	if compare <= 0 {
   187  		// used up this record from base
   188  		a.haveBase = a.base.Next()
   189  	}
   190  	return nil
   191  }
   192  
   193  func (a *committer) commit() error {
   194  	a.haveBase, a.haveChanges = a.base.Next(), a.changes.Next()
   195  	for a.haveBase && a.haveChanges {
   196  		select {
   197  		case <-a.ctx.Done():
   198  			return a.ctx.Err()
   199  		default:
   200  		}
   201  		baseValue, baseRange := a.base.Value()
   202  		changeValue := a.changes.Value()
   203  		var err error
   204  		if baseValue == nil {
   205  			err = a.applyBaseRange(baseRange, changeValue)
   206  		} else {
   207  			err = a.applyNextKey(baseValue, changeValue)
   208  		}
   209  		if err != nil {
   210  			return err
   211  		}
   212  	}
   213  	if err := a.base.Err(); err != nil {
   214  		return err
   215  	}
   216  	if err := a.changes.Err(); err != nil {
   217  		return err
   218  	}
   219  	if a.haveBase {
   220  		if err := a.applyAllBase(a.base); err != nil {
   221  			return err
   222  		}
   223  	}
   224  
   225  	if a.haveChanges {
   226  		numAdded, err := a.applyAllChanges(a.changes)
   227  		if err != nil {
   228  			return err
   229  		}
   230  		if numAdded > 0 {
   231  			a.addIntoDiffSummary(graveler.DiffTypeAdded, numAdded)
   232  		}
   233  	}
   234  
   235  	if !a.opts.AllowEmpty && !a.hasChanges(a.summary) {
   236  		return graveler.ErrNoChanges
   237  	}
   238  	return a.changes.Err()
   239  }
   240  
   241  func Commit(ctx context.Context, writer MetaRangeWriter, base Iterator, changes graveler.ValueIterator, opts *CommitOptions) (graveler.DiffSummary, error) {
   242  	c := committer{
   243  		ctx:     ctx,
   244  		logger:  logging.FromContext(ctx),
   245  		writer:  writer,
   246  		base:    base,
   247  		changes: changes,
   248  		opts:    opts,
   249  		summary: graveler.DiffSummary{Count: make(map[graveler.DiffType]int)},
   250  	}
   251  	return c.summary, c.commit()
   252  }