github.com/treeverse/lakefs@v1.24.1-0.20240520134607-95648127bfb0/pkg/graveler/retention/active_commits.go (about)

     1  package retention
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"fmt"
     7  	"time"
     8  
     9  	"github.com/treeverse/lakefs/pkg/graveler"
    10  )
    11  
// CommitNode holds the per-commit data used while computing the set of active
// commits for garbage collection.
type CommitNode struct {
	// CreationDate is the creation time of the commit; it is compared against a
	// retention threshold during ancestry traversal.
	CreationDate time.Time
	// MainParent is the parent followed when walking the commit's main ancestry.
	// As populated by GetGarbageCollectionCommits it is empty for a commit with no parents.
	MainParent   graveler.CommitID
	// MetaRangeID is the metarange ID recorded for the commit.
	MetaRangeID  graveler.MetaRangeID
}
    17  
    18  func NewCommitNode(creationDate time.Time, mainParent graveler.CommitID, metaRangeID graveler.MetaRangeID) CommitNode {
    19  	return CommitNode{
    20  		CreationDate: creationDate,
    21  		MainParent:   mainParent,
    22  		MetaRangeID:  metaRangeID,
    23  	}
    24  }
    25  
// ErrCommitNotFound is returned, wrapped together with the missing commit ID,
// when a commit referenced during garbage collection traversal is absent from
// the repository's commit listing.
var ErrCommitNotFound = errors.New("commit not found")
    27  
    28  // GetGarbageCollectionCommits returns the sets of active commits, according to the repository's garbage collection rules.
    29  // See https://github.com/treeverse/lakeFS/issues/1932 for more details.
    30  // Upon completion, the given startingPointIterator is closed.
    31  func GetGarbageCollectionCommits(ctx context.Context, startingPointIterator *GCStartingPointIterator, commitGetter *RepositoryCommitGetter, rules *graveler.GarbageCollectionRules) (map[graveler.CommitID]graveler.MetaRangeID, error) {
    32  	// From each starting point in the given startingPointIterator, it iterates through its main ancestry.
    33  	// All commits reached are added to the active set, until and including the first commit performed before the start of the retention period.
    34  	processed := make(map[graveler.CommitID]time.Time)
    35  	activeMap := make(map[graveler.CommitID]struct{})
    36  
    37  	commitsIterator, err := commitGetter.ListCommits(ctx)
    38  	if err != nil {
    39  		return nil, err
    40  	}
    41  	commitsMap := make(map[graveler.CommitID]CommitNode)
    42  	defer commitsIterator.Close()
    43  	for commitsIterator.Next() {
    44  		commitRecord := commitsIterator.Value()
    45  		var mainParent graveler.CommitID
    46  		if len(commitRecord.Commit.Parents) > 0 {
    47  			// every branch retains only its main ancestry, acquired by recursively taking the first parent:
    48  			mainParent = commitRecord.Commit.Parents[0]
    49  			if commitRecord.Commit.Version < graveler.CommitVersionParentSwitch {
    50  				mainParent = commitRecord.Commit.Parents[len(commitRecord.Commit.Parents)-1]
    51  			}
    52  		}
    53  		commitsMap[commitRecord.CommitID] = NewCommitNode(commitRecord.Commit.CreationDate, mainParent, commitRecord.MetaRangeID)
    54  	}
    55  
    56  	now := time.Now()
    57  	defer startingPointIterator.Close()
    58  	for startingPointIterator.Next() {
    59  		startingPoint := startingPointIterator.Value()
    60  		retentionDays := int(rules.DefaultRetentionDays)
    61  		commitNode, ok := commitsMap[startingPoint.CommitID]
    62  		if !ok {
    63  			return nil, fmt.Errorf("%w: %s", ErrCommitNotFound, startingPoint.CommitID)
    64  		}
    65  		if startingPoint.BranchID == "" {
    66  			// If the current commit is NOT a branch HEAD (a dangling commit) - add a hypothetical HEAD as its child
    67  			commitNode = CommitNode{
    68  				CreationDate: commitNode.CreationDate,
    69  				MainParent:   startingPoint.CommitID,
    70  			}
    71  		} else {
    72  			// If the current commit IS a branch HEAD - fetch and retention rules for this branch and...
    73  			var branchRetentionDays int32
    74  			if branchRetentionDays, ok = rules.BranchRetentionDays[string(startingPoint.BranchID)]; ok {
    75  				retentionDays = int(branchRetentionDays)
    76  			}
    77  			activeMap[startingPoint.CommitID] = struct{}{}
    78  		}
    79  		// Calculate the expiration time for the current commit
    80  		branchExpirationThreshold := now.AddDate(0, 0, -retentionDays)
    81  		if startingPoint.BranchID != "" {
    82  			// If the current commit IS a branch's HEAD, add it to the `processed` with the calculated expiration threshold.
    83  			// (it will be optionally examined later on by different commit paths to get the longest expiration threshold for a given commit)
    84  			processed[startingPoint.CommitID] = branchExpirationThreshold
    85  		}
    86  		// Start traversing the commit's ancestors (path):
    87  		for commitNode.MainParent != "" {
    88  			nextCommitID := commitNode.MainParent
    89  			var previousThreshold time.Time
    90  			if previousThreshold, ok = processed[nextCommitID]; ok && !previousThreshold.After(branchExpirationThreshold) {
    91  				// If the parent commit was already processed and its threshold was longer than the current threshold,
    92  				// i.e. the current threshold doesn't hold for it, stop processing it because the other path decision
    93  				// wins
    94  				break
    95  			}
    96  			if commitNode.CreationDate.After(branchExpirationThreshold) {
    97  				// If the current commit creation time is after the threshold, then its parent is active because the
    98  				// definition for 'active' is either creation time is after the threshold, or the first beyond
    99  				// the threshold. In either way, the PARENT is active.
   100  				activeMap[nextCommitID] = struct{}{}
   101  			}
   102  			// Continue down the rabbit hole.
   103  			commitNode, ok = commitsMap[nextCommitID]
   104  			if !ok {
   105  				return nil, fmt.Errorf("%w: %s", ErrCommitNotFound, nextCommitID)
   106  			}
   107  			// Set the parent commit ID's expiration threshold as the current (this is true because this one is the
   108  			// longest, because we wouldn't have gotten here otherwise)
   109  			processed[nextCommitID] = branchExpirationThreshold
   110  		}
   111  	}
   112  	if startingPointIterator.Err() != nil {
   113  		return nil, startingPointIterator.Err()
   114  	}
   115  	return makeCommitMap(commitsMap, activeMap), nil
   116  }
   117  
   118  func makeCommitMap(commitNodes map[graveler.CommitID]CommitNode, commitSet map[graveler.CommitID]struct{}) map[graveler.CommitID]graveler.MetaRangeID {
   119  	res := make(map[graveler.CommitID]graveler.MetaRangeID)
   120  	for commitID := range commitSet {
   121  		res[commitID] = commitNodes[commitID].MetaRangeID
   122  	}
   123  	return res
   124  }