github.com/treeverse/lakefs@v1.24.1-0.20240520134607-95648127bfb0/pkg/graveler/retention/active_commits_test.go (about)

     1  package retention
     2  
     3  import (
     4  	"context"
     5  	"sort"
     6  	"testing"
     7  	"time"
     8  
     9  	"github.com/go-test/deep"
    10  	"github.com/golang/mock/gomock"
    11  	"github.com/treeverse/lakefs/pkg/graveler"
    12  	"github.com/treeverse/lakefs/pkg/graveler/mock"
    13  	"github.com/treeverse/lakefs/pkg/graveler/testutil"
    14  )
    15  
// testCommit is the compact description of a single commit used by the test
// tables in this file.
//
// daysPassed is the commit's age in days: records built from it get a creation
// date of "now" minus daysPassed days. parents holds the IDs of the commit's
// parents; findMainAncestryLeaves treats parents[0] as the first parent.
type testCommit struct {
	daysPassed int
	parents    []graveler.CommitID
}
    20  
    21  func newTestCommit(daysPassed int, parents ...graveler.CommitID) testCommit {
    22  	return testCommit{
    23  		daysPassed: daysPassed,
    24  		parents:    parents,
    25  	}
    26  }
    27  
    28  // findMainAncestryLeaves returns commits which are not the first parent of any child.
    29  func findMainAncestryLeaves(now time.Time, heads map[string]int32, commits map[string]testCommit) []*graveler.CommitRecord {
    30  	var res []*graveler.CommitRecord
    31  	for commitID1, commit1 := range commits {
    32  		if _, ok := heads[commitID1]; ok {
    33  			continue
    34  		}
    35  		isLeaf := true
    36  		for _, commit2 := range commits {
    37  			if len(commit2.parents) == 0 {
    38  				continue
    39  			}
    40  			if commitID1 == string(commit2.parents[0]) {
    41  				isLeaf = false
    42  			}
    43  		}
    44  		if isLeaf {
    45  			res = append(res, &graveler.CommitRecord{
    46  				CommitID: graveler.CommitID(commitID1),
    47  				Commit: &graveler.Commit{
    48  					Version:      graveler.CurrentCommitVersion,
    49  					CreationDate: now.AddDate(0, 0, -commit1.daysPassed),
    50  					Parents:      commit1.parents,
    51  				},
    52  			})
    53  		}
    54  	}
    55  	sort.Slice(res, func(i, j int) bool {
    56  		return res[i].CommitID < res[j].CommitID
    57  	})
    58  	return res
    59  }
    60  
    61  func TestActiveCommits(t *testing.T) {
    62  	tests := map[string]struct {
    63  		commits            map[string]testCommit
    64  		headsRetentionDays map[string]int32
    65  		expectedActiveIDs  []string
    66  	}{
    67  		"two_branches": {
    68  			commits: map[string]testCommit{
    69  				"a": newTestCommit(15),
    70  				"b": newTestCommit(10, "a"),
    71  				"c": newTestCommit(10, "a"),
    72  				"d": newTestCommit(5, "c"),
    73  				"e": newTestCommit(5, "b"),
    74  				"f": newTestCommit(1, "e"),
    75  			},
    76  			headsRetentionDays: map[string]int32{"f": 7, "d": 3},
    77  			expectedActiveIDs:  []string{"b", "d", "e", "f"},
    78  		},
    79  		"old_heads": {
    80  			commits: map[string]testCommit{
    81  				"a": newTestCommit(15),
    82  				"b": newTestCommit(20, "a"),
    83  				"c": newTestCommit(20, "a"),
    84  				"d": newTestCommit(20, "a"),
    85  			},
    86  			headsRetentionDays: map[string]int32{"b": 7, "c": 7, "d": 7},
    87  			expectedActiveIDs:  []string{"b", "c", "d"},
    88  		},
    89  		"all_commits_active": {
    90  			commits: map[string]testCommit{
    91  				"a": newTestCommit(5),
    92  				"b": newTestCommit(4, "a"),
    93  				"c": newTestCommit(3, "b"),
    94  				"d": newTestCommit(2, "b"),
    95  				"e": newTestCommit(1, "b"),
    96  			},
    97  			headsRetentionDays: map[string]int32{"d": 15, "e": 7, "c": 2},
    98  			expectedActiveIDs:  []string{"a", "b", "c", "d", "e"},
    99  		},
   100  		"merge": {
   101  			commits: map[string]testCommit{
   102  				"a": newTestCommit(7),
   103  				"b": newTestCommit(6, "a"),
   104  				"c": newTestCommit(7),
   105  				"d": newTestCommit(6, "c", "a"),
   106  			},
   107  			headsRetentionDays: map[string]int32{"b": 3, "d": 10},
   108  			expectedActiveIDs:  []string{"b", "c", "d"},
   109  		},
   110  		"two_branches_with_previously_expired": {
   111  			commits: map[string]testCommit{
   112  				"a": newTestCommit(15),
   113  				"b": newTestCommit(10, "a"),
   114  				"c": newTestCommit(10, "a"),
   115  				"d": newTestCommit(5, "c"),
   116  				"e": newTestCommit(7, "b"),
   117  				"f": newTestCommit(1, "e"),
   118  			},
   119  			headsRetentionDays: map[string]int32{"f": 7, "d": 3},
   120  			expectedActiveIDs:  []string{"d", "e", "f"},
   121  		},
   122  		"merge_in_history": {
   123  			// graph taken from git core tests
   124  			// E---D---C---B---A
   125  			// \"-_         \   \
   126  			//  \  `---------G   \
   127  			//   \                \
   128  			//    F----------------H
   129  			commits: map[string]testCommit{
   130  				"e": newTestCommit(21),
   131  				"d": newTestCommit(20, "e"),
   132  				"f": newTestCommit(19, "e"),
   133  				"c": newTestCommit(18, "e"),
   134  				"b": newTestCommit(17, "d"),
   135  				"a": newTestCommit(4, "c"),
   136  				"g": newTestCommit(4, "b", "e"),
   137  				"h": newTestCommit(3, "a", "f"),
   138  			},
   139  			headsRetentionDays: map[string]int32{"h": 14, "g": 7, "f": 7},
   140  			expectedActiveIDs:  []string{"h", "a", "b", "c", "f", "g"},
   141  		},
   142  		"dangling_commits_active": {
   143  			commits: map[string]testCommit{
   144  				"a": newTestCommit(15),
   145  				"b": newTestCommit(10, "a"),
   146  				"c": newTestCommit(10, "a"),
   147  				"d": newTestCommit(5, "c"),
   148  				"e": newTestCommit(5, "b"),
   149  				"f": newTestCommit(1, "e"),
   150  				"g": newTestCommit(8, "c"),
   151  				"h": newTestCommit(7, "g"),
   152  				"i": newTestCommit(4, "h"),
   153  			},
   154  			headsRetentionDays: map[string]int32{"f": 7, "d": 3},
   155  			expectedActiveIDs:  []string{"b", "d", "e", "f", "h", "i"},
   156  		},
   157  		"dangling_commits_expired": {
   158  			commits: map[string]testCommit{
   159  				"a": newTestCommit(15),
   160  				"b": newTestCommit(10, "a"),
   161  				"c": newTestCommit(10, "a"),
   162  				"d": newTestCommit(5, "c"),
   163  				"e": newTestCommit(5, "b"),
   164  				"f": newTestCommit(1, "e"),
   165  				"g": newTestCommit(8, "c"),
   166  				"h": newTestCommit(7, "g"),
   167  				"i": newTestCommit(6, "h"),
   168  			},
   169  			headsRetentionDays: map[string]int32{"f": 7, "d": 3},
   170  			expectedActiveIDs:  []string{"b", "d", "e", "f"},
   171  		},
   172  
   173  		"dangling_from_before_expired": {
   174  			commits: map[string]testCommit{
   175  				"root":        newTestCommit(20),
   176  				"pre_expired": newTestCommit(20, "root"),
   177  				"e1":          newTestCommit(15, "pre_expired"),
   178  				"b":           newTestCommit(10, "e1"),
   179  				"c":           newTestCommit(10, "e1"),
   180  				"d":           newTestCommit(5, "c"),
   181  				"e":           newTestCommit(8, "b"),
   182  				"f":           newTestCommit(1, "e"),
   183  				"g":           newTestCommit(10, "root"), // dangling
   184  				"h":           newTestCommit(6, "g"),     // dangling
   185  			},
   186  			headsRetentionDays: map[string]int32{"f": 7, "d": 3},
   187  			expectedActiveIDs:  []string{"d", "e", "f"},
   188  		},
   189  		"retained_by_non_leaf_head": {
   190  			// commit x is retained because of the rule of head2, and not the rule of head1.
   191  			commits: map[string]testCommit{
   192  				"root":  newTestCommit(20),
   193  				"x":     newTestCommit(14, "root"),
   194  				"head2": newTestCommit(10, "x"),
   195  				"head1": newTestCommit(9, "head2"),
   196  			},
   197  			headsRetentionDays: map[string]int32{"head1": 9, "head2": 12},
   198  			expectedActiveIDs:  []string{"head1", "head2", "x"},
   199  		},
   200  		/*
   201  			<ep1- 8 days>
   202  				\
   203  				  <e4- 7 days> -- <e1- 7 days> -- <h1- 1 day>        (5-day-retention)
   204  				/
   205  			<ep2- 8 days> -- <e2- 7 days> -- <h2- 1 day>             (5-day-retention)
   206  				\
   207  				  <e5- 6 days> -- <e3- 6 days> -- <h3- 1 day>        (5-day-retention)
   208  		*/
   209  		"reachable_previously_expired": {
   210  			commits: map[string]testCommit{
   211  				"ep1": newTestCommit(8),
   212  				"ep2": newTestCommit(8),
   213  				"e5":  newTestCommit(6, "ep2"),
   214  				"e4":  newTestCommit(7, "ep1", "ep2"),
   215  				"e3":  newTestCommit(6, "e5"),  // expired yet active
   216  				"e2":  newTestCommit(6, "ep2"), // expired yet active
   217  				"e1":  newTestCommit(7, "e4"),  // expired yet active
   218  				"h3":  newTestCommit(1, "e3"),
   219  				"h2":  newTestCommit(1, "e2"),
   220  				"h1":  newTestCommit(1, "e1"),
   221  			},
   222  			headsRetentionDays: map[string]int32{"h1": 5, "h2": 5, "h3": 5},
   223  			expectedActiveIDs:  []string{"h1", "h2", "h3", "e1", "e2", "e3"},
   224  		},
   225  	}
   226  	for name, tst := range tests {
   227  		t.Run(name, func(t *testing.T) {
   228  			now := time.Now()
   229  			ctrl := gomock.NewController(t)
   230  			refManagerMock := mock.NewMockRefManager(ctrl)
   231  			ctx := context.Background()
   232  			repositoryRecord := &graveler.RepositoryRecord{
   233  				RepositoryID: "test",
   234  			}
   235  			garbageCollectionRules := &graveler.GarbageCollectionRules{DefaultRetentionDays: 5, BranchRetentionDays: make(map[string]int32)}
   236  			var branches []*graveler.BranchRecord
   237  			for head, retentionDays := range tst.headsRetentionDays {
   238  				branches = append(branches, &graveler.BranchRecord{
   239  					BranchID: graveler.BranchID(head),
   240  					Branch: &graveler.Branch{
   241  						CommitID: graveler.CommitID(head),
   242  					},
   243  				})
   244  				garbageCollectionRules.BranchRetentionDays[head] = retentionDays
   245  			}
   246  			sort.Slice(branches, func(i, j int) bool {
   247  				return branches[i].CommitID < branches[j].CommitID
   248  			})
   249  
   250  			var commitsRecords []*graveler.CommitRecord
   251  			for commitID, commit := range tst.commits {
   252  				commitsRecords = append(commitsRecords, &graveler.CommitRecord{
   253  					CommitID: graveler.CommitID(commitID),
   254  					Commit: &graveler.Commit{
   255  						Parents:      commit.parents,
   256  						CreationDate: now.AddDate(0, 0, -commit.daysPassed),
   257  						Version:      graveler.CurrentCommitVersion,
   258  						MetaRangeID:  graveler.MetaRangeID("mr-" + commitID),
   259  					},
   260  				})
   261  			}
   262  
   263  			refManagerMock.EXPECT().ListCommits(ctx, repositoryRecord).Return(testutil.NewFakeCommitIterator(commitsRecords), nil).MaxTimes(1)
   264  
   265  			gcCommits, err := GetGarbageCollectionCommits(ctx, NewGCStartingPointIterator(
   266  				testutil.NewFakeCommitIterator(findMainAncestryLeaves(now, tst.headsRetentionDays, tst.commits)),
   267  				testutil.NewFakeBranchIterator(branches)), &RepositoryCommitGetter{
   268  				refManager: refManagerMock,
   269  				repository: repositoryRecord,
   270  			}, garbageCollectionRules)
   271  			if err != nil {
   272  				t.Fatalf("failed to find expired commits: %v", err)
   273  			}
   274  			validateMetaRangeIDs(t, gcCommits)
   275  			activeCommitIDs := testMapToCommitIDs(gcCommits)
   276  
   277  			sort.Strings(tst.expectedActiveIDs)
   278  			sort.Slice(activeCommitIDs, func(i, j int) bool {
   279  				return activeCommitIDs[i].Ref() < activeCommitIDs[j].Ref()
   280  			})
   281  			if diff := deep.Equal(tst.expectedActiveIDs, testToStringArray(activeCommitIDs)); diff != nil {
   282  				t.Errorf("active commits ids diff=%s", diff)
   283  			}
   284  		})
   285  	}
   286  }
   287  
   288  func validateMetaRangeIDs(t *testing.T, commits map[graveler.CommitID]graveler.MetaRangeID) {
   289  	for commitID, metaRangeID := range commits {
   290  		if string(metaRangeID) != "mr-"+string(commitID) {
   291  			t.Errorf("unexpected metarange ID for commit %s. expected=%s, got=%s.", commitID, "mr-"+commitID, metaRangeID)
   292  		}
   293  	}
   294  }
   295  
   296  func testMapToCommitIDs(commits map[graveler.CommitID]graveler.MetaRangeID) []graveler.CommitID {
   297  	res := make([]graveler.CommitID, 0, len(commits))
   298  	for commitID := range commits {
   299  		res = append(res, commitID)
   300  	}
   301  	return res
   302  }
   303  
   304  func testToStringArray(commitIDs []graveler.CommitID) []string {
   305  	res := make([]string, len(commitIDs))
   306  	for i := range commitIDs {
   307  		res[i] = string(commitIDs[i])
   308  	}
   309  	return res
   310  }