github.com/treeverse/lakefs@v1.24.1-0.20240520134607-95648127bfb0/pkg/graveler/retention/active_commits_test.go (about) 1 package retention 2 3 import ( 4 "context" 5 "sort" 6 "testing" 7 "time" 8 9 "github.com/go-test/deep" 10 "github.com/golang/mock/gomock" 11 "github.com/treeverse/lakefs/pkg/graveler" 12 "github.com/treeverse/lakefs/pkg/graveler/mock" 13 "github.com/treeverse/lakefs/pkg/graveler/testutil" 14 ) 15 16 type testCommit struct { 17 daysPassed int 18 parents []graveler.CommitID 19 } 20 21 func newTestCommit(daysPassed int, parents ...graveler.CommitID) testCommit { 22 return testCommit{ 23 daysPassed: daysPassed, 24 parents: parents, 25 } 26 } 27 28 // findMainAncestryLeaves returns commits which are not the first parent of any child. 29 func findMainAncestryLeaves(now time.Time, heads map[string]int32, commits map[string]testCommit) []*graveler.CommitRecord { 30 var res []*graveler.CommitRecord 31 for commitID1, commit1 := range commits { 32 if _, ok := heads[commitID1]; ok { 33 continue 34 } 35 isLeaf := true 36 for _, commit2 := range commits { 37 if len(commit2.parents) == 0 { 38 continue 39 } 40 if commitID1 == string(commit2.parents[0]) { 41 isLeaf = false 42 } 43 } 44 if isLeaf { 45 res = append(res, &graveler.CommitRecord{ 46 CommitID: graveler.CommitID(commitID1), 47 Commit: &graveler.Commit{ 48 Version: graveler.CurrentCommitVersion, 49 CreationDate: now.AddDate(0, 0, -commit1.daysPassed), 50 Parents: commit1.parents, 51 }, 52 }) 53 } 54 } 55 sort.Slice(res, func(i, j int) bool { 56 return res[i].CommitID < res[j].CommitID 57 }) 58 return res 59 } 60 61 func TestActiveCommits(t *testing.T) { 62 tests := map[string]struct { 63 commits map[string]testCommit 64 headsRetentionDays map[string]int32 65 expectedActiveIDs []string 66 }{ 67 "two_branches": { 68 commits: map[string]testCommit{ 69 "a": newTestCommit(15), 70 "b": newTestCommit(10, "a"), 71 "c": newTestCommit(10, "a"), 72 "d": newTestCommit(5, "c"), 73 "e": newTestCommit(5, "b"), 74 "f": newTestCommit(1, "e"), 75 }, 76 headsRetentionDays: map[string]int32{"f": 7, "d": 3}, 77 expectedActiveIDs: []string{"b", "d", "e", "f"}, 78 }, 79 "old_heads": { 80 commits: map[string]testCommit{ 81 "a": newTestCommit(15), 82 "b": newTestCommit(20, "a"), 83 "c": newTestCommit(20, "a"), 84 "d": newTestCommit(20, "a"), 85 }, 86 headsRetentionDays: map[string]int32{"b": 7, "c": 7, "d": 7}, 87 expectedActiveIDs: []string{"b", "c", "d"}, 88 }, 89 "all_commits_active": { 90 commits: map[string]testCommit{ 91 "a": newTestCommit(5), 92 "b": newTestCommit(4, "a"), 93 "c": newTestCommit(3, "b"), 94 "d": newTestCommit(2, "b"), 95 "e": newTestCommit(1, "b"), 96 }, 97 headsRetentionDays: map[string]int32{"d": 15, "e": 7, "c": 2}, 98 expectedActiveIDs: []string{"a", "b", "c", "d", "e"}, 99 }, 100 "merge": { 101 commits: map[string]testCommit{ 102 "a": newTestCommit(7), 103 "b": newTestCommit(6, "a"), 104 "c": newTestCommit(7), 105 "d": newTestCommit(6, "c", "a"), 106 }, 107 headsRetentionDays: map[string]int32{"b": 3, "d": 10}, 108 expectedActiveIDs: []string{"b", "c", "d"}, 109 }, 110 "two_branches_with_previously_expired": { 111 commits: map[string]testCommit{ 112 "a": newTestCommit(15), 113 "b": newTestCommit(10, "a"), 114 "c": newTestCommit(10, "a"), 115 "d": newTestCommit(5, "c"), 116 "e": newTestCommit(7, "b"), 117 "f": newTestCommit(1, "e"), 118 }, 119 headsRetentionDays: map[string]int32{"f": 7, "d": 3}, 120 expectedActiveIDs: []string{"d", "e", "f"}, 121 }, 122 "merge_in_history": { 123 // graph taken from git core tests 124 // E---D---C---B---A 125 // \"-_ \ \ 126 // \ `---------G \ 127 // \ \ 128 // F----------------H 129 commits: map[string]testCommit{ 130 "e": newTestCommit(21), 131 "d": newTestCommit(20, "e"), 132 "f": newTestCommit(19, "e"), 133 "c": newTestCommit(18, "e"), 134 "b": newTestCommit(17, "d"), 135 "a": newTestCommit(4, "c"), 136 "g": newTestCommit(4, "b", "e"), 137 "h": newTestCommit(3, "a", "f"), 138 }, 139 headsRetentionDays: map[string]int32{"h": 14, "g": 7, "f": 7}, 140 expectedActiveIDs: []string{"h", "a", "b", "c", "f", "g"}, 141 }, 142 "dangling_commits_active": { 143 commits: map[string]testCommit{ 144 "a": newTestCommit(15), 145 "b": newTestCommit(10, "a"), 146 "c": newTestCommit(10, "a"), 147 "d": newTestCommit(5, "c"), 148 "e": newTestCommit(5, "b"), 149 "f": newTestCommit(1, "e"), 150 "g": newTestCommit(8, "c"), 151 "h": newTestCommit(7, "g"), 152 "i": newTestCommit(4, "h"), 153 }, 154 headsRetentionDays: map[string]int32{"f": 7, "d": 3}, 155 expectedActiveIDs: []string{"b", "d", "e", "f", "h", "i"}, 156 }, 157 "dangling_commits_expired": { 158 commits: map[string]testCommit{ 159 "a": newTestCommit(15), 160 "b": newTestCommit(10, "a"), 161 "c": newTestCommit(10, "a"), 162 "d": newTestCommit(5, "c"), 163 "e": newTestCommit(5, "b"), 164 "f": newTestCommit(1, "e"), 165 "g": newTestCommit(8, "c"), 166 "h": newTestCommit(7, "g"), 167 "i": newTestCommit(6, "h"), 168 }, 169 headsRetentionDays: map[string]int32{"f": 7, "d": 3}, 170 expectedActiveIDs: []string{"b", "d", "e", "f"}, 171 }, 172 173 "dangling_from_before_expired": { 174 commits: map[string]testCommit{ 175 "root": newTestCommit(20), 176 "pre_expired": newTestCommit(20, "root"), 177 "e1": newTestCommit(15, "pre_expired"), 178 "b": newTestCommit(10, "e1"), 179 "c": newTestCommit(10, "e1"), 180 "d": newTestCommit(5, "c"), 181 "e": newTestCommit(8, "b"), 182 "f": newTestCommit(1, "e"), 183 "g": newTestCommit(10, "root"), // dangling 184 "h": newTestCommit(6, "g"), // dangling 185 }, 186 headsRetentionDays: map[string]int32{"f": 7, "d": 3}, 187 expectedActiveIDs: []string{"d", "e", "f"}, 188 }, 189 "retained_by_non_leaf_head": { 190 // commit x is retained because of the rule of head2, and not the rule of head1. 191 commits: map[string]testCommit{ 192 "root": newTestCommit(20), 193 "x": newTestCommit(14, "root"), 194 "head2": newTestCommit(10, "x"), 195 "head1": newTestCommit(9, "head2"), 196 }, 197 headsRetentionDays: map[string]int32{"head1": 9, "head2": 12}, 198 expectedActiveIDs: []string{"head1", "head2", "x"}, 199 }, 200 /* 201 <ep1- 8 days> 202 \ 203 <e4- 7 days> -- <e1- 7 days> -- <h1- 1 day> (5-day-retention) 204 / 205 <ep2- 8 days> -- <e2- 7 days> -- <h2- 1 day> (5-day-retention) 206 \ 207 <e5- 6 days> -- <e3- 6 days> -- <h3- 1 day> (5-day-retention) 208 */ 209 "reachable_previously_expired": { 210 commits: map[string]testCommit{ 211 "ep1": newTestCommit(8), 212 "ep2": newTestCommit(8), 213 "e5": newTestCommit(6, "ep2"), 214 "e4": newTestCommit(7, "ep1", "ep2"), 215 "e3": newTestCommit(6, "e5"), // expired yet active 216 "e2": newTestCommit(6, "ep2"), // expired yet active 217 "e1": newTestCommit(7, "e4"), // expired yet active 218 "h3": newTestCommit(1, "e3"), 219 "h2": newTestCommit(1, "e2"), 220 "h1": newTestCommit(1, "e1"), 221 }, 222 headsRetentionDays: map[string]int32{"h1": 5, "h2": 5, "h3": 5}, 223 expectedActiveIDs: []string{"h1", "h2", "h3", "e1", "e2", "e3"}, 224 }, 225 } 226 for name, tst := range tests { 227 t.Run(name, func(t *testing.T) { 228 now := time.Now() 229 ctrl := gomock.NewController(t) 230 refManagerMock := mock.NewMockRefManager(ctrl) 231 ctx := context.Background() 232 repositoryRecord := &graveler.RepositoryRecord{ 233 RepositoryID: "test", 234 } 235 garbageCollectionRules := &graveler.GarbageCollectionRules{DefaultRetentionDays: 5, BranchRetentionDays: make(map[string]int32)} 236 var branches []*graveler.BranchRecord 237 for head, retentionDays := range tst.headsRetentionDays { 238 branches = append(branches, &graveler.BranchRecord{ 239 BranchID: graveler.BranchID(head), 240 Branch: &graveler.Branch{ 241 CommitID: graveler.CommitID(head), 242 }, 243 }) 244 garbageCollectionRules.BranchRetentionDays[head] = retentionDays 245 } 246 sort.Slice(branches, func(i, j int) bool { 247 return branches[i].CommitID < branches[j].CommitID 248 }) 249 250 var commitsRecords []*graveler.CommitRecord 251 for commitID, commit := range tst.commits { 252 commitsRecords = append(commitsRecords, &graveler.CommitRecord{ 253 CommitID: graveler.CommitID(commitID), 254 Commit: &graveler.Commit{ 255 Parents: commit.parents, 256 CreationDate: now.AddDate(0, 0, -commit.daysPassed), 257 Version: graveler.CurrentCommitVersion, 258 MetaRangeID: graveler.MetaRangeID("mr-" + commitID), 259 }, 260 }) 261 } 262 263 refManagerMock.EXPECT().ListCommits(ctx, repositoryRecord).Return(testutil.NewFakeCommitIterator(commitsRecords), nil).MaxTimes(1) 264 265 gcCommits, err := GetGarbageCollectionCommits(ctx, NewGCStartingPointIterator( 266 testutil.NewFakeCommitIterator(findMainAncestryLeaves(now, tst.headsRetentionDays, tst.commits)), 267 testutil.NewFakeBranchIterator(branches)), &RepositoryCommitGetter{ 268 refManager: refManagerMock, 269 repository: repositoryRecord, 270 }, garbageCollectionRules) 271 if err != nil { 272 t.Fatalf("failed to find expired commits: %v", err) 273 } 274 validateMetaRangeIDs(t, gcCommits) 275 activeCommitIDs := testMapToCommitIDs(gcCommits) 276 277 sort.Strings(tst.expectedActiveIDs) 278 sort.Slice(activeCommitIDs, func(i, j int) bool { 279 return activeCommitIDs[i].Ref() < activeCommitIDs[j].Ref() 280 }) 281 if diff := deep.Equal(tst.expectedActiveIDs, testToStringArray(activeCommitIDs)); diff != nil { 282 t.Errorf("active commits ids diff=%s", diff) 283 } 284 }) 285 } 286 } 287 288 func validateMetaRangeIDs(t *testing.T, commits map[graveler.CommitID]graveler.MetaRangeID) { 289 for commitID, metaRangeID := range commits { 290 if string(metaRangeID) != "mr-"+string(commitID) { 291 t.Errorf("unexpected metarange ID for commit %s. expected=%s, got=%s.", commitID, "mr-"+commitID, metaRangeID) 292 } 293 } 294 } 295 296 func testMapToCommitIDs(commits map[graveler.CommitID]graveler.MetaRangeID) []graveler.CommitID { 297 res := make([]graveler.CommitID, 0, len(commits)) 298 for commitID := range commits { 299 res = append(res, commitID) 300 } 301 return res 302 } 303 304 func testToStringArray(commitIDs []graveler.CommitID) []string { 305 res := make([]string, len(commitIDs)) 306 for i := range commitIDs { 307 res[i] = string(commitIDs[i]) 308 } 309 return res 310 }