github.com/x-oss-byte/git-lfs@v2.5.2+incompatible/git/githistory/rewriter_test.go (about)

     1  package githistory
     2  
     3  import (
     4  	"bytes"
     5  	"encoding/hex"
     6  	"io"
     7  	"io/ioutil"
     8  	"reflect"
     9  	"strconv"
    10  	"strings"
    11  	"testing"
    12  
    13  	"github.com/git-lfs/git-lfs/errors"
    14  	"github.com/git-lfs/git-lfs/filepathfilter"
    15  	"github.com/git-lfs/gitobj"
    16  	"github.com/stretchr/testify/assert"
    17  )
    18  
    19  func TestRewriterRewritesHistory(t *testing.T) {
    20  	db := DatabaseFromFixture(t, "linear-history.git")
    21  	r := NewRewriter(db)
    22  
    23  	tip, err := r.Rewrite(&RewriteOptions{Include: []string{"refs/heads/master"},
    24  		BlobFn: func(path string, b *gitobj.Blob) (*gitobj.Blob, error) {
    25  			contents, err := ioutil.ReadAll(b.Contents)
    26  			if err != nil {
    27  				return nil, err
    28  			}
    29  
    30  			n, err := strconv.Atoi(string(contents))
    31  			if err != nil {
    32  				return nil, err
    33  			}
    34  
    35  			rewritten := strconv.Itoa(n + 1)
    36  
    37  			return &gitobj.Blob{
    38  				Contents: strings.NewReader(rewritten),
    39  				Size:     int64(len(rewritten)),
    40  			}, nil
    41  		},
    42  	})
    43  
    44  	assert.Nil(t, err)
    45  
    46  	tree1 := "ad0aebd16e34cf047820994ea7538a6d4a111082"
    47  	tree2 := "6e07bd31cb70c4add2c973481ad4fa38b235ca69"
    48  	tree3 := "c5decfe1fcf39b8c489f4a0bf3b3823676339f80"
    49  
    50  	// After rewriting, the HEAD state of the repository should contain a
    51  	// tree identical to:
    52  	//
    53  	//   100644 blob bf0d87ab1b2b0ec1a11a3973d2845b42413d9767   hello.txt
    54  
    55  	AssertCommitTree(t, db, hex.EncodeToString(tip), tree1)
    56  
    57  	AssertBlobContents(t, db, tree1, "hello.txt", "4")
    58  
    59  	// After rewriting, the HEAD~1 state of the repository should contain a
    60  	// tree identical to:
    61  	//
    62  	//   100644 blob e440e5c842586965a7fb77deda2eca68612b1f53   hello.txt
    63  
    64  	AssertCommitParent(t, db, hex.EncodeToString(tip), "4aaa3f49ffeabbb874250fe13ffeb8c683aba650")
    65  	AssertCommitTree(t, db, "4aaa3f49ffeabbb874250fe13ffeb8c683aba650", tree2)
    66  
    67  	AssertBlobContents(t, db, tree2, "hello.txt", "3")
    68  
    69  	// After rewriting, the HEAD~2 state of the repository should contain a
    70  	// tree identical to:
    71  	//
    72  	//   100644 blob d8263ee9860594d2806b0dfd1bfd17528b0ba2a4   hello.txt
    73  
    74  	AssertCommitParent(t, db, "4aaa3f49ffeabbb874250fe13ffeb8c683aba650", "24a341e1ff75addc22e336a8d87f82ba56b86fcf")
    75  	AssertCommitTree(t, db, "24a341e1ff75addc22e336a8d87f82ba56b86fcf", tree3)
    76  
    77  	AssertBlobContents(t, db, tree3, "hello.txt", "2")
    78  }
    79  
    80  func TestRewriterRewritesOctopusMerges(t *testing.T) {
    81  	db := DatabaseFromFixture(t, "octopus-merge.git")
    82  	r := NewRewriter(db)
    83  
    84  	tip, err := r.Rewrite(&RewriteOptions{Include: []string{"refs/heads/master"},
    85  		BlobFn: func(path string, b *gitobj.Blob) (*gitobj.Blob, error) {
    86  			return &gitobj.Blob{
    87  				Contents: io.MultiReader(b.Contents, strings.NewReader("_new")),
    88  				Size:     b.Size + int64(len("_new")),
    89  			}, nil
    90  		},
    91  	})
    92  
    93  	assert.Nil(t, err)
    94  
    95  	tree := "8a56716daa78325c3d0433cc163890969810b0da"
    96  
    97  	// After rewriting, the HEAD state of the repository should contain a
    98  	// tree identical to:
    99  	//
   100  	//   100644 blob 309f7fc2bfd9ae77b4131cf9cbcc3b548c42ca57    a.txt
   101  	//   100644 blob 70470dc26cb3eef54fe3dcba53066f7ca7c495c0    b.txt
   102  	//   100644 blob f2557f74fd5b60f959baf77091782089761e2dc3    hello.txt
   103  
   104  	AssertCommitTree(t, db, hex.EncodeToString(tip), tree)
   105  
   106  	AssertBlobContents(t, db, tree, "a.txt", "a_new")
   107  	AssertBlobContents(t, db, tree, "b.txt", "b_new")
   108  	AssertBlobContents(t, db, tree, "hello.txt", "hello_new")
   109  
   110  	// And should contain the following parents:
   111  	//
   112  	//   parent 1fe2b9577d5610e8d8fb2c3030534036fb648393
   113  	//   parent ca447959bdcd20253d69b227bcc7c2e1d3126d5c
   114  
   115  	AssertCommitParent(t, db, hex.EncodeToString(tip), "1fe2b9577d5610e8d8fb2c3030534036fb648393")
   116  	AssertCommitParent(t, db, hex.EncodeToString(tip), "ca447959bdcd20253d69b227bcc7c2e1d3126d5c")
   117  
   118  	// And each of those parents should contain the root commit as their own
   119  	// parent:
   120  
   121  	AssertCommitParent(t, db, "1fe2b9577d5610e8d8fb2c3030534036fb648393", "9237567f379b3c83ddf53ad9a2ae3755afb62a09")
   122  	AssertCommitParent(t, db, "ca447959bdcd20253d69b227bcc7c2e1d3126d5c", "9237567f379b3c83ddf53ad9a2ae3755afb62a09")
   123  }
   124  
   125  func TestRewriterVisitsPackedObjects(t *testing.T) {
   126  	db := DatabaseFromFixture(t, "packed-objects.git")
   127  	r := NewRewriter(db)
   128  
   129  	var contents []byte
   130  
   131  	_, err := r.Rewrite(&RewriteOptions{Include: []string{"refs/heads/master"},
   132  		BlobFn: func(path string, b *gitobj.Blob) (*gitobj.Blob, error) {
   133  			var err error
   134  
   135  			contents, err = ioutil.ReadAll(b.Contents)
   136  			if err != nil {
   137  				return nil, err
   138  			}
   139  
   140  			return &gitobj.Blob{
   141  				Contents: bytes.NewReader(contents),
   142  				Size:     int64(len(contents)),
   143  			}, nil
   144  		},
   145  	})
   146  
   147  	assert.NoError(t, err)
   148  	assert.Equal(t, string(contents), "Hello, world!\n")
   149  }
   150  
   151  func TestRewriterDoesntVisitUnchangedSubtrees(t *testing.T) {
   152  	db := DatabaseFromFixture(t, "repeated-subtrees.git")
   153  	r := NewRewriter(db)
   154  
   155  	seen := make(map[string]int)
   156  
   157  	_, err := r.Rewrite(&RewriteOptions{Include: []string{"refs/heads/master"},
   158  		BlobFn: func(path string, b *gitobj.Blob) (*gitobj.Blob, error) {
   159  			seen[path] = seen[path] + 1
   160  
   161  			return b, nil
   162  		},
   163  	})
   164  
   165  	assert.Nil(t, err)
   166  
   167  	assert.Equal(t, 2, seen["a.txt"])
   168  	assert.Equal(t, 1, seen["subdir/b.txt"])
   169  }
   170  
   171  func TestRewriterVisitsUniqueEntriesWithIdenticalContents(t *testing.T) {
   172  	db := DatabaseFromFixture(t, "identical-blobs.git")
   173  	r := NewRewriter(db)
   174  
   175  	tip, err := r.Rewrite(&RewriteOptions{Include: []string{"refs/heads/master"},
   176  		BlobFn: func(path string, b *gitobj.Blob) (*gitobj.Blob, error) {
   177  			if path == "b.txt" {
   178  				return b, nil
   179  			}
   180  
   181  			return &gitobj.Blob{
   182  				Contents: strings.NewReader("changed"),
   183  				Size:     int64(len("changed")),
   184  			}, nil
   185  		},
   186  	})
   187  
   188  	assert.Nil(t, err)
   189  
   190  	tree := "bbbe0a7676523ae02234bfe874784ca2380c2d4b"
   191  
   192  	AssertCommitTree(t, db, hex.EncodeToString(tip), tree)
   193  
   194  	// After rewriting, the HEAD state of the repository should contain a
   195  	// tree identical to:
   196  	//
   197  	//   100644 blob 21fb1eca31e64cd3914025058b21992ab76edcf9    a.txt
   198  	//   100644 blob 94f3610c08588440112ed977376f26a8fba169b0    b.txt
   199  
   200  	AssertBlobContents(t, db, tree, "a.txt", "changed")
   201  	AssertBlobContents(t, db, tree, "b.txt", "original")
   202  }
   203  
   204  func TestRewriterIgnoresPathsThatDontMatchFilter(t *testing.T) {
   205  	include := []string{"*.txt"}
   206  	exclude := []string{"subdir/*.txt"}
   207  
   208  	filter := filepathfilter.New(include, exclude)
   209  
   210  	db := DatabaseFromFixture(t, "non-repeated-subtrees.git")
   211  	r := NewRewriter(db, WithFilter(filter))
   212  
   213  	seen := make(map[string]int)
   214  
   215  	_, err := r.Rewrite(&RewriteOptions{Include: []string{"refs/heads/master"},
   216  		BlobFn: func(path string, b *gitobj.Blob) (*gitobj.Blob, error) {
   217  			seen[path] = seen[path] + 1
   218  
   219  			return b, nil
   220  		},
   221  	})
   222  
   223  	assert.Nil(t, err)
   224  	assert.Equal(t, 1, seen["a.txt"])
   225  	assert.Equal(t, 0, seen["subdir/b.txt"])
   226  }
   227  
   228  func TestRewriterAllowsAdditionalTreeEntries(t *testing.T) {
   229  	db := DatabaseFromFixture(t, "linear-history.git")
   230  	r := NewRewriter(db)
   231  
   232  	extra, err := db.WriteBlob(&gitobj.Blob{
   233  		Contents: strings.NewReader("extra\n"),
   234  		Size:     int64(len("extra\n")),
   235  	})
   236  	assert.Nil(t, err)
   237  
   238  	tip, err := r.Rewrite(&RewriteOptions{Include: []string{"refs/heads/master"},
   239  		BlobFn: func(path string, b *gitobj.Blob) (*gitobj.Blob, error) {
   240  			return b, nil
   241  		},
   242  
   243  		TreeCallbackFn: func(path string, tr *gitobj.Tree) (*gitobj.Tree, error) {
   244  			return &gitobj.Tree{
   245  				Entries: append(tr.Entries, &gitobj.TreeEntry{
   246  					Name:     "extra.txt",
   247  					Filemode: 0100644,
   248  					Oid:      extra,
   249  				}),
   250  			}, nil
   251  		},
   252  	})
   253  
   254  	assert.Nil(t, err)
   255  
   256  	tree1 := "40c2eb627a3b8e84b82a47a973d32960f3898b6a"
   257  	tree2 := "d7a5bcb69f2cd2652a014663a948952ea603c2c0"
   258  	tree3 := "45b752554d128f85bf23d7c3ddf48c47cbc345c8"
   259  
   260  	// After rewriting, the HEAD state of the repository should contain a
   261  	// tree identical to:
   262  	//
   263  	//   100644 blob e440e5c842586965a7fb77deda2eca68612b1f53    hello.txt
   264  	//   100644 blob 0f2287157f7cb0dd40498c7a92f74b6975fa2d57    extra.txt
   265  
   266  	AssertCommitTree(t, db, hex.EncodeToString(tip), tree1)
   267  
   268  	AssertBlobContents(t, db, tree1, "hello.txt", "3")
   269  	AssertBlobContents(t, db, tree1, "extra.txt", "extra\n")
   270  
   271  	// After rewriting, the HEAD~1 state of the repository should contain a
   272  	// tree identical to:
   273  	//
   274  	//   100644 blob d8263ee9860594d2806b0dfd1bfd17528b0ba2a4    hello.txt
   275  	//   100644 blob 0f2287157f7cb0dd40498c7a92f74b6975fa2d57    extra.txt
   276  
   277  	AssertCommitParent(t, db, hex.EncodeToString(tip), "45af5deb9a25bc4069b15c1f5bdccb0340978707")
   278  	AssertCommitTree(t, db, "45af5deb9a25bc4069b15c1f5bdccb0340978707", tree2)
   279  
   280  	AssertBlobContents(t, db, tree2, "hello.txt", "2")
   281  	AssertBlobContents(t, db, tree2, "extra.txt", "extra\n")
   282  
   283  	// After rewriting, the HEAD~2 state of the repository should contain a
   284  	// tree identical to:
   285  	//
   286  	//   100644 blob 56a6051ca2b02b04ef92d5150c9ef600403cb1de    hello.txt
   287  	//   100644 blob 0f2287157f7cb0dd40498c7a92f74b6975fa2d57    extra.txt
   288  
   289  	AssertCommitParent(t, db, "45af5deb9a25bc4069b15c1f5bdccb0340978707", "99f6bd7cd69b45494afed95b026f3e450de8304f")
   290  	AssertCommitTree(t, db, "99f6bd7cd69b45494afed95b026f3e450de8304f", tree3)
   291  
   292  	AssertBlobContents(t, db, tree3, "hello.txt", "1")
   293  	AssertBlobContents(t, db, tree3, "extra.txt", "extra\n")
   294  }
   295  
   296  // CallbackCall is a structure recording information pertinent to when a
   297  // *githistory.Rewrite called either BlobFn, TreePreCallbackFn, or
   298  // TreeCallbackFn.
   299  type CallbackCall struct {
   300  	Type string
   301  	Path string
   302  }
   303  
   304  var (
   305  	// collectCalls is a function that returns a *RewriteOptions that
   306  	// updates a pointer to a slice of `*CallbackCall`'s with each call that
   307  	// is received.
   308  	collectCalls = func(calls *[]*CallbackCall) *RewriteOptions {
   309  		return &RewriteOptions{Include: []string{"refs/heads/master"},
   310  			BlobFn: func(path string, b *gitobj.Blob) (*gitobj.Blob, error) {
   311  				*calls = append(*calls, &CallbackCall{
   312  					Type: "blob",
   313  					Path: path,
   314  				})
   315  				return b, nil
   316  			},
   317  
   318  			TreePreCallbackFn: func(path string, t *gitobj.Tree) error {
   319  				*calls = append(*calls, &CallbackCall{
   320  					Type: "tree-pre",
   321  					Path: path,
   322  				})
   323  				return nil
   324  			},
   325  
   326  			TreeCallbackFn: func(path string, t *gitobj.Tree) (*gitobj.Tree, error) {
   327  				*calls = append(*calls, &CallbackCall{
   328  					Type: "tree-post",
   329  					Path: path,
   330  				})
   331  				return t, nil
   332  			},
   333  		}
   334  	}
   335  )
   336  
   337  func TestHistoryRewriterCallbacks(t *testing.T) {
   338  	var calls []*CallbackCall
   339  
   340  	db := DatabaseFromFixture(t, "linear-history.git")
   341  	r := NewRewriter(db)
   342  
   343  	_, err := r.Rewrite(collectCalls(&calls))
   344  
   345  	assert.Nil(t, err)
   346  
   347  	assert.Len(t, calls, 9)
   348  	assert.Equal(t, calls[0], &CallbackCall{Type: "tree-pre", Path: "/"})
   349  	assert.Equal(t, calls[1], &CallbackCall{Type: "blob", Path: "hello.txt"})
   350  	assert.Equal(t, calls[2], &CallbackCall{Type: "tree-post", Path: "/"})
   351  	assert.Equal(t, calls[3], &CallbackCall{Type: "tree-pre", Path: "/"})
   352  	assert.Equal(t, calls[4], &CallbackCall{Type: "blob", Path: "hello.txt"})
   353  	assert.Equal(t, calls[5], &CallbackCall{Type: "tree-post", Path: "/"})
   354  	assert.Equal(t, calls[6], &CallbackCall{Type: "tree-pre", Path: "/"})
   355  	assert.Equal(t, calls[7], &CallbackCall{Type: "blob", Path: "hello.txt"})
   356  	assert.Equal(t, calls[8], &CallbackCall{Type: "tree-post", Path: "/"})
   357  }
   358  
   359  func TestHistoryRewriterCallbacksSubtrees(t *testing.T) {
   360  	var calls []*CallbackCall
   361  
   362  	db := DatabaseFromFixture(t, "non-repeated-subtrees.git")
   363  	r := NewRewriter(db)
   364  
   365  	_, err := r.Rewrite(collectCalls(&calls))
   366  
   367  	assert.Nil(t, err)
   368  
   369  	assert.Len(t, calls, 8)
   370  	assert.Equal(t, calls[0], &CallbackCall{Type: "tree-pre", Path: "/"})
   371  	assert.Equal(t, calls[1], &CallbackCall{Type: "blob", Path: "a.txt"})
   372  	assert.Equal(t, calls[2], &CallbackCall{Type: "tree-post", Path: "/"})
   373  	assert.Equal(t, calls[3], &CallbackCall{Type: "tree-pre", Path: "/"})
   374  	assert.Equal(t, calls[4], &CallbackCall{Type: "tree-pre", Path: "/subdir"})
   375  	assert.Equal(t, calls[5], &CallbackCall{Type: "blob", Path: "subdir/b.txt"})
   376  	assert.Equal(t, calls[6], &CallbackCall{Type: "tree-post", Path: "/subdir"})
   377  	assert.Equal(t, calls[7], &CallbackCall{Type: "tree-post", Path: "/"})
   378  }
   379  
   380  func TestHistoryRewriterTreePreCallbackPropagatesErrors(t *testing.T) {
   381  	expected := errors.Errorf("my error")
   382  
   383  	db := DatabaseFromFixture(t, "linear-history.git")
   384  	r := NewRewriter(db)
   385  
   386  	_, err := r.Rewrite(&RewriteOptions{Include: []string{"refs/heads/master"},
   387  		BlobFn: func(path string, b *gitobj.Blob) (*gitobj.Blob, error) {
   388  			return b, nil
   389  		},
   390  
   391  		TreePreCallbackFn: func(path string, t *gitobj.Tree) error {
   392  			return expected
   393  		},
   394  	})
   395  
   396  	assert.Equal(t, err, expected)
   397  }
   398  
   399  func TestHistoryRewriterUseOriginalParentsForPartialMigration(t *testing.T) {
   400  	db := DatabaseFromFixture(t, "linear-history-with-tags.git")
   401  	r := NewRewriter(db)
   402  
   403  	tip, err := r.Rewrite(&RewriteOptions{
   404  		Include: []string{"refs/heads/master"},
   405  		Exclude: []string{"refs/tags/middle"},
   406  
   407  		BlobFn: func(path string, b *gitobj.Blob) (*gitobj.Blob, error) {
   408  			return b, nil
   409  		},
   410  	})
   411  
   412  	// After rewriting, the rewriter should have only modified the latest
   413  	// commit (HEAD), and excluded the first two, both reachable by
   414  	// refs/tags/middle.
   415  	//
   416  	// This should modify one commit, and appropriately link the parent as
   417  	// follows:
   418  	//
   419  	//   tree 20ecedad3e74a113695fe5f00ab003694e2e1e9c
   420  	//   parent 228afe30855933151f7a88e70d9d88314fd2f191
   421  	//   author Taylor Blau <me@ttaylorr.com> 1496954214 -0600
   422  	//   committer Taylor Blau <me@ttaylorr.com> 1496954214 -0600
   423  	//
   424  	//   some.txt: c
   425  
   426  	expectedParent := "228afe30855933151f7a88e70d9d88314fd2f191"
   427  
   428  	assert.NoError(t, err)
   429  	AssertCommitParent(t, db, hex.EncodeToString(tip), expectedParent)
   430  }
   431  
   432  func TestHistoryRewriterUpdatesRefs(t *testing.T) {
   433  	db := DatabaseFromFixture(t, "linear-history.git")
   434  	r := NewRewriter(db)
   435  
   436  	AssertRef(t, db,
   437  		"refs/heads/master", HexDecode(t, "e669b63f829bfb0b91fc52a5bcea53dd7977a0ee"))
   438  
   439  	tip, err := r.Rewrite(&RewriteOptions{
   440  		Include: []string{"refs/heads/master"},
   441  
   442  		UpdateRefs: true,
   443  
   444  		BlobFn: func(path string, b *gitobj.Blob) (*gitobj.Blob, error) {
   445  			suffix := strings.NewReader("_suffix")
   446  
   447  			return &gitobj.Blob{
   448  				Contents: io.MultiReader(b.Contents, suffix),
   449  				Size:     b.Size + int64(suffix.Len()),
   450  			}, nil
   451  		},
   452  	})
   453  
   454  	assert.Nil(t, err)
   455  
   456  	c1 := hex.EncodeToString(tip)
   457  	c2 := "66561fe3ae68651658e18e48053dcfe66a2e9da1"
   458  	c3 := "8268d8486c48024a871fa42fc487dbeabd6e3d86"
   459  
   460  	AssertRef(t, db, "refs/heads/master", tip)
   461  
   462  	AssertCommitParent(t, db, c1, c2)
   463  	AssertCommitParent(t, db, c2, c3)
   464  }
   465  
   466  func TestHistoryRewriterReturnsFilter(t *testing.T) {
   467  	f := filepathfilter.New([]string{"a"}, []string{"b"})
   468  	r := NewRewriter(nil, WithFilter(f))
   469  
   470  	expected := reflect.ValueOf(f).Elem().Addr().Pointer()
   471  	got := reflect.ValueOf(r.Filter()).Elem().Addr().Pointer()
   472  
   473  	assert.Equal(t, expected, got,
   474  		"git/githistory: expected Rewriter.Filter() to return same *filepathfilter.Filter instance")
   475  }
   476  
   477  // debug is meant to be called from a defer statement to aide in debugging a
   478  // test failure among any in this file.
   479  //
   480  // Callers are expected to call it immediately after calling the Rewrite()
   481  // function.
   482  func debug(t *testing.T, db *gitobj.ObjectDatabase, tip []byte, err error) {
   483  	root, ok := db.Root()
   484  
   485  	t.Log(strings.Repeat("*", 80))
   486  	t.Logf("* root=%s, ok=%t\n", root, ok)
   487  	t.Logf("* tip=%x\n", tip)
   488  	t.Logf("* err=%s\n", err)
   489  	t.Log(strings.Repeat("*", 80))
   490  }