github.com/hawser/git-hawser@v2.5.2+incompatible/commands/command_migrate_import.go (about)

     1  package commands
     2  
     3  import (
     4  	"bufio"
     5  	"bytes"
     6  	"encoding/hex"
     7  	"fmt"
     8  	"os"
     9  	"path/filepath"
    10  	"strings"
    11  
    12  	"github.com/git-lfs/git-lfs/errors"
    13  	"github.com/git-lfs/git-lfs/filepathfilter"
    14  	"github.com/git-lfs/git-lfs/git"
    15  	"github.com/git-lfs/git-lfs/git/gitattr"
    16  	"github.com/git-lfs/git-lfs/git/githistory"
    17  	"github.com/git-lfs/git-lfs/lfs"
    18  	"github.com/git-lfs/git-lfs/tasklog"
    19  	"github.com/git-lfs/git-lfs/tools"
    20  	"github.com/git-lfs/gitobj"
    21  	"github.com/spf13/cobra"
    22  )
    23  
    24  func migrateImportCommand(cmd *cobra.Command, args []string) {
    25  	ensureWorkingCopyClean(os.Stdin, os.Stderr)
    26  
    27  	l := tasklog.NewLogger(os.Stderr)
    28  	defer l.Close()
    29  
    30  	db, err := getObjectDatabase()
    31  	if err != nil {
    32  		ExitWithError(err)
    33  	}
    34  	defer db.Close()
    35  
    36  	if migrateNoRewrite {
    37  		if migrateFixup {
    38  			ExitWithError(errors.Errorf("fatal: --no-rewrite and --fixup cannot be combined"))
    39  		}
    40  
    41  		if len(args) == 0 {
    42  			ExitWithError(errors.Errorf("fatal: expected one or more files with --no-rewrite"))
    43  		}
    44  
    45  		ref, err := git.CurrentRef()
    46  		if err != nil {
    47  			ExitWithError(errors.Wrap(err, "fatal: unable to find current reference"))
    48  		}
    49  
    50  		sha, _ := hex.DecodeString(ref.Sha)
    51  		commit, err := db.Commit(sha)
    52  		if err != nil {
    53  			ExitWithError(errors.Wrap(err, "fatal: unable to load commit"))
    54  		}
    55  
    56  		root := commit.TreeID
    57  
    58  		filter := git.GetAttributeFilter(cfg.LocalWorkingDir(), cfg.LocalGitDir())
    59  		if len(filter.Include()) == 0 {
    60  			ExitWithError(errors.Errorf("fatal: no Git LFS filters found in .gitattributes"))
    61  		}
    62  
    63  		gf := lfs.NewGitFilter(cfg)
    64  
    65  		for _, file := range args {
    66  			if !filter.Allows(file) {
    67  				ExitWithError(errors.Errorf("fatal: file %s did not match any Git LFS filters in .gitattributes", file))
    68  			}
    69  		}
    70  
    71  		for _, file := range args {
    72  			root, err = rewriteTree(gf, db, root, file)
    73  			if err != nil {
    74  				ExitWithError(errors.Wrapf(err, "fatal: could not rewrite %q", file))
    75  			}
    76  		}
    77  
    78  		name, email := cfg.CurrentCommitter()
    79  		author := fmt.Sprintf("%s <%s>", name, email)
    80  
    81  		oid, err := db.WriteCommit(&gitobj.Commit{
    82  			Author:    author,
    83  			Committer: author,
    84  			ParentIDs: [][]byte{sha},
    85  			Message:   generateMigrateCommitMessage(cmd, strings.Join(args, ",")),
    86  			TreeID:    root,
    87  		})
    88  
    89  		if err != nil {
    90  			ExitWithError(errors.Wrap(err, "fatal: unable to write commit"))
    91  		}
    92  
    93  		if err := git.UpdateRef(ref, oid, "git lfs migrate import --no-rewrite"); err != nil {
    94  			ExitWithError(errors.Wrap(err, "fatal: unable to update ref"))
    95  		}
    96  
    97  		if err := checkoutNonBare(l); err != nil {
    98  			ExitWithError(errors.Wrap(err, "fatal: could not checkout"))
    99  		}
   100  
   101  		return
   102  	}
   103  
   104  	if migrateFixup {
   105  		include, exclude := getIncludeExcludeArgs(cmd)
   106  		if include != nil || exclude != nil {
   107  			ExitWithError(errors.Errorf("fatal: cannot use --fixup with --include, --exclude"))
   108  		}
   109  	}
   110  
   111  	rewriter := getHistoryRewriter(cmd, db, l)
   112  
   113  	tracked := trackedFromFilter(rewriter.Filter())
   114  	exts := tools.NewOrderedSet()
   115  	gitfilter := lfs.NewGitFilter(cfg)
   116  
   117  	var fixups *gitattr.Tree
   118  
   119  	migrate(args, rewriter, l, &githistory.RewriteOptions{
   120  		Verbose:           migrateVerbose,
   121  		ObjectMapFilePath: objectMapFilePath,
   122  		BlobFn: func(path string, b *gitobj.Blob) (*gitobj.Blob, error) {
   123  			if filepath.Base(path) == ".gitattributes" {
   124  				return b, nil
   125  			}
   126  
   127  			if migrateFixup {
   128  				var ok bool
   129  				attrs := fixups.Applied(path)
   130  				for _, attr := range attrs {
   131  					if attr.K == "filter" {
   132  						ok = attr.V == "lfs"
   133  					}
   134  				}
   135  
   136  				if !ok {
   137  					return b, nil
   138  				}
   139  			}
   140  
   141  			var buf bytes.Buffer
   142  
   143  			if _, err := clean(gitfilter, &buf, b.Contents, path, b.Size); err != nil {
   144  				return nil, err
   145  			}
   146  
   147  			if ext := filepath.Ext(path); len(ext) > 0 {
   148  				exts.Add(fmt.Sprintf("*%s filter=lfs diff=lfs merge=lfs -text", ext))
   149  			}
   150  
   151  			return &gitobj.Blob{
   152  				Contents: &buf, Size: int64(buf.Len()),
   153  			}, nil
   154  		},
   155  
   156  		TreePreCallbackFn: func(path string, t *gitobj.Tree) error {
   157  			if migrateFixup && path == "/" {
   158  				var err error
   159  
   160  				fixups, err = gitattr.New(db, t)
   161  				if err != nil {
   162  					return err
   163  				}
   164  				return nil
   165  			}
   166  			return nil
   167  		},
   168  
   169  		TreeCallbackFn: func(path string, t *gitobj.Tree) (*gitobj.Tree, error) {
   170  			if path != "/" || migrateFixup {
   171  				// Avoid updating .gitattributes in non-root
   172  				// trees, or if --fixup is given.
   173  				return t, nil
   174  			}
   175  
   176  			ours := tracked
   177  			if ours.Cardinality() == 0 {
   178  				// If there were no explicitly tracked
   179  				// --include, --exclude filters, assume that the
   180  				// include set is the wildcard filepath
   181  				// extensions of files tracked.
   182  				ours = exts
   183  
   184  				if ours.Cardinality() == 0 {
   185  					// If it is still the case that we have
   186  					// no patterns to track, that means that
   187  					// we are in a tree that does not
   188  					// require .gitattributes changes.
   189  					//
   190  					// We can return early to avoid
   191  					// comparing and saving an identical
   192  					// tree.
   193  					return t, nil
   194  				}
   195  			}
   196  
   197  			theirs, err := trackedFromAttrs(db, t)
   198  			if err != nil {
   199  				return nil, err
   200  			}
   201  
   202  			// Create a blob of the attributes that are optionally
   203  			// present in the "t" tree's .gitattributes blob, and
   204  			// union in the patterns that we've tracked.
   205  			//
   206  			// Perform this Union() operation each time we visit a
   207  			// root tree such that if the underlying .gitattributes
   208  			// is present and has a diff between commits in the
   209  			// range of commits to migrate, those changes are
   210  			// preserved.
   211  			blob, err := trackedToBlob(db, theirs.Clone().Union(ours))
   212  			if err != nil {
   213  				return nil, err
   214  			}
   215  
   216  			// Finally, return a copy of the tree "t" that has the
   217  			// new .gitattributes file included/replaced.
   218  			return t.Merge(&gitobj.TreeEntry{
   219  				Name:     ".gitattributes",
   220  				Filemode: 0100644,
   221  				Oid:      blob,
   222  			}), nil
   223  		},
   224  
   225  		UpdateRefs: true,
   226  	})
   227  
   228  	if err := checkoutNonBare(l); err != nil {
   229  		ExitWithError(errors.Wrap(err, "fatal: could not checkout"))
   230  	}
   231  }
   232  
   233  // generateMigrateCommitMessage generates a commit message used with
   234  // --no-rewrite, using --message (if given) or generating one if it isn't.
   235  func generateMigrateCommitMessage(cmd *cobra.Command, patterns string) string {
   236  	if cmd.Flag("message").Changed {
   237  		return migrateCommitMessage
   238  	}
   239  	return fmt.Sprintf("%s: convert to Git LFS", patterns)
   240  }
   241  
   242  // checkoutNonBare forces a checkout of the current reference, so long as the
   243  // repository is non-bare.
   244  //
   245  // It returns nil on success, and a non-nil error on failure.
   246  func checkoutNonBare(l *tasklog.Logger) error {
   247  	if bare, _ := git.IsBare(); bare {
   248  		return nil
   249  	}
   250  
   251  	t := l.Waiter("migrate: checkout")
   252  	defer t.Complete()
   253  
   254  	return git.Checkout("", nil, true)
   255  }
   256  
   257  // trackedFromFilter returns an ordered set of strings where each entry is a
   258  // line in the .gitattributes file. It adds/removes the fiter/diff/merge=lfs
   259  // attributes based on patterns included/excldued in the given filter.
   260  func trackedFromFilter(filter *filepathfilter.Filter) *tools.OrderedSet {
   261  	tracked := tools.NewOrderedSet()
   262  
   263  	for _, include := range filter.Include() {
   264  		tracked.Add(fmt.Sprintf("%s filter=lfs diff=lfs merge=lfs -text", escapeAttrPattern(include)))
   265  	}
   266  
   267  	for _, exclude := range filter.Exclude() {
   268  		tracked.Add(fmt.Sprintf("%s text -filter -merge -diff", escapeAttrPattern(exclude)))
   269  	}
   270  
   271  	return tracked
   272  }
   273  
   274  var (
   275  	// attrsCache maintains a cache from the hex-encoded SHA1 of a
   276  	// .gitattributes blob to the set of patterns parsed from that blob.
   277  	attrsCache = make(map[string]*tools.OrderedSet)
   278  )
   279  
   280  // trackedFromAttrs returns an ordered line-delimited set of the contents of a
   281  // .gitattributes blob in a given tree "t".
   282  //
   283  // It returns an empty set if no attributes file could be found, or an error if
   284  // it could not otherwise be opened.
   285  func trackedFromAttrs(db *gitobj.ObjectDatabase, t *gitobj.Tree) (*tools.OrderedSet, error) {
   286  	var oid []byte
   287  
   288  	for _, e := range t.Entries {
   289  		if strings.ToLower(e.Name) == ".gitattributes" && e.Type() == gitobj.BlobObjectType {
   290  			oid = e.Oid
   291  			break
   292  		}
   293  	}
   294  
   295  	if oid == nil {
   296  		// TODO(@ttaylorr): make (*tools.OrderedSet)(nil) a valid
   297  		// receiver for non-mutative methods.
   298  		return tools.NewOrderedSet(), nil
   299  	}
   300  
   301  	sha1 := hex.EncodeToString(oid)
   302  
   303  	if s, ok := attrsCache[sha1]; ok {
   304  		return s, nil
   305  	}
   306  
   307  	blob, err := db.Blob(oid)
   308  	if err != nil {
   309  		return nil, err
   310  	}
   311  
   312  	attrs := tools.NewOrderedSet()
   313  
   314  	scanner := bufio.NewScanner(blob.Contents)
   315  	for scanner.Scan() {
   316  		attrs.Add(scanner.Text())
   317  	}
   318  
   319  	if err := scanner.Err(); err != nil {
   320  		return nil, err
   321  	}
   322  
   323  	attrsCache[sha1] = attrs
   324  
   325  	return attrsCache[sha1], nil
   326  }
   327  
   328  // trackedToBlob writes and returns the OID of a .gitattributes blob based on
   329  // the patterns given in the ordered set of patterns, "patterns".
   330  func trackedToBlob(db *gitobj.ObjectDatabase, patterns *tools.OrderedSet) ([]byte, error) {
   331  	var attrs bytes.Buffer
   332  
   333  	for pattern := range patterns.Iter() {
   334  		fmt.Fprintf(&attrs, "%s\n", pattern)
   335  	}
   336  
   337  	return db.WriteBlob(&gitobj.Blob{
   338  		Contents: &attrs,
   339  		Size:     int64(attrs.Len()),
   340  	})
   341  }
   342  
   343  // rewriteTree replaces the blob at the provided path within the given tree with
   344  // a git lfs pointer. It will recursively rewrite any subtrees along the path to the
   345  // blob.
   346  func rewriteTree(gf *lfs.GitFilter, db *gitobj.ObjectDatabase, root []byte, path string) ([]byte, error) {
   347  	tree, err := db.Tree(root)
   348  	if err != nil {
   349  		return nil, err
   350  	}
   351  
   352  	splits := strings.SplitN(path, "/", 2)
   353  
   354  	switch len(splits) {
   355  	case 1:
   356  		// The path points to an entry at the root of this tree, so it must be a blob.
   357  		// Try to replace this blob with a Git LFS pointer.
   358  		index := findEntry(tree, splits[0])
   359  		if index < 0 {
   360  			return nil, errors.Errorf("unable to find entry %s in tree", splits[0])
   361  		}
   362  
   363  		blobEntry := tree.Entries[index]
   364  		blob, err := db.Blob(blobEntry.Oid)
   365  		if err != nil {
   366  			return nil, err
   367  		}
   368  
   369  		var buf bytes.Buffer
   370  
   371  		if _, err := clean(gf, &buf, blob.Contents, blobEntry.Name, blob.Size); err != nil {
   372  			return nil, err
   373  		}
   374  
   375  		newOid, err := db.WriteBlob(&gitobj.Blob{
   376  			Contents: &buf,
   377  			Size:     int64(buf.Len()),
   378  		})
   379  
   380  		if err != nil {
   381  			return nil, err
   382  		}
   383  
   384  		tree = tree.Merge(&gitobj.TreeEntry{
   385  			Name:     splits[0],
   386  			Filemode: blobEntry.Filemode,
   387  			Oid:      newOid,
   388  		})
   389  		return db.WriteTree(tree)
   390  
   391  	case 2:
   392  		// The path points to an entry in a subtree contained at the root of the tree.
   393  		// Recursively rewrite the subtree.
   394  		head, tail := splits[0], splits[1]
   395  
   396  		index := findEntry(tree, head)
   397  		if index < 0 {
   398  			return nil, errors.Errorf("unable to find entry %s in tree", head)
   399  		}
   400  
   401  		subtreeEntry := tree.Entries[index]
   402  		if subtreeEntry.Type() != gitobj.TreeObjectType {
   403  			return nil, errors.Errorf("migrate: expected %s to be a tree, got %s", head, subtreeEntry.Type())
   404  		}
   405  
   406  		rewrittenSubtree, err := rewriteTree(gf, db, subtreeEntry.Oid, tail)
   407  		if err != nil {
   408  			return nil, err
   409  		}
   410  
   411  		tree = tree.Merge(&gitobj.TreeEntry{
   412  			Filemode: subtreeEntry.Filemode,
   413  			Name:     subtreeEntry.Name,
   414  			Oid:      rewrittenSubtree,
   415  		})
   416  
   417  		return db.WriteTree(tree)
   418  
   419  	default:
   420  		return nil, errors.Errorf("error parsing path %s", path)
   421  	}
   422  }
   423  
   424  // findEntry searches a tree for the desired entry, and returns the index of that
   425  // entry within the tree's Entries array
   426  func findEntry(t *gitobj.Tree, name string) int {
   427  	for i, entry := range t.Entries {
   428  		if entry.Name == name {
   429  			return i
   430  		}
   431  	}
   432  
   433  	return -1
   434  }