github.com/gitbundle/modules@v0.0.0-20231025071548-85b91c5c3b01/git/pipeline/lfs_nogogit.go (about)

     1  // Copyright 2023 The GitBundle Inc. All rights reserved.
     2  // Copyright 2017 The Gitea Authors. All rights reserved.
     3  // Use of this source code is governed by a MIT-style
     4  // license that can be found in the LICENSE file.
     5  
     6  //go:build !gogit
     7  
     8  package pipeline
     9  
    10  import (
    11  	"bufio"
    12  	"bytes"
    13  	"fmt"
    14  	"io"
    15  	"sort"
    16  	"strings"
    17  	"sync"
    18  	"time"
    19  
    20  	"github.com/gitbundle/modules/git"
    21  )
    22  
// LFSResult represents commits found using a provided pointer file hash.
// Each value describes one (commit, path) pair at which the hash was found;
// FindLFSFile is the function in this file that populates the fields.
type LFSResult struct {
	Name           string     // path of the matching entry inside the commit's tree
	SHA            string     // ID of the commit, in string form
	Summary        string     // first line of the whitespace-trimmed commit message
	When           time.Time  // author timestamp of the commit
	ParentHashes   []git.SHA1 // IDs of the commit's parents
	BranchName     string     // FullCommitName up to (not including) its first "~"
	FullCommitName string     // name reported for the commit by the name-rev step
}
    33  
    34  type lfsResultSlice []*LFSResult
    35  
    36  func (a lfsResultSlice) Len() int           { return len(a) }
    37  func (a lfsResultSlice) Swap(i, j int)      { a[i], a[j] = a[j], a[i] }
    38  func (a lfsResultSlice) Less(i, j int) bool { return a[j].When.After(a[i].When) }
    39  
    40  // FindLFSFile finds commits that contain a provided pointer file hash
    41  func FindLFSFile(repo *git.Repository, hash git.SHA1) ([]*LFSResult, error) {
    42  	resultsMap := map[string]*LFSResult{}
    43  	results := make([]*LFSResult, 0)
    44  
    45  	basePath := repo.Path
    46  
    47  	// Use rev-list to provide us with all commits in order
    48  	revListReader, revListWriter := io.Pipe()
    49  	defer func() {
    50  		_ = revListWriter.Close()
    51  		_ = revListReader.Close()
    52  	}()
    53  
    54  	go func() {
    55  		stderr := strings.Builder{}
    56  		err := git.NewCommand(repo.Ctx, "rev-list", "--all").Run(&git.RunOpts{
    57  			Dir:    repo.Path,
    58  			Stdout: revListWriter,
    59  			Stderr: &stderr,
    60  		})
    61  		if err != nil {
    62  			_ = revListWriter.CloseWithError(git.ConcatenateError(err, (&stderr).String()))
    63  		} else {
    64  			_ = revListWriter.Close()
    65  		}
    66  	}()
    67  
    68  	// Next feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary.
    69  	// so let's create a batch stdin and stdout
    70  	batchStdinWriter, batchReader, cancel := repo.CatFileBatch(repo.Ctx)
    71  	defer cancel()
    72  
    73  	// We'll use a scanner for the revList because it's simpler than a bufio.Reader
    74  	scan := bufio.NewScanner(revListReader)
    75  	trees := [][]byte{}
    76  	paths := []string{}
    77  
    78  	fnameBuf := make([]byte, 4096)
    79  	modeBuf := make([]byte, 40)
    80  	workingShaBuf := make([]byte, 20)
    81  
    82  	for scan.Scan() {
    83  		// Get the next commit ID
    84  		commitID := scan.Bytes()
    85  
    86  		// push the commit to the cat-file --batch process
    87  		_, err := batchStdinWriter.Write(commitID)
    88  		if err != nil {
    89  			return nil, err
    90  		}
    91  		_, err = batchStdinWriter.Write([]byte{'\n'})
    92  		if err != nil {
    93  			return nil, err
    94  		}
    95  
    96  		var curCommit *git.Commit
    97  		curPath := ""
    98  
    99  	commitReadingLoop:
   100  		for {
   101  			_, typ, size, err := git.ReadBatchLine(batchReader)
   102  			if err != nil {
   103  				return nil, err
   104  			}
   105  
   106  			switch typ {
   107  			case "tag":
   108  				// This shouldn't happen but if it does well just get the commit and try again
   109  				id, err := git.ReadTagObjectID(batchReader, size)
   110  				if err != nil {
   111  					return nil, err
   112  				}
   113  				_, err = batchStdinWriter.Write([]byte(id + "\n"))
   114  				if err != nil {
   115  					return nil, err
   116  				}
   117  				continue
   118  			case "commit":
   119  				// Read in the commit to get its tree and in case this is one of the last used commits
   120  				curCommit, err = git.CommitFromReader(repo, git.MustIDFromString(string(commitID)), io.LimitReader(batchReader, int64(size)))
   121  				if err != nil {
   122  					return nil, err
   123  				}
   124  				if _, err := batchReader.Discard(1); err != nil {
   125  					return nil, err
   126  				}
   127  
   128  				_, err := batchStdinWriter.Write([]byte(curCommit.Tree.ID.String() + "\n"))
   129  				if err != nil {
   130  					return nil, err
   131  				}
   132  				curPath = ""
   133  			case "tree":
   134  				var n int64
   135  				for n < size {
   136  					mode, fname, sha20byte, count, err := git.ParseTreeLine(batchReader, modeBuf, fnameBuf, workingShaBuf)
   137  					if err != nil {
   138  						return nil, err
   139  					}
   140  					n += int64(count)
   141  					if bytes.Equal(sha20byte, hash[:]) {
   142  						result := LFSResult{
   143  							Name:         curPath + string(fname),
   144  							SHA:          curCommit.ID.String(),
   145  							Summary:      strings.Split(strings.TrimSpace(curCommit.CommitMessage), "\n")[0],
   146  							When:         curCommit.Author.When,
   147  							ParentHashes: curCommit.Parents,
   148  						}
   149  						resultsMap[curCommit.ID.String()+":"+curPath+string(fname)] = &result
   150  					} else if string(mode) == git.EntryModeTree.String() {
   151  						sha40Byte := make([]byte, 40)
   152  						git.To40ByteSHA(sha20byte, sha40Byte)
   153  						trees = append(trees, sha40Byte)
   154  						paths = append(paths, curPath+string(fname)+"/")
   155  					}
   156  				}
   157  				if _, err := batchReader.Discard(1); err != nil {
   158  					return nil, err
   159  				}
   160  				if len(trees) > 0 {
   161  					_, err := batchStdinWriter.Write(trees[len(trees)-1])
   162  					if err != nil {
   163  						return nil, err
   164  					}
   165  					_, err = batchStdinWriter.Write([]byte("\n"))
   166  					if err != nil {
   167  						return nil, err
   168  					}
   169  					curPath = paths[len(paths)-1]
   170  					trees = trees[:len(trees)-1]
   171  					paths = paths[:len(paths)-1]
   172  				} else {
   173  					break commitReadingLoop
   174  				}
   175  			}
   176  		}
   177  	}
   178  
   179  	if err := scan.Err(); err != nil {
   180  		return nil, err
   181  	}
   182  
   183  	for _, result := range resultsMap {
   184  		hasParent := false
   185  		for _, parentHash := range result.ParentHashes {
   186  			if _, hasParent = resultsMap[parentHash.String()+":"+result.Name]; hasParent {
   187  				break
   188  			}
   189  		}
   190  		if !hasParent {
   191  			results = append(results, result)
   192  		}
   193  	}
   194  
   195  	sort.Sort(lfsResultSlice(results))
   196  
   197  	// Should really use a go-git function here but name-rev is not completed and recapitulating it is not simple
   198  	shasToNameReader, shasToNameWriter := io.Pipe()
   199  	nameRevStdinReader, nameRevStdinWriter := io.Pipe()
   200  	errChan := make(chan error, 1)
   201  	wg := sync.WaitGroup{}
   202  	wg.Add(3)
   203  
   204  	go func() {
   205  		defer wg.Done()
   206  		scanner := bufio.NewScanner(nameRevStdinReader)
   207  		i := 0
   208  		for scanner.Scan() {
   209  			line := scanner.Text()
   210  			if len(line) == 0 {
   211  				continue
   212  			}
   213  			result := results[i]
   214  			result.FullCommitName = line
   215  			result.BranchName = strings.Split(line, "~")[0]
   216  			i++
   217  		}
   218  	}()
   219  	go NameRevStdin(repo.Ctx, shasToNameReader, nameRevStdinWriter, &wg, basePath)
   220  	go func() {
   221  		defer wg.Done()
   222  		defer shasToNameWriter.Close()
   223  		for _, result := range results {
   224  			_, err := shasToNameWriter.Write([]byte(result.SHA))
   225  			if err != nil {
   226  				errChan <- err
   227  				break
   228  			}
   229  			_, err = shasToNameWriter.Write([]byte{'\n'})
   230  			if err != nil {
   231  				errChan <- err
   232  				break
   233  			}
   234  
   235  		}
   236  	}()
   237  
   238  	wg.Wait()
   239  
   240  	select {
   241  	case err, has := <-errChan:
   242  		if has {
   243  			return nil, fmt.Errorf("Unable to obtain name for LFS files. Error: %w", err)
   244  		}
   245  	default:
   246  	}
   247  
   248  	return results, nil
   249  }