github.com/git-lfs/git-lfs@v2.5.2+incompatible/lfs/gitscanner_index.go (about)

     1  package lfs
     2  
     3  import (
     4  	"strings"
     5  	"sync"
     6  
     7  	"github.com/git-lfs/git-lfs/filepathfilter"
     8  )
     9  
    10  // ScanIndex returns a slice of WrappedPointer objects for all Git LFS pointers
    11  // it finds in the index.
    12  //
    13  // Ref is the ref at which to scan, which may be "HEAD" if there is at least one
    14  // commit.
    15  func scanIndex(cb GitScannerFoundPointer, ref string, f *filepathfilter.Filter) error {
    16  	indexMap := &indexFileMap{
    17  		nameMap:      make(map[string][]*indexFile),
    18  		nameShaPairs: make(map[string]bool),
    19  		mutex:        &sync.Mutex{},
    20  	}
    21  
    22  	revs, err := revListIndex(ref, false, indexMap)
    23  	if err != nil {
    24  		return err
    25  	}
    26  
    27  	cachedRevs, err := revListIndex(ref, true, indexMap)
    28  	if err != nil {
    29  		return err
    30  	}
    31  
    32  	allRevsErr := make(chan error, 5) // can be multiple errors below
    33  	allRevsChan := make(chan string, 1)
    34  	allRevs := NewStringChannelWrapper(allRevsChan, allRevsErr)
    35  	go func() {
    36  		seenRevs := make(map[string]bool, 0)
    37  
    38  		for rev := range cachedRevs.Results {
    39  			if !seenRevs[rev] {
    40  				allRevsChan <- rev
    41  				seenRevs[rev] = true
    42  			}
    43  		}
    44  		err = cachedRevs.Wait()
    45  		if err != nil {
    46  			allRevsErr <- err
    47  		}
    48  
    49  		for rev := range revs.Results {
    50  			if !seenRevs[rev] {
    51  				allRevsChan <- rev
    52  				seenRevs[rev] = true
    53  			}
    54  		}
    55  		err := revs.Wait()
    56  		if err != nil {
    57  			allRevsErr <- err
    58  		}
    59  		close(allRevsChan)
    60  		close(allRevsErr)
    61  	}()
    62  
    63  	smallShas, _, err := catFileBatchCheck(allRevs, nil)
    64  	if err != nil {
    65  		return err
    66  	}
    67  
    68  	ch := make(chan gitscannerResult, chanBufSize)
    69  
    70  	barePointerCh, _, err := catFileBatch(smallShas, nil)
    71  	if err != nil {
    72  		return err
    73  	}
    74  
    75  	go func() {
    76  		for p := range barePointerCh.Results {
    77  			for _, file := range indexMap.FilesFor(p.Sha1) {
    78  				// Append a new *WrappedPointer that combines the data
    79  				// from the index file, and the pointer "p".
    80  				ch <- gitscannerResult{
    81  					Pointer: &WrappedPointer{
    82  						Sha1:    p.Sha1,
    83  						Name:    file.Name,
    84  						SrcName: file.SrcName,
    85  						Status:  file.Status,
    86  						Pointer: p.Pointer,
    87  					},
    88  				}
    89  			}
    90  		}
    91  
    92  		if err := barePointerCh.Wait(); err != nil {
    93  			ch <- gitscannerResult{Err: err}
    94  		}
    95  
    96  		close(ch)
    97  	}()
    98  
    99  	for result := range ch {
   100  		if f.Allows(result.Pointer.Name) {
   101  			cb(result.Pointer, result.Err)
   102  		}
   103  	}
   104  
   105  	return nil
   106  }
   107  
   108  // revListIndex uses git diff-index to return the list of object sha1s
   109  // for in the indexf. It returns a channel from which sha1 strings can be read.
   110  // The namMap will be filled indexFile pointers mapping sha1s to indexFiles.
   111  func revListIndex(atRef string, cache bool, indexMap *indexFileMap) (*StringChannelWrapper, error) {
   112  	scanner, err := NewDiffIndexScanner(atRef, cache)
   113  	if err != nil {
   114  		return nil, err
   115  	}
   116  
   117  	revs := make(chan string, chanBufSize)
   118  	errs := make(chan error, 1)
   119  
   120  	go func() {
   121  		for scanner.Scan() {
   122  			var name string = scanner.Entry().DstName
   123  			if len(name) == 0 {
   124  				name = scanner.Entry().SrcName
   125  			}
   126  
   127  			indexMap.Add(scanner.Entry().DstSha, &indexFile{
   128  				Name:    name,
   129  				SrcName: scanner.Entry().SrcName,
   130  				Status:  string(scanner.Entry().Status),
   131  			})
   132  
   133  			revs <- scanner.Entry().DstSha
   134  		}
   135  
   136  		if err := scanner.Err(); err != nil {
   137  			errs <- err
   138  		}
   139  
   140  		close(revs)
   141  		close(errs)
   142  	}()
   143  
   144  	return NewStringChannelWrapper(revs, errs), nil
   145  }
   146  
   147  // indexFile is used when scanning the index. It stores the name of
   148  // the file, the status of the file in the index, and, in the case of
   149  // a moved or copied file, the original name of the file.
   150  type indexFile struct {
   151  	Name    string
   152  	SrcName string
   153  	Status  string
   154  }
   155  
   156  type indexFileMap struct {
   157  	// mutex guards nameMap and nameShaPairs
   158  	mutex *sync.Mutex
   159  	// nameMap maps SHA1s to a slice of `*indexFile`s
   160  	nameMap map[string][]*indexFile
   161  	// nameShaPairs maps "sha1:name" -> bool
   162  	nameShaPairs map[string]bool
   163  }
   164  
   165  // FilesFor returns all `*indexFile`s that match the given `sha`.
   166  func (m *indexFileMap) FilesFor(sha string) []*indexFile {
   167  	m.mutex.Lock()
   168  	defer m.mutex.Unlock()
   169  
   170  	return m.nameMap[sha]
   171  }
   172  
   173  // Add appends unique index files to the given SHA, "sha". A file is considered
   174  // unique if its combination of SHA and current filename have not yet been seen
   175  // by this instance "m" of *indexFileMap.
   176  func (m *indexFileMap) Add(sha string, index *indexFile) {
   177  	m.mutex.Lock()
   178  	defer m.mutex.Unlock()
   179  
   180  	pairKey := strings.Join([]string{sha, index.Name}, ":")
   181  	if m.nameShaPairs[pairKey] {
   182  		return
   183  	}
   184  
   185  	m.nameMap[sha] = append(m.nameMap[sha], index)
   186  	m.nameShaPairs[pairKey] = true
   187  }