github.com/git-lfs/git-lfs@v2.5.2+incompatible/commands/command_filter_process.go

package commands

import (
	"bytes"
	"fmt"
	"io"
	"os"
	"strings"
	"sync"

	"github.com/git-lfs/git-lfs/errors"
	"github.com/git-lfs/git-lfs/filepathfilter"
	"github.com/git-lfs/git-lfs/git"
	"github.com/git-lfs/git-lfs/lfs"
	"github.com/git-lfs/git-lfs/tq"
	"github.com/spf13/cobra"
)

const (
	// cleanFilterBufferCapacity is the desired capacity of the
	// `*git.PktlineWriter`'s internal buffer when the filter protocol
	// dictates the "clean" command. 512 bytes is (in most cases) enough to
	// hold an entire LFS pointer in memory.
	cleanFilterBufferCapacity = 512

	// smudgeFilterBufferCapacity is the desired capacity of the
	// `*git.PktlineWriter`'s internal buffer when the filter protocol
	// dictates the "smudge" command.
	smudgeFilterBufferCapacity = git.MaxPacketLength
)
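
// For reference, a complete LFS pointer blob is a short block of text along
// the following lines (the OID and size shown are hypothetical), which is why
// the 512 bytes of cleanFilterBufferCapacity are normally enough to hold one:
//
//	version https://git-lfs.github.com/spec/v1
//	oid sha256:4d7a214614ab2935c943f9e0ff69d22eadbb8f32b1258daaa5e2ca24d17e2393
//	size 12345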

// filterSmudgeSkip is a command-line flag owned by the `filter-process` command
// dictating whether or not to skip the smudging process, leaving pointers as-is
// in the working tree.
var filterSmudgeSkip bool

func filterCommand(cmd *cobra.Command, args []string) {
	requireStdin("This command should be run by the Git filter process")
	installHooks(false)

	s := git.NewFilterProcessScanner(os.Stdin, os.Stdout)

	if err := s.Init(); err != nil {
		ExitWithError(err)
	}

	caps, err := s.NegotiateCapabilities()
	if err != nil {
		ExitWithError(err)
	}

	var supportsDelay bool
	for _, cap := range caps {
		if cap == "capability=delay" {
			supportsDelay = true
			break
		}
	}

	skip := filterSmudgeSkip || cfg.Os.Bool("GIT_LFS_SKIP_SMUDGE", false)
	filter := filepathfilter.New(cfg.FetchIncludePaths(), cfg.FetchExcludePaths())

	ptrs := make(map[string]*lfs.Pointer)

	var q *tq.TransferQueue
	closeOnce := new(sync.Once)
	available := make(chan *tq.Transfer)

	if supportsDelay {
		q = tq.NewTransferQueue(
			tq.Download,
			getTransferManifestOperationRemote("download", cfg.Remote()),
			cfg.Remote(),
			tq.RemoteRef(currentRemoteRef()),
		)
		go infiniteTransferBuffer(q, available)
	}

	var malformed []string
	var malformedOnWindows []string
	gitfilter := lfs.NewGitFilter(cfg)
	for s.Scan() {
		var n int64
		var err error
		var delayed bool
		var w *git.PktlineWriter

		req := s.Request()

		switch req.Header["command"] {
		case "clean":
			s.WriteStatus(statusFromErr(nil))
			w = git.NewPktlineWriter(os.Stdout, cleanFilterBufferCapacity)

			var ptr *lfs.Pointer
			ptr, err = clean(gitfilter, w, req.Payload, req.Header["pathname"], -1)

			if ptr != nil {
				n = ptr.Size
			}
		case "smudge":
			w = git.NewPktlineWriter(os.Stdout, smudgeFilterBufferCapacity)
			if req.Header["can-delay"] == "1" {
				var ptr *lfs.Pointer

				n, delayed, ptr, err = delayedSmudge(gitfilter, s, w, req.Payload, q, req.Header["pathname"], skip, filter)

				if delayed {
					ptrs[req.Header["pathname"]] = ptr
				}
			} else {
				s.WriteStatus(statusFromErr(nil))
				from, ferr := incomingOrCached(req.Payload, ptrs[req.Header["pathname"]])
				if ferr != nil {
					break
				}

				n, err = smudge(gitfilter, w, from, req.Header["pathname"], skip, filter)
				if err == nil {
					delete(ptrs, req.Header["pathname"])
				}
			}
		case "list_available_blobs":
			closeOnce.Do(func() {
				// The first time that Git sends us the
				// 'list_available_blobs' command, we are
				// guaranteed that no more smudge commands
				// will be issued for _new_ checkout entries.
				//
				// This means that, by the time we get here,
				// we have seen all entries in the checkout,
				// and should therefore instruct the transfer
				// queue to make a batch out of whatever
				// remaining items it has, and then close
				// itself.
				//
				// The call is wrapped in `sync.(*Once).Do()`
				// so that `q.Wait()` is only called once, and
				// it runs in a goroutine since `q.Wait()`
				// blocks.
				go q.Wait()
			})

			// On the first and all subsequent calls to
			// 'list_available_blobs', we read items from
			// q.Watch() (buffered through "available") until a
			// read from that channel would block (in other
			// words, we read until there are no more items
			// immediately ready to be sent back to Git).
			paths := pathnames(readAvailable(available, q.BatchSize()))
			if len(paths) == 0 {
				// If `len(paths) == 0`, the "available"
				// channel (and thus q.Watch()) has closed,
				// indicating that all items have been
				// completely processed and therefore sent
				// back to Git for checkout.
				for path := range ptrs {
					// If we sent a path to Git but it
					// didn't ask for the smudge contents,
					// that path is available and Git should
					// accept it later.
					paths = append(paths, fmt.Sprintf("pathname=%s", path))
				}
			}
			err = s.WriteList(paths)
		default:
			ExitWithError(fmt.Errorf("Unknown command %q", req.Header["command"]))
		}

		if errors.IsNotAPointerError(err) {
			malformed = append(malformed, req.Header["pathname"])
			err = nil
		} else if possiblyMalformedObjectSize(n) {
			malformedOnWindows = append(malformedOnWindows, req.Header["pathname"])
		}

		var status git.FilterProcessStatus
		if delayed {
			// If delayed, there is no need to call w.Flush() since
			// no data was written. Calculate the status from the
			// given error using 'delayedStatusFromErr'.
			status = delayedStatusFromErr(err)
		} else if ferr := w.Flush(); ferr != nil {
			// Otherwise, we do need to call w.Flush(), since we
			// have to assume that data was written. If the flush
			// operation was unsuccessful, calculate the status
			// using 'statusFromErr'.
			status = statusFromErr(ferr)
		} else {
			// If the above flush was successful, we calculate the
			// status from the above clean, smudge, or
			// list_available_blobs command using statusFromErr,
			// since we did not delay.
			status = statusFromErr(err)
		}

		s.WriteStatus(status)
	}

	if len(malformed) > 0 {
		fmt.Fprintf(os.Stderr, "Encountered %d file(s) that should have been pointers, but weren't:\n", len(malformed))
		for _, m := range malformed {
			fmt.Fprintf(os.Stderr, "\t%s\n", m)
		}
	}

	if len(malformedOnWindows) > 0 {
		fmt.Fprintf(os.Stderr, "Encountered %d file(s) that may not have been copied correctly on Windows:\n", len(malformedOnWindows))

		for _, m := range malformedOnWindows {
			fmt.Fprintf(os.Stderr, "\t%s\n", m)
		}

		fmt.Fprintf(os.Stderr, "\nSee: `git lfs help smudge` for more details.\n")
	}

	if err := s.Err(); err != nil && err != io.EOF {
		ExitWithError(err)
	}
}
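
// For context, a rough sketch of the pkt-line handshake that filterCommand
// performs above, based on Git's long-running filter process protocol (the
// trace below is illustrative, not captured output):
//
//	Git    -> filter: git-filter-client, version=2, <flush>
//	filter -> Git:    git-filter-server, version=2, <flush>
//	Git    -> filter: capability=clean, capability=smudge, capability=delay, <flush>
//	filter -> Git:    capability=clean, capability=smudge, capability=delay, <flush>
//
// Each request handled in the loop then carries a header (for example
// "command=smudge" and "pathname=foo/bar.bin"), the blob contents, and a
// flush packet; the filter replies with a status, any output, and a final
// status.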

// infiniteTransferBuffer streams the results of q.Watch() into "available" as
// if available had an infinite channel buffer.
func infiniteTransferBuffer(q *tq.TransferQueue, available chan<- *tq.Transfer) {
	// Stream results from q.Watch() into chan "available" via an infinite
	// buffer.

	watch := q.Watch()

	// pending is used to keep track of an ordered list of available
	// `*tq.Transfer`'s that cannot be written to "available" without
	// blocking.
	var pending []*tq.Transfer

	for {
		if len(pending) > 0 {
			select {
			case t, ok := <-watch:
				if !ok {
					// If the list of pending elements is
					// non-empty, stream them out (even if
					// they block), and then close
					// "available".
					for _, t = range pending {
						available <- t
					}
					close(available)
					return
				}
				pending = append(pending, t)
			case available <- pending[0]:
				// Otherwise, dequeue and shift the first
				// element from pending onto available.
				pending = pending[1:]
			}
		} else {
			t, ok := <-watch
			if !ok {
				// If watch is closed, the "tq" is done, and
				// there are no items on the buffer. Return
				// immediately.
				close(available)
				return
			}

			select {
			case available <- t:
				// Copy an item directly from <-watch onto
				// "available".
			default:
				// Otherwise, if that would have blocked, make
				// the new read pending.
				pending = append(pending, t)
			}
		}
	}
}
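
// A minimal sketch of how the buffer above is wired up to its consumer (the
// names mirror their use in filterCommand; this is illustrative, not a second
// call site):
//
//	available := make(chan *tq.Transfer)
//	go infiniteTransferBuffer(q, available)
//	// ...later, drain whatever is immediately ready without blocking q.Watch():
//	batch := readAvailable(available, q.BatchSize())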

// incomingOrCached returns an io.Reader that is either the contents of the
// given io.Reader "r", or the encoded contents of "ptr". It returns an error if
// there was an error reading from "r".
//
// This is done because when a `command=smudge` with `can-delay=0` is issued,
// the entry's contents are not sent, and must be re-encoded from the stored
// pointer corresponding to the request's filepath.
func incomingOrCached(r io.Reader, ptr *lfs.Pointer) (io.Reader, error) {
	buf := make([]byte, 1024)
	n, err := r.Read(buf)
	buf = buf[:n]

	if n == 0 {
		if ptr == nil {
			// If we read no data from the given io.Reader "r" _and_
			// there was no data to fall back on, return an empty
			// io.Reader yielding no data.
			return bytes.NewReader(buf), nil
		}
		// If we read no data from the given io.Reader "r", _and_ there
		// is a pointer that we can fall back on, return an io.Reader
		// that yields the encoded version of the given pointer.
		return strings.NewReader(ptr.Encoded()), nil
	}

	if err == io.EOF {
		return bytes.NewReader(buf), nil
	}
	return io.MultiReader(bytes.NewReader(buf), r), err
}
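
// Sketched behavior of incomingOrCached for the three cases described above
// (the readers and the pointer value "ptr" are hypothetical):
//
//	incomingOrCached(strings.NewReader(""), nil)    // empty reader, nil error
//	incomingOrCached(strings.NewReader(""), ptr)    // reader over ptr.Encoded(), nil error
//	incomingOrCached(strings.NewReader("abc"), ptr) // reader yielding "abc" (plus any remainder of "r"), nil error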

// readAvailable satisfies the accumulation semantics for the
// 'list_available_blobs' command. It accumulates items until:
//
// 1. Reading from the channel of available items would block and at least
//    one item has already been collected, or ...
// 2. No items were immediately available and a single (blocking) read
//    returns one, or ...
// 3. The 'tq.TransferQueue' is completed and its channel is closed.
func readAvailable(ch <-chan *tq.Transfer, cap int) []*tq.Transfer {
	ts := make([]*tq.Transfer, 0, cap)

	for {
		select {
		case t, ok := <-ch:
			if !ok {
				return ts
			}
			ts = append(ts, t)
		default:
			if len(ts) > 0 {
				return ts
			}

			t, ok := <-ch
			if !ok {
				return ts
			}
			return append(ts, t)
		}
	}
}

// pathnames formats a list of *tq.Transfers as a valid response to the
// 'list_available_blobs' command.
func pathnames(ts []*tq.Transfer) []string {
	pathnames := make([]string, 0, len(ts))
	for _, t := range ts {
		pathnames = append(pathnames, fmt.Sprintf("pathname=%s", t.Name))
	}

	return pathnames
}
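
// For example (the transfer names below are hypothetical):
//
//	pathnames([]*tq.Transfer{{Name: "assets/logo.png"}, {Name: "media/clip.mp4"}})
//	// -> []string{"pathname=assets/logo.png", "pathname=media/clip.mp4"}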

// statusFromErr returns the status code that should be sent over the filter
// protocol based on a given error, "err".
func statusFromErr(err error) git.FilterProcessStatus {
	if err != nil && err != io.EOF {
		return git.StatusError
	}
	return git.StatusSuccess
}

// delayedStatusFromErr returns the status code that should be sent over the
// filter protocol based on a given error, "err", when the blob smudge
// operation was delayed.
func delayedStatusFromErr(err error) git.FilterProcessStatus {
	status := statusFromErr(err)

	switch status {
	case git.StatusSuccess:
		return git.StatusDelay
	default:
		return status
	}
}
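
// In short, the two helpers above map errors to protocol statuses as follows
// (the non-nil error shown is illustrative):
//
//	statusFromErr(nil)                       // git.StatusSuccess
//	statusFromErr(io.EOF)                    // git.StatusSuccess
//	statusFromErr(fmt.Errorf("boom"))        // git.StatusError
//	delayedStatusFromErr(nil)                // git.StatusDelay
//	delayedStatusFromErr(fmt.Errorf("boom")) // git.StatusError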

func init() {
	RegisterCommand("filter-process", filterCommand, func(cmd *cobra.Command) {
		cmd.Flags().BoolVarP(&filterSmudgeSkip, "skip", "s", false, "")
	})
}