// Source: github.com/git-lfs/git-lfs@v2.5.2+incompatible/commands/command_filter_process.go

package commands

import (
	"bytes"
	"fmt"
	"io"
	"os"
	"strings"
	"sync"

	"github.com/git-lfs/git-lfs/errors"
	"github.com/git-lfs/git-lfs/filepathfilter"
	"github.com/git-lfs/git-lfs/git"
	"github.com/git-lfs/git-lfs/lfs"
	"github.com/git-lfs/git-lfs/tq"
	"github.com/spf13/cobra"
)

const (
	// cleanFilterBufferCapacity is the desired capacity of the
	// `*git.PktlineWriter`'s internal buffer when the filter protocol
	// dictates the "clean" command. 512 bytes is (in most cases) enough to
	// hold an entire LFS pointer in memory.
	cleanFilterBufferCapacity = 512

	// smudgeFilterBufferCapacity is the desired capacity of the
	// `*git.PktlineWriter`'s internal buffer when the filter protocol
	// dictates the "smudge" command. It is set to git.MaxPacketLength.
	smudgeFilterBufferCapacity = git.MaxPacketLength
)

// filterSmudgeSkip is a command-line flag owned by the `filter-process` command
// dictating whether or not to skip the smudging process, leaving pointers as-is
// in the working tree.
35 var filterSmudgeSkip bool 36 37 func filterCommand(cmd *cobra.Command, args []string) { 38 requireStdin("This command should be run by the Git filter process") 39 installHooks(false) 40 41 s := git.NewFilterProcessScanner(os.Stdin, os.Stdout) 42 43 if err := s.Init(); err != nil { 44 ExitWithError(err) 45 } 46 47 caps, err := s.NegotiateCapabilities() 48 if err != nil { 49 ExitWithError(err) 50 } 51 52 var supportsDelay bool 53 for _, cap := range caps { 54 if cap == "capability=delay" { 55 supportsDelay = true 56 break 57 } 58 } 59 60 skip := filterSmudgeSkip || cfg.Os.Bool("GIT_LFS_SKIP_SMUDGE", false) 61 filter := filepathfilter.New(cfg.FetchIncludePaths(), cfg.FetchExcludePaths()) 62 63 ptrs := make(map[string]*lfs.Pointer) 64 65 var q *tq.TransferQueue 66 closeOnce := new(sync.Once) 67 available := make(chan *tq.Transfer) 68 69 if supportsDelay { 70 q = tq.NewTransferQueue( 71 tq.Download, 72 getTransferManifestOperationRemote("download", cfg.Remote()), 73 cfg.Remote(), 74 tq.RemoteRef(currentRemoteRef()), 75 ) 76 go infiniteTransferBuffer(q, available) 77 } 78 79 var malformed []string 80 var malformedOnWindows []string 81 gitfilter := lfs.NewGitFilter(cfg) 82 for s.Scan() { 83 var n int64 84 var err error 85 var delayed bool 86 var w *git.PktlineWriter 87 88 req := s.Request() 89 90 switch req.Header["command"] { 91 case "clean": 92 s.WriteStatus(statusFromErr(nil)) 93 w = git.NewPktlineWriter(os.Stdout, cleanFilterBufferCapacity) 94 95 var ptr *lfs.Pointer 96 ptr, err = clean(gitfilter, w, req.Payload, req.Header["pathname"], -1) 97 98 if ptr != nil { 99 n = ptr.Size 100 } 101 case "smudge": 102 w = git.NewPktlineWriter(os.Stdout, smudgeFilterBufferCapacity) 103 if req.Header["can-delay"] == "1" { 104 var ptr *lfs.Pointer 105 106 n, delayed, ptr, err = delayedSmudge(gitfilter, s, w, req.Payload, q, req.Header["pathname"], skip, filter) 107 108 if delayed { 109 ptrs[req.Header["pathname"]] = ptr 110 } 111 } else { 112 s.WriteStatus(statusFromErr(nil)) 113 
from, ferr := incomingOrCached(req.Payload, ptrs[req.Header["pathname"]]) 114 if ferr != nil { 115 break 116 } 117 118 n, err = smudge(gitfilter, w, from, req.Header["pathname"], skip, filter) 119 if err == nil { 120 delete(ptrs, req.Header["pathname"]) 121 } 122 } 123 case "list_available_blobs": 124 closeOnce.Do(func() { 125 // The first time that Git sends us the 126 // 'list_available_blobs' command, it is given 127 // that no more smudge commands will be issued 128 // with _new_ checkout entries. 129 // 130 // This means that, by the time that we're here, 131 // we have seen all entries in the checkout, and 132 // should therefore instruct the transfer queue 133 // to make a batch out of whatever remaining 134 // items it has, and then close itself. 135 // 136 // This function call is wrapped in a 137 // `sync.(*Once).Do()` call so we only call 138 // `q.Wait()` once, and is called via a 139 // goroutine since `q.Wait()` is blocking. 140 go q.Wait() 141 }) 142 143 // The first, and all subsequent calls to 144 // list_available_blobs, we read items from `tq.Watch()` 145 // until a read from that channel becomes blocking (in 146 // other words, we read until there are no more items 147 // immediately ready to be sent back to Git). 148 paths := pathnames(readAvailable(available, q.BatchSize())) 149 if len(paths) == 0 { 150 // If `len(paths) == 0`, `tq.Watch()` has 151 // closed, indicating that all items have been 152 // completely processed, and therefore, sent 153 // back to Git for checkout. 154 for path, _ := range ptrs { 155 // If we sent a path to Git but it 156 // didn't ask for the smudge contents, 157 // that path is available and Git should 158 // accept it later. 
159 paths = append(paths, fmt.Sprintf("pathname=%s", path)) 160 } 161 } 162 err = s.WriteList(paths) 163 default: 164 ExitWithError(fmt.Errorf("Unknown command %q", req.Header["command"])) 165 } 166 167 if errors.IsNotAPointerError(err) { 168 malformed = append(malformed, req.Header["pathname"]) 169 err = nil 170 } else if possiblyMalformedObjectSize(n) { 171 malformedOnWindows = append(malformedOnWindows, req.Header["pathname"]) 172 } 173 174 var status git.FilterProcessStatus 175 if delayed { 176 // If delayed, there is no need to call w.Flush() since 177 // no data was written. Calculate the status from the 178 // given error using 'delayedStatusFromErr'. 179 status = delayedStatusFromErr(err) 180 } else if ferr := w.Flush(); ferr != nil { 181 // Otherwise, we do need to call w.Flush(), since we 182 // have to assume that data was written. If the flush 183 // operation was unsuccessful, calculate the status 184 // using 'statusFromErr'. 185 status = statusFromErr(ferr) 186 } else { 187 // If the above flush was successful, we calculate the 188 // status from the above clean, smudge, or 189 // list_available_blobs command using statusFromErr, 190 // since we did not delay. 
191 status = statusFromErr(err) 192 } 193 194 s.WriteStatus(status) 195 } 196 197 if len(malformed) > 0 { 198 fmt.Fprintf(os.Stderr, "Encountered %d file(s) that should have been pointers, but weren't:\n", len(malformed)) 199 for _, m := range malformed { 200 fmt.Fprintf(os.Stderr, "\t%s\n", m) 201 } 202 } 203 204 if len(malformedOnWindows) > 0 { 205 fmt.Fprintf(os.Stderr, "Encountered %d file(s) that may not have been copied correctly on Windows:\n", len(malformedOnWindows)) 206 207 for _, m := range malformedOnWindows { 208 fmt.Fprintf(os.Stderr, "\t%s\n", m) 209 } 210 211 fmt.Fprintf(os.Stderr, "\nSee: `git lfs help smudge` for more details.\n") 212 } 213 214 if err := s.Err(); err != nil && err != io.EOF { 215 ExitWithError(err) 216 } 217 } 218 219 // infiniteTransferBuffer streams the results of q.Watch() into "available" as 220 // if available had an infinite channel buffer. 221 func infiniteTransferBuffer(q *tq.TransferQueue, available chan<- *tq.Transfer) { 222 // Stream results from q.Watch() into chan "available" via an infinite 223 // buffer. 224 225 watch := q.Watch() 226 227 // pending is used to keep track of an ordered list of available 228 // `*tq.Transfer`'s that cannot be written to "available" without 229 // blocking. 230 var pending []*tq.Transfer 231 232 for { 233 if len(pending) > 0 { 234 select { 235 case t, ok := <-watch: 236 if !ok { 237 // If the list of pending elements is 238 // non-empty, stream them out (even if 239 // they block), and then close(). 240 for _, t = range pending { 241 available <- t 242 } 243 close(available) 244 return 245 } 246 pending = append(pending, t) 247 case available <- pending[0]: 248 // Otherwise, dequeue and shift the first 249 // element from pending onto available. 250 pending = pending[1:] 251 } 252 } else { 253 t, ok := <-watch 254 if !ok { 255 // If watch is closed, the "tq" is done, and 256 // there are no items on the buffer. Return 257 // immediately. 
258 close(available) 259 return 260 } 261 262 select { 263 case available <- t: 264 // Copy an item directly from <-watch onto available<-. 265 default: 266 // Otherwise, if that would have blocked, make 267 // the new read pending. 268 pending = append(pending, t) 269 } 270 } 271 } 272 } 273 274 // incomingOrCached returns an io.Reader that is either the contents of the 275 // given io.Reader "r", or the encoded contents of "ptr". It returns an error if 276 // there was an error reading from "r". 277 // 278 // This is done because when a `command=smudge` with `can-delay=0` is issued, 279 // the entry's contents are not sent, and must be re-encoded from the stored 280 // pointer corresponding to the request's filepath. 281 func incomingOrCached(r io.Reader, ptr *lfs.Pointer) (io.Reader, error) { 282 buf := make([]byte, 1024) 283 n, err := r.Read(buf) 284 buf = buf[:n] 285 286 if n == 0 { 287 if ptr == nil { 288 // If we read no data from the given io.Reader "r" _and_ 289 // there was no data to fall back on, return an empty 290 // io.Reader yielding no data. 291 return bytes.NewReader(buf), nil 292 } 293 // If we read no data from the given io.Reader "r", _and_ there 294 // is a pointer that we can fall back on, return an io.Reader 295 // that yields the encoded version of the given pointer. 296 return strings.NewReader(ptr.Encoded()), nil 297 } 298 299 if err == io.EOF { 300 return bytes.NewReader(buf), nil 301 } 302 return io.MultiReader(bytes.NewReader(buf), r), err 303 } 304 305 // readAvailable satisfies the accumulation semantics for the 306 // 'list_available_blobs' command. It accumulates items until: 307 // 308 // 1. Reading from the channel of available items blocks, or ... 309 // 2. There is one item available, or ... 310 // 3. The 'tq.TransferQueue' is completed. 
311 func readAvailable(ch <-chan *tq.Transfer, cap int) []*tq.Transfer { 312 ts := make([]*tq.Transfer, 0, cap) 313 314 for { 315 select { 316 case t, ok := <-ch: 317 if !ok { 318 return ts 319 } 320 ts = append(ts, t) 321 default: 322 if len(ts) > 0 { 323 return ts 324 } 325 326 t, ok := <-ch 327 if !ok { 328 return ts 329 } 330 return append(ts, t) 331 } 332 } 333 } 334 335 // pathnames formats a list of *tq.Transfers as a valid response to the 336 // 'list_available_blobs' command. 337 func pathnames(ts []*tq.Transfer) []string { 338 pathnames := make([]string, 0, len(ts)) 339 for _, t := range ts { 340 pathnames = append(pathnames, fmt.Sprintf("pathname=%s", t.Name)) 341 } 342 343 return pathnames 344 } 345 346 // statusFromErr returns the status code that should be sent over the filter 347 // protocol based on a given error, "err". 348 func statusFromErr(err error) git.FilterProcessStatus { 349 if err != nil && err != io.EOF { 350 return git.StatusError 351 } 352 return git.StatusSuccess 353 } 354 355 // delayedStatusFromErr returns the status code that should be sent over the 356 // filter protocol based on a given error, "err" when the blob smudge operation 357 // was delayed. 358 func delayedStatusFromErr(err error) git.FilterProcessStatus { 359 status := statusFromErr(err) 360 361 switch status { 362 case git.StatusSuccess: 363 return git.StatusDelay 364 default: 365 return status 366 } 367 } 368 369 func init() { 370 RegisterCommand("filter-process", filterCommand, func(cmd *cobra.Command) { 371 cmd.Flags().BoolVarP(&filterSmudgeSkip, "skip", "s", false, "") 372 }) 373 }