github.com/gitbundle/modules@v0.0.0-20231025071548-85b91c5c3b01/git/batch_reader.go (about) 1 // Copyright 2023 The GitBundle Inc. All rights reserved. 2 // Copyright 2017 The Gitea Authors. All rights reserved. 3 // Use of this source code is governed by a MIT-style 4 // license that can be found in the LICENSE file. 5 6 package git 7 8 import ( 9 "bufio" 10 "bytes" 11 "context" 12 "fmt" 13 "io" 14 "math" 15 "runtime" 16 "strconv" 17 "strings" 18 19 "github.com/gitbundle/modules/log" 20 21 "github.com/djherbis/buffer" 22 "github.com/djherbis/nio/v3" 23 ) 24 25 // WriteCloserError wraps an io.WriteCloser with an additional CloseWithError function 26 type WriteCloserError interface { 27 io.WriteCloser 28 CloseWithError(err error) error 29 } 30 31 // EnsureValidGitRepository runs git rev-parse in the repository path - thus ensuring that the repository is a valid repository. 32 // Run before opening git cat-file. 33 // This is needed otherwise the git cat-file will hang for invalid repositories. 34 func EnsureValidGitRepository(ctx context.Context, repoPath string) error { 35 stderr := strings.Builder{} 36 err := NewCommand(ctx, "rev-parse"). 37 SetDescription(fmt.Sprintf("%s rev-parse [repo_path: %s]", GitExecutable, repoPath)). 38 Run(&RunOpts{ 39 Dir: repoPath, 40 Stderr: &stderr, 41 }) 42 if err != nil { 43 return ConcatenateError(err, (&stderr).String()) 44 } 45 return nil 46 } 47 48 // CatFileBatchCheck opens git cat-file --batch-check in the provided repo and returns a stdin pipe, a stdout reader and cancel function 49 func CatFileBatchCheck(ctx context.Context, repoPath string) (WriteCloserError, *bufio.Reader, func()) { 50 batchStdinReader, batchStdinWriter := io.Pipe() 51 batchStdoutReader, batchStdoutWriter := io.Pipe() 52 ctx, ctxCancel := context.WithCancel(ctx) 53 closed := make(chan struct{}) 54 cancel := func() { 55 ctxCancel() 56 _ = batchStdoutReader.Close() 57 _ = batchStdinWriter.Close() 58 <-closed 59 } 60 61 // Ensure cancel is called as soon as the provided context is cancelled 62 go func() { 63 <-ctx.Done() 64 cancel() 65 }() 66 67 _, filename, line, _ := runtime.Caller(2) 68 filename = strings.TrimPrefix(filename, callerPrefix) 69 70 go func() { 71 stderr := strings.Builder{} 72 err := NewCommand(ctx, "cat-file", "--batch-check"). 73 SetDescription(fmt.Sprintf("%s cat-file --batch-check [repo_path: %s] (%s:%d)", GitExecutable, repoPath, filename, line)). 74 Run(&RunOpts{ 75 Dir: repoPath, 76 Stdin: batchStdinReader, 77 Stdout: batchStdoutWriter, 78 Stderr: &stderr, 79 }) 80 if err != nil { 81 _ = batchStdoutWriter.CloseWithError(ConcatenateError(err, (&stderr).String())) 82 _ = batchStdinReader.CloseWithError(ConcatenateError(err, (&stderr).String())) 83 } else { 84 _ = batchStdoutWriter.Close() 85 _ = batchStdinReader.Close() 86 } 87 close(closed) 88 }() 89 90 // For simplicities sake we'll use a buffered reader to read from the cat-file --batch-check 91 batchReader := bufio.NewReader(batchStdoutReader) 92 93 return batchStdinWriter, batchReader, cancel 94 } 95 96 // CatFileBatch opens git cat-file --batch in the provided repo and returns a stdin pipe, a stdout reader and cancel function 97 func CatFileBatch(ctx context.Context, repoPath string) (WriteCloserError, *bufio.Reader, func()) { 98 // We often want to feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary. 99 // so let's create a batch stdin and stdout 100 batchStdinReader, batchStdinWriter := io.Pipe() 101 batchStdoutReader, batchStdoutWriter := nio.Pipe(buffer.New(32 * 1024)) 102 ctx, ctxCancel := context.WithCancel(ctx) 103 closed := make(chan struct{}) 104 cancel := func() { 105 ctxCancel() 106 _ = batchStdinWriter.Close() 107 _ = batchStdoutReader.Close() 108 <-closed 109 } 110 111 // Ensure cancel is called as soon as the provided context is cancelled 112 go func() { 113 <-ctx.Done() 114 cancel() 115 }() 116 117 _, filename, line, _ := runtime.Caller(2) 118 filename = strings.TrimPrefix(filename, callerPrefix) 119 120 go func() { 121 stderr := strings.Builder{} 122 err := NewCommand(ctx, "cat-file", "--batch"). 123 SetDescription(fmt.Sprintf("%s cat-file --batch [repo_path: %s] (%s:%d)", GitExecutable, repoPath, filename, line)). 124 Run(&RunOpts{ 125 Dir: repoPath, 126 Stdin: batchStdinReader, 127 Stdout: batchStdoutWriter, 128 Stderr: &stderr, 129 }) 130 if err != nil { 131 _ = batchStdoutWriter.CloseWithError(ConcatenateError(err, (&stderr).String())) 132 _ = batchStdinReader.CloseWithError(ConcatenateError(err, (&stderr).String())) 133 } else { 134 _ = batchStdoutWriter.Close() 135 _ = batchStdinReader.Close() 136 } 137 close(closed) 138 }() 139 140 // For simplicities sake we'll us a buffered reader to read from the cat-file --batch 141 batchReader := bufio.NewReaderSize(batchStdoutReader, 32*1024) 142 143 return batchStdinWriter, batchReader, cancel 144 } 145 146 // ReadBatchLine reads the header line from cat-file --batch 147 // We expect: 148 // <sha> SP <type> SP <size> LF 149 // sha is a 40byte not 20byte here 150 func ReadBatchLine(rd *bufio.Reader) (sha []byte, typ string, size int64, err error) { 151 typ, err = rd.ReadString('\n') 152 if err != nil { 153 return 154 } 155 if len(typ) == 1 { 156 typ, err = rd.ReadString('\n') 157 if err != nil { 158 return 159 } 160 } 161 idx := strings.IndexByte(typ, ' ') 162 if idx < 0 { 163 log.Debug("missing space typ: %s", typ) 164 err = ErrNotExist{ID: string(sha)} 165 return 166 } 167 sha = []byte(typ[:idx]) 168 typ = typ[idx+1:] 169 170 idx = strings.IndexByte(typ, ' ') 171 if idx < 0 { 172 err = ErrNotExist{ID: string(sha)} 173 return 174 } 175 176 sizeStr := typ[idx+1 : len(typ)-1] 177 typ = typ[:idx] 178 179 size, err = strconv.ParseInt(sizeStr, 10, 64) 180 return 181 } 182 183 // ReadTagObjectID reads a tag object ID hash from a cat-file --batch stream, throwing away the rest of the stream. 184 func ReadTagObjectID(rd *bufio.Reader, size int64) (string, error) { 185 id := "" 186 var n int64 187 headerLoop: 188 for { 189 line, err := rd.ReadBytes('\n') 190 if err != nil { 191 return "", err 192 } 193 n += int64(len(line)) 194 idx := bytes.Index(line, []byte{' '}) 195 if idx < 0 { 196 continue 197 } 198 199 if string(line[:idx]) == "object" { 200 id = string(line[idx+1 : len(line)-1]) 201 break headerLoop 202 } 203 } 204 205 // Discard the rest of the tag 206 discard := size - n + 1 207 for discard > math.MaxInt32 { 208 _, err := rd.Discard(math.MaxInt32) 209 if err != nil { 210 return id, err 211 } 212 discard -= math.MaxInt32 213 } 214 _, err := rd.Discard(int(discard)) 215 return id, err 216 } 217 218 // ReadTreeID reads a tree ID from a cat-file --batch stream, throwing away the rest of the stream. 219 func ReadTreeID(rd *bufio.Reader, size int64) (string, error) { 220 id := "" 221 var n int64 222 headerLoop: 223 for { 224 line, err := rd.ReadBytes('\n') 225 if err != nil { 226 return "", err 227 } 228 n += int64(len(line)) 229 idx := bytes.Index(line, []byte{' '}) 230 if idx < 0 { 231 continue 232 } 233 234 if string(line[:idx]) == "tree" { 235 id = string(line[idx+1 : len(line)-1]) 236 break headerLoop 237 } 238 } 239 240 // Discard the rest of the commit 241 discard := size - n + 1 242 for discard > math.MaxInt32 { 243 _, err := rd.Discard(math.MaxInt32) 244 if err != nil { 245 return id, err 246 } 247 discard -= math.MaxInt32 248 } 249 _, err := rd.Discard(int(discard)) 250 return id, err 251 } 252 253 // git tree files are a list: 254 // <mode-in-ascii> SP <fname> NUL <20-byte SHA> 255 // 256 // Unfortunately this 20-byte notation is somewhat in conflict to all other git tools 257 // Therefore we need some method to convert these 20-byte SHAs to a 40-byte SHA 258 259 // constant hextable to help quickly convert between 20byte and 40byte hashes 260 const hextable = "0123456789abcdef" 261 262 // To40ByteSHA converts a 20-byte SHA into a 40-byte sha. Input and output can be the 263 // same 40 byte slice to support in place conversion without allocations. 264 // This is at least 100x quicker that hex.EncodeToString 265 // NB This requires that out is a 40-byte slice 266 func To40ByteSHA(sha, out []byte) []byte { 267 for i := 19; i >= 0; i-- { 268 v := sha[i] 269 vhi, vlo := v>>4, v&0x0f 270 shi, slo := hextable[vhi], hextable[vlo] 271 out[i*2], out[i*2+1] = shi, slo 272 } 273 return out 274 } 275 276 // ParseTreeLine reads an entry from a tree in a cat-file --batch stream 277 // This carefully avoids allocations - except where fnameBuf is too small. 278 // It is recommended therefore to pass in an fnameBuf large enough to avoid almost all allocations 279 // 280 // Each line is composed of: 281 // <mode-in-ascii-dropping-initial-zeros> SP <fname> NUL <20-byte SHA> 282 // 283 // We don't attempt to convert the 20-byte SHA to 40-byte SHA to save a lot of time 284 func ParseTreeLine(rd *bufio.Reader, modeBuf, fnameBuf, shaBuf []byte) (mode, fname, sha []byte, n int, err error) { 285 var readBytes []byte 286 287 // Read the Mode & fname 288 readBytes, err = rd.ReadSlice('\x00') 289 if err != nil { 290 return 291 } 292 idx := bytes.IndexByte(readBytes, ' ') 293 if idx < 0 { 294 log.Debug("missing space in readBytes ParseTreeLine: %s", readBytes) 295 296 err = &ErrNotExist{} 297 return 298 } 299 300 n += idx + 1 301 copy(modeBuf, readBytes[:idx]) 302 if len(modeBuf) >= idx { 303 modeBuf = modeBuf[:idx] 304 } else { 305 modeBuf = append(modeBuf, readBytes[len(modeBuf):idx]...) 306 } 307 mode = modeBuf 308 309 readBytes = readBytes[idx+1:] 310 311 // Deal with the fname 312 copy(fnameBuf, readBytes) 313 if len(fnameBuf) > len(readBytes) { 314 fnameBuf = fnameBuf[:len(readBytes)] 315 } else { 316 fnameBuf = append(fnameBuf, readBytes[len(fnameBuf):]...) 317 } 318 for err == bufio.ErrBufferFull { 319 readBytes, err = rd.ReadSlice('\x00') 320 fnameBuf = append(fnameBuf, readBytes...) 321 } 322 n += len(fnameBuf) 323 if err != nil { 324 return 325 } 326 fnameBuf = fnameBuf[:len(fnameBuf)-1] 327 fname = fnameBuf 328 329 // Deal with the 20-byte SHA 330 idx = 0 331 for idx < 20 { 332 read := 0 333 read, err = rd.Read(shaBuf[idx:20]) 334 n += read 335 if err != nil { 336 return 337 } 338 idx += read 339 } 340 sha = shaBuf 341 return 342 } 343 344 var callerPrefix string 345 346 func init() { 347 _, filename, _, _ := runtime.Caller(0) 348 callerPrefix = strings.TrimSuffix(filename, "modules/git/batch_reader.go") 349 }