code.gitea.io/gitea@v1.22.3/modules/git/batch_reader.go (about) 1 // Copyright 2020 The Gitea Authors. All rights reserved. 2 // SPDX-License-Identifier: MIT 3 4 package git 5 6 import ( 7 "bufio" 8 "bytes" 9 "context" 10 "fmt" 11 "io" 12 "math" 13 "runtime" 14 "strconv" 15 "strings" 16 17 "code.gitea.io/gitea/modules/log" 18 19 "github.com/djherbis/buffer" 20 "github.com/djherbis/nio/v3" 21 ) 22 23 // WriteCloserError wraps an io.WriteCloser with an additional CloseWithError function 24 type WriteCloserError interface { 25 io.WriteCloser 26 CloseWithError(err error) error 27 } 28 29 // ensureValidGitRepository runs git rev-parse in the repository path - thus ensuring that the repository is a valid repository. 30 // Run before opening git cat-file. 31 // This is needed otherwise the git cat-file will hang for invalid repositories. 32 func ensureValidGitRepository(ctx context.Context, repoPath string) error { 33 stderr := strings.Builder{} 34 err := NewCommand(ctx, "rev-parse"). 35 SetDescription(fmt.Sprintf("%s rev-parse [repo_path: %s]", GitExecutable, repoPath)). 36 Run(&RunOpts{ 37 Dir: repoPath, 38 Stderr: &stderr, 39 }) 40 if err != nil { 41 return ConcatenateError(err, (&stderr).String()) 42 } 43 return nil 44 } 45 46 // catFileBatchCheck opens git cat-file --batch-check in the provided repo and returns a stdin pipe, a stdout reader and cancel function 47 func catFileBatchCheck(ctx context.Context, repoPath string) (WriteCloserError, *bufio.Reader, func()) { 48 batchStdinReader, batchStdinWriter := io.Pipe() 49 batchStdoutReader, batchStdoutWriter := io.Pipe() 50 ctx, ctxCancel := context.WithCancel(ctx) 51 closed := make(chan struct{}) 52 cancel := func() { 53 ctxCancel() 54 _ = batchStdoutReader.Close() 55 _ = batchStdinWriter.Close() 56 <-closed 57 } 58 59 // Ensure cancel is called as soon as the provided context is cancelled 60 go func() { 61 <-ctx.Done() 62 cancel() 63 }() 64 65 _, filename, line, _ := runtime.Caller(2) 66 filename = strings.TrimPrefix(filename, callerPrefix) 67 68 go func() { 69 stderr := strings.Builder{} 70 err := NewCommand(ctx, "cat-file", "--batch-check"). 71 SetDescription(fmt.Sprintf("%s cat-file --batch-check [repo_path: %s] (%s:%d)", GitExecutable, repoPath, filename, line)). 72 Run(&RunOpts{ 73 Dir: repoPath, 74 Stdin: batchStdinReader, 75 Stdout: batchStdoutWriter, 76 Stderr: &stderr, 77 78 UseContextTimeout: true, 79 }) 80 if err != nil { 81 _ = batchStdoutWriter.CloseWithError(ConcatenateError(err, (&stderr).String())) 82 _ = batchStdinReader.CloseWithError(ConcatenateError(err, (&stderr).String())) 83 } else { 84 _ = batchStdoutWriter.Close() 85 _ = batchStdinReader.Close() 86 } 87 close(closed) 88 }() 89 90 // For simplicities sake we'll use a buffered reader to read from the cat-file --batch-check 91 batchReader := bufio.NewReader(batchStdoutReader) 92 93 return batchStdinWriter, batchReader, cancel 94 } 95 96 // catFileBatch opens git cat-file --batch in the provided repo and returns a stdin pipe, a stdout reader and cancel function 97 func catFileBatch(ctx context.Context, repoPath string) (WriteCloserError, *bufio.Reader, func()) { 98 // We often want to feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary. 99 // so let's create a batch stdin and stdout 100 batchStdinReader, batchStdinWriter := io.Pipe() 101 batchStdoutReader, batchStdoutWriter := nio.Pipe(buffer.New(32 * 1024)) 102 ctx, ctxCancel := context.WithCancel(ctx) 103 closed := make(chan struct{}) 104 cancel := func() { 105 ctxCancel() 106 _ = batchStdinWriter.Close() 107 _ = batchStdoutReader.Close() 108 <-closed 109 } 110 111 // Ensure cancel is called as soon as the provided context is cancelled 112 go func() { 113 <-ctx.Done() 114 cancel() 115 }() 116 117 _, filename, line, _ := runtime.Caller(2) 118 filename = strings.TrimPrefix(filename, callerPrefix) 119 120 go func() { 121 stderr := strings.Builder{} 122 err := NewCommand(ctx, "cat-file", "--batch"). 123 SetDescription(fmt.Sprintf("%s cat-file --batch [repo_path: %s] (%s:%d)", GitExecutable, repoPath, filename, line)). 124 Run(&RunOpts{ 125 Dir: repoPath, 126 Stdin: batchStdinReader, 127 Stdout: batchStdoutWriter, 128 Stderr: &stderr, 129 130 UseContextTimeout: true, 131 }) 132 if err != nil { 133 _ = batchStdoutWriter.CloseWithError(ConcatenateError(err, (&stderr).String())) 134 _ = batchStdinReader.CloseWithError(ConcatenateError(err, (&stderr).String())) 135 } else { 136 _ = batchStdoutWriter.Close() 137 _ = batchStdinReader.Close() 138 } 139 close(closed) 140 }() 141 142 // For simplicities sake we'll us a buffered reader to read from the cat-file --batch 143 batchReader := bufio.NewReaderSize(batchStdoutReader, 32*1024) 144 145 return batchStdinWriter, batchReader, cancel 146 } 147 148 // ReadBatchLine reads the header line from cat-file --batch 149 // We expect: 150 // <sha> SP <type> SP <size> LF 151 // sha is a hex encoded here 152 func ReadBatchLine(rd *bufio.Reader) (sha []byte, typ string, size int64, err error) { 153 typ, err = rd.ReadString('\n') 154 if err != nil { 155 return sha, typ, size, err 156 } 157 if len(typ) == 1 { 158 typ, err = rd.ReadString('\n') 159 if err != nil { 160 return sha, typ, size, err 161 } 162 } 163 idx := strings.IndexByte(typ, ' ') 164 if idx < 0 { 165 log.Debug("missing space typ: %s", typ) 166 return sha, typ, size, ErrNotExist{ID: string(sha)} 167 } 168 sha = []byte(typ[:idx]) 169 typ = typ[idx+1:] 170 171 idx = strings.IndexByte(typ, ' ') 172 if idx < 0 { 173 return sha, typ, size, ErrNotExist{ID: string(sha)} 174 } 175 176 sizeStr := typ[idx+1 : len(typ)-1] 177 typ = typ[:idx] 178 179 size, err = strconv.ParseInt(sizeStr, 10, 64) 180 return sha, typ, size, err 181 } 182 183 // ReadTagObjectID reads a tag object ID hash from a cat-file --batch stream, throwing away the rest of the stream. 184 func ReadTagObjectID(rd *bufio.Reader, size int64) (string, error) { 185 var id string 186 var n int64 187 headerLoop: 188 for { 189 line, err := rd.ReadBytes('\n') 190 if err != nil { 191 return "", err 192 } 193 n += int64(len(line)) 194 idx := bytes.Index(line, []byte{' '}) 195 if idx < 0 { 196 continue 197 } 198 199 if string(line[:idx]) == "object" { 200 id = string(line[idx+1 : len(line)-1]) 201 break headerLoop 202 } 203 } 204 205 // Discard the rest of the tag 206 return id, DiscardFull(rd, size-n+1) 207 } 208 209 // ReadTreeID reads a tree ID from a cat-file --batch stream, throwing away the rest of the stream. 210 func ReadTreeID(rd *bufio.Reader, size int64) (string, error) { 211 var id string 212 var n int64 213 headerLoop: 214 for { 215 line, err := rd.ReadBytes('\n') 216 if err != nil { 217 return "", err 218 } 219 n += int64(len(line)) 220 idx := bytes.Index(line, []byte{' '}) 221 if idx < 0 { 222 continue 223 } 224 225 if string(line[:idx]) == "tree" { 226 id = string(line[idx+1 : len(line)-1]) 227 break headerLoop 228 } 229 } 230 231 // Discard the rest of the commit 232 return id, DiscardFull(rd, size-n+1) 233 } 234 235 // git tree files are a list: 236 // <mode-in-ascii> SP <fname> NUL <binary Hash> 237 // 238 // Unfortunately this 20-byte notation is somewhat in conflict to all other git tools 239 // Therefore we need some method to convert these binary hashes to hex hashes 240 241 // constant hextable to help quickly convert between binary and hex representation 242 const hextable = "0123456789abcdef" 243 244 // BinToHexHeash converts a binary Hash into a hex encoded one. Input and output can be the 245 // same byte slice to support in place conversion without allocations. 246 // This is at least 100x quicker that hex.EncodeToString 247 func BinToHex(objectFormat ObjectFormat, sha, out []byte) []byte { 248 for i := objectFormat.FullLength()/2 - 1; i >= 0; i-- { 249 v := sha[i] 250 vhi, vlo := v>>4, v&0x0f 251 shi, slo := hextable[vhi], hextable[vlo] 252 out[i*2], out[i*2+1] = shi, slo 253 } 254 return out 255 } 256 257 // ParseTreeLine reads an entry from a tree in a cat-file --batch stream 258 // This carefully avoids allocations - except where fnameBuf is too small. 259 // It is recommended therefore to pass in an fnameBuf large enough to avoid almost all allocations 260 // 261 // Each line is composed of: 262 // <mode-in-ascii-dropping-initial-zeros> SP <fname> NUL <binary HASH> 263 // 264 // We don't attempt to convert the raw HASH to save a lot of time 265 func ParseTreeLine(objectFormat ObjectFormat, rd *bufio.Reader, modeBuf, fnameBuf, shaBuf []byte) (mode, fname, sha []byte, n int, err error) { 266 var readBytes []byte 267 268 // Read the Mode & fname 269 readBytes, err = rd.ReadSlice('\x00') 270 if err != nil { 271 return mode, fname, sha, n, err 272 } 273 idx := bytes.IndexByte(readBytes, ' ') 274 if idx < 0 { 275 log.Debug("missing space in readBytes ParseTreeLine: %s", readBytes) 276 return mode, fname, sha, n, &ErrNotExist{} 277 } 278 279 n += idx + 1 280 copy(modeBuf, readBytes[:idx]) 281 if len(modeBuf) >= idx { 282 modeBuf = modeBuf[:idx] 283 } else { 284 modeBuf = append(modeBuf, readBytes[len(modeBuf):idx]...) 285 } 286 mode = modeBuf 287 288 readBytes = readBytes[idx+1:] 289 290 // Deal with the fname 291 copy(fnameBuf, readBytes) 292 if len(fnameBuf) > len(readBytes) { 293 fnameBuf = fnameBuf[:len(readBytes)] 294 } else { 295 fnameBuf = append(fnameBuf, readBytes[len(fnameBuf):]...) 296 } 297 for err == bufio.ErrBufferFull { 298 readBytes, err = rd.ReadSlice('\x00') 299 fnameBuf = append(fnameBuf, readBytes...) 300 } 301 n += len(fnameBuf) 302 if err != nil { 303 return mode, fname, sha, n, err 304 } 305 fnameBuf = fnameBuf[:len(fnameBuf)-1] 306 fname = fnameBuf 307 308 // Deal with the binary hash 309 idx = 0 310 length := objectFormat.FullLength() / 2 311 for idx < length { 312 var read int 313 read, err = rd.Read(shaBuf[idx:length]) 314 n += read 315 if err != nil { 316 return mode, fname, sha, n, err 317 } 318 idx += read 319 } 320 sha = shaBuf 321 return mode, fname, sha, n, err 322 } 323 324 var callerPrefix string 325 326 func init() { 327 _, filename, _, _ := runtime.Caller(0) 328 callerPrefix = strings.TrimSuffix(filename, "modules/git/batch_reader.go") 329 } 330 331 func DiscardFull(rd *bufio.Reader, discard int64) error { 332 if discard > math.MaxInt32 { 333 n, err := rd.Discard(math.MaxInt32) 334 discard -= int64(n) 335 if err != nil { 336 return err 337 } 338 } 339 for discard > 0 { 340 n, err := rd.Discard(int(discard)) 341 discard -= int64(n) 342 if err != nil { 343 return err 344 } 345 } 346 return nil 347 }