code.gitea.io/gitea@v1.19.3/modules/git/batch_reader.go (about) 1 // Copyright 2020 The Gitea Authors. All rights reserved. 2 // SPDX-License-Identifier: MIT 3 4 package git 5 6 import ( 7 "bufio" 8 "bytes" 9 "context" 10 "fmt" 11 "io" 12 "math" 13 "runtime" 14 "strconv" 15 "strings" 16 17 "code.gitea.io/gitea/modules/log" 18 19 "github.com/djherbis/buffer" 20 "github.com/djherbis/nio/v3" 21 ) 22 23 // WriteCloserError wraps an io.WriteCloser with an additional CloseWithError function 24 type WriteCloserError interface { 25 io.WriteCloser 26 CloseWithError(err error) error 27 } 28 29 // EnsureValidGitRepository runs git rev-parse in the repository path - thus ensuring that the repository is a valid repository. 30 // Run before opening git cat-file. 31 // This is needed otherwise the git cat-file will hang for invalid repositories. 32 func EnsureValidGitRepository(ctx context.Context, repoPath string) error { 33 stderr := strings.Builder{} 34 err := NewCommand(ctx, "rev-parse"). 35 SetDescription(fmt.Sprintf("%s rev-parse [repo_path: %s]", GitExecutable, repoPath)). 
36 Run(&RunOpts{ 37 Dir: repoPath, 38 Stderr: &stderr, 39 }) 40 if err != nil { 41 return ConcatenateError(err, (&stderr).String()) 42 } 43 return nil 44 } 45 46 // CatFileBatchCheck opens git cat-file --batch-check in the provided repo and returns a stdin pipe, a stdout reader and cancel function 47 func CatFileBatchCheck(ctx context.Context, repoPath string) (WriteCloserError, *bufio.Reader, func()) { 48 batchStdinReader, batchStdinWriter := io.Pipe() 49 batchStdoutReader, batchStdoutWriter := io.Pipe() 50 ctx, ctxCancel := context.WithCancel(ctx) 51 closed := make(chan struct{}) 52 cancel := func() { 53 ctxCancel() 54 _ = batchStdoutReader.Close() 55 _ = batchStdinWriter.Close() 56 <-closed 57 } 58 59 // Ensure cancel is called as soon as the provided context is cancelled 60 go func() { 61 <-ctx.Done() 62 cancel() 63 }() 64 65 _, filename, line, _ := runtime.Caller(2) 66 filename = strings.TrimPrefix(filename, callerPrefix) 67 68 go func() { 69 stderr := strings.Builder{} 70 err := NewCommand(ctx, "cat-file", "--batch-check"). 71 SetDescription(fmt.Sprintf("%s cat-file --batch-check [repo_path: %s] (%s:%d)", GitExecutable, repoPath, filename, line)). 
72 Run(&RunOpts{ 73 Dir: repoPath, 74 Stdin: batchStdinReader, 75 Stdout: batchStdoutWriter, 76 Stderr: &stderr, 77 }) 78 if err != nil { 79 _ = batchStdoutWriter.CloseWithError(ConcatenateError(err, (&stderr).String())) 80 _ = batchStdinReader.CloseWithError(ConcatenateError(err, (&stderr).String())) 81 } else { 82 _ = batchStdoutWriter.Close() 83 _ = batchStdinReader.Close() 84 } 85 close(closed) 86 }() 87 88 // For simplicities sake we'll use a buffered reader to read from the cat-file --batch-check 89 batchReader := bufio.NewReader(batchStdoutReader) 90 91 return batchStdinWriter, batchReader, cancel 92 } 93 94 // CatFileBatch opens git cat-file --batch in the provided repo and returns a stdin pipe, a stdout reader and cancel function 95 func CatFileBatch(ctx context.Context, repoPath string) (WriteCloserError, *bufio.Reader, func()) { 96 // We often want to feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary. 97 // so let's create a batch stdin and stdout 98 batchStdinReader, batchStdinWriter := io.Pipe() 99 batchStdoutReader, batchStdoutWriter := nio.Pipe(buffer.New(32 * 1024)) 100 ctx, ctxCancel := context.WithCancel(ctx) 101 closed := make(chan struct{}) 102 cancel := func() { 103 ctxCancel() 104 _ = batchStdinWriter.Close() 105 _ = batchStdoutReader.Close() 106 <-closed 107 } 108 109 // Ensure cancel is called as soon as the provided context is cancelled 110 go func() { 111 <-ctx.Done() 112 cancel() 113 }() 114 115 _, filename, line, _ := runtime.Caller(2) 116 filename = strings.TrimPrefix(filename, callerPrefix) 117 118 go func() { 119 stderr := strings.Builder{} 120 err := NewCommand(ctx, "cat-file", "--batch"). 121 SetDescription(fmt.Sprintf("%s cat-file --batch [repo_path: %s] (%s:%d)", GitExecutable, repoPath, filename, line)). 
122 Run(&RunOpts{ 123 Dir: repoPath, 124 Stdin: batchStdinReader, 125 Stdout: batchStdoutWriter, 126 Stderr: &stderr, 127 }) 128 if err != nil { 129 _ = batchStdoutWriter.CloseWithError(ConcatenateError(err, (&stderr).String())) 130 _ = batchStdinReader.CloseWithError(ConcatenateError(err, (&stderr).String())) 131 } else { 132 _ = batchStdoutWriter.Close() 133 _ = batchStdinReader.Close() 134 } 135 close(closed) 136 }() 137 138 // For simplicities sake we'll us a buffered reader to read from the cat-file --batch 139 batchReader := bufio.NewReaderSize(batchStdoutReader, 32*1024) 140 141 return batchStdinWriter, batchReader, cancel 142 } 143 144 // ReadBatchLine reads the header line from cat-file --batch 145 // We expect: 146 // <sha> SP <type> SP <size> LF 147 // sha is a 40byte not 20byte here 148 func ReadBatchLine(rd *bufio.Reader) (sha []byte, typ string, size int64, err error) { 149 typ, err = rd.ReadString('\n') 150 if err != nil { 151 return 152 } 153 if len(typ) == 1 { 154 typ, err = rd.ReadString('\n') 155 if err != nil { 156 return 157 } 158 } 159 idx := strings.IndexByte(typ, ' ') 160 if idx < 0 { 161 log.Debug("missing space typ: %s", typ) 162 err = ErrNotExist{ID: string(sha)} 163 return 164 } 165 sha = []byte(typ[:idx]) 166 typ = typ[idx+1:] 167 168 idx = strings.IndexByte(typ, ' ') 169 if idx < 0 { 170 err = ErrNotExist{ID: string(sha)} 171 return 172 } 173 174 sizeStr := typ[idx+1 : len(typ)-1] 175 typ = typ[:idx] 176 177 size, err = strconv.ParseInt(sizeStr, 10, 64) 178 return sha, typ, size, err 179 } 180 181 // ReadTagObjectID reads a tag object ID hash from a cat-file --batch stream, throwing away the rest of the stream. 
func ReadTagObjectID(rd *bufio.Reader, size int64) (string, error) {
	// The ID of the tagged object is the value of the "object" header line.
	return readObjectHeaderID(rd, size, "object")
}

// ReadTreeID reads a tree ID from a cat-file --batch stream, throwing away the rest of the stream.
func ReadTreeID(rd *bufio.Reader, size int64) (string, error) {
	// The tree ID is the value of the "tree" header line of the commit.
	return readObjectHeaderID(rd, size, "tree")
}

// readObjectHeaderID scans the next object body on rd, which is size bytes long
// and followed by one extra LF, for the first line of the form "<key> SP <value> LF"
// and returns <value>. The remainder of the object, including the extra LF,
// is discarded so that rd is left positioned at whatever follows the object.
//
// The scan never reads past the size bytes of the object: if no matching line
// is found the rest of the object is still discarded and an error is returned
// instead of reading on into the data that follows.
func readObjectHeaderID(rd *bufio.Reader, size int64, key string) (string, error) {
	var (
		id    string
		n     int64 // bytes of the object consumed so far
		found bool
	)
	for n < size {
		line, err := rd.ReadBytes('\n')
		if err != nil {
			return "", err
		}
		n += int64(len(line))

		idx := bytes.IndexByte(line, ' ')
		if idx < 0 {
			continue
		}
		if string(line[:idx]) == key {
			// The value runs from after the space up to, but excluding, the LF.
			id = string(line[idx+1 : len(line)-1])
			found = true
			break
		}
	}

	// Discard the rest of the object; the +1 accounts for the LF after the body.
	// Discard takes an int, so very large remainders are dropped in chunks.
	discard := size - n + 1
	for discard > math.MaxInt32 {
		if _, err := rd.Discard(math.MaxInt32); err != nil {
			return id, err
		}
		discard -= math.MaxInt32
	}
	if _, err := rd.Discard(int(discard)); err != nil {
		return id, err
	}

	if !found {
		return "", fmt.Errorf("%q header not found in object of size %d", key, size)
	}
	return id, nil
}

// git tree files are a list:
// <mode-in-ascii> SP <fname> NUL <20-byte SHA>
//
// Unfortunately this 20-byte notation is somewhat in conflict to all other git tools
// Therefore we need some method to convert these 20-byte SHAs to a 40-byte SHA

// constant hextable to help quickly convert between 20byte and 40byte hashes
const hextable = "0123456789abcdef"

// To40ByteSHA converts a 20-byte SHA into a
40-byte sha. Input and output can be the 261 // same 40 byte slice to support in place conversion without allocations. 262 // This is at least 100x quicker that hex.EncodeToString 263 // NB This requires that out is a 40-byte slice 264 func To40ByteSHA(sha, out []byte) []byte { 265 for i := 19; i >= 0; i-- { 266 v := sha[i] 267 vhi, vlo := v>>4, v&0x0f 268 shi, slo := hextable[vhi], hextable[vlo] 269 out[i*2], out[i*2+1] = shi, slo 270 } 271 return out 272 } 273 274 // ParseTreeLine reads an entry from a tree in a cat-file --batch stream 275 // This carefully avoids allocations - except where fnameBuf is too small. 276 // It is recommended therefore to pass in an fnameBuf large enough to avoid almost all allocations 277 // 278 // Each line is composed of: 279 // <mode-in-ascii-dropping-initial-zeros> SP <fname> NUL <20-byte SHA> 280 // 281 // We don't attempt to convert the 20-byte SHA to 40-byte SHA to save a lot of time 282 func ParseTreeLine(rd *bufio.Reader, modeBuf, fnameBuf, shaBuf []byte) (mode, fname, sha []byte, n int, err error) { 283 var readBytes []byte 284 285 // Read the Mode & fname 286 readBytes, err = rd.ReadSlice('\x00') 287 if err != nil { 288 return 289 } 290 idx := bytes.IndexByte(readBytes, ' ') 291 if idx < 0 { 292 log.Debug("missing space in readBytes ParseTreeLine: %s", readBytes) 293 294 err = &ErrNotExist{} 295 return 296 } 297 298 n += idx + 1 299 copy(modeBuf, readBytes[:idx]) 300 if len(modeBuf) >= idx { 301 modeBuf = modeBuf[:idx] 302 } else { 303 modeBuf = append(modeBuf, readBytes[len(modeBuf):idx]...) 304 } 305 mode = modeBuf 306 307 readBytes = readBytes[idx+1:] 308 309 // Deal with the fname 310 copy(fnameBuf, readBytes) 311 if len(fnameBuf) > len(readBytes) { 312 fnameBuf = fnameBuf[:len(readBytes)] 313 } else { 314 fnameBuf = append(fnameBuf, readBytes[len(fnameBuf):]...) 315 } 316 for err == bufio.ErrBufferFull { 317 readBytes, err = rd.ReadSlice('\x00') 318 fnameBuf = append(fnameBuf, readBytes...) 
319 } 320 n += len(fnameBuf) 321 if err != nil { 322 return 323 } 324 fnameBuf = fnameBuf[:len(fnameBuf)-1] 325 fname = fnameBuf 326 327 // Deal with the 20-byte SHA 328 idx = 0 329 for idx < 20 { 330 var read int 331 read, err = rd.Read(shaBuf[idx:20]) 332 n += read 333 if err != nil { 334 return 335 } 336 idx += read 337 } 338 sha = shaBuf 339 return mode, fname, sha, n, err 340 } 341 342 var callerPrefix string 343 344 func init() { 345 _, filename, _, _ := runtime.Caller(0) 346 callerPrefix = strings.TrimSuffix(filename, "modules/git/batch_reader.go") 347 }