code.gitea.io/gitea@v1.22.3/modules/git/batch_reader.go (about)

     1  // Copyright 2020 The Gitea Authors. All rights reserved.
     2  // SPDX-License-Identifier: MIT
     3  
     4  package git
     5  
     6  import (
     7  	"bufio"
     8  	"bytes"
     9  	"context"
    10  	"fmt"
    11  	"io"
    12  	"math"
    13  	"runtime"
    14  	"strconv"
    15  	"strings"
    16  
    17  	"code.gitea.io/gitea/modules/log"
    18  
    19  	"github.com/djherbis/buffer"
    20  	"github.com/djherbis/nio/v3"
    21  )
    22  
    23  // WriteCloserError wraps an io.WriteCloser with an additional CloseWithError function
    24  type WriteCloserError interface {
    25  	io.WriteCloser
    26  	CloseWithError(err error) error
    27  }
    28  
    29  // ensureValidGitRepository runs git rev-parse in the repository path - thus ensuring that the repository is a valid repository.
    30  // Run before opening git cat-file.
    31  // This is needed otherwise the git cat-file will hang for invalid repositories.
    32  func ensureValidGitRepository(ctx context.Context, repoPath string) error {
    33  	stderr := strings.Builder{}
    34  	err := NewCommand(ctx, "rev-parse").
    35  		SetDescription(fmt.Sprintf("%s rev-parse [repo_path: %s]", GitExecutable, repoPath)).
    36  		Run(&RunOpts{
    37  			Dir:    repoPath,
    38  			Stderr: &stderr,
    39  		})
    40  	if err != nil {
    41  		return ConcatenateError(err, (&stderr).String())
    42  	}
    43  	return nil
    44  }
    45  
    46  // catFileBatchCheck opens git cat-file --batch-check in the provided repo and returns a stdin pipe, a stdout reader and cancel function
    47  func catFileBatchCheck(ctx context.Context, repoPath string) (WriteCloserError, *bufio.Reader, func()) {
    48  	batchStdinReader, batchStdinWriter := io.Pipe()
    49  	batchStdoutReader, batchStdoutWriter := io.Pipe()
    50  	ctx, ctxCancel := context.WithCancel(ctx)
    51  	closed := make(chan struct{})
    52  	cancel := func() {
    53  		ctxCancel()
    54  		_ = batchStdoutReader.Close()
    55  		_ = batchStdinWriter.Close()
    56  		<-closed
    57  	}
    58  
    59  	// Ensure cancel is called as soon as the provided context is cancelled
    60  	go func() {
    61  		<-ctx.Done()
    62  		cancel()
    63  	}()
    64  
    65  	_, filename, line, _ := runtime.Caller(2)
    66  	filename = strings.TrimPrefix(filename, callerPrefix)
    67  
    68  	go func() {
    69  		stderr := strings.Builder{}
    70  		err := NewCommand(ctx, "cat-file", "--batch-check").
    71  			SetDescription(fmt.Sprintf("%s cat-file --batch-check [repo_path: %s] (%s:%d)", GitExecutable, repoPath, filename, line)).
    72  			Run(&RunOpts{
    73  				Dir:    repoPath,
    74  				Stdin:  batchStdinReader,
    75  				Stdout: batchStdoutWriter,
    76  				Stderr: &stderr,
    77  
    78  				UseContextTimeout: true,
    79  			})
    80  		if err != nil {
    81  			_ = batchStdoutWriter.CloseWithError(ConcatenateError(err, (&stderr).String()))
    82  			_ = batchStdinReader.CloseWithError(ConcatenateError(err, (&stderr).String()))
    83  		} else {
    84  			_ = batchStdoutWriter.Close()
    85  			_ = batchStdinReader.Close()
    86  		}
    87  		close(closed)
    88  	}()
    89  
    90  	// For simplicities sake we'll use a buffered reader to read from the cat-file --batch-check
    91  	batchReader := bufio.NewReader(batchStdoutReader)
    92  
    93  	return batchStdinWriter, batchReader, cancel
    94  }
    95  
    96  // catFileBatch opens git cat-file --batch in the provided repo and returns a stdin pipe, a stdout reader and cancel function
    97  func catFileBatch(ctx context.Context, repoPath string) (WriteCloserError, *bufio.Reader, func()) {
    98  	// We often want to feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary.
    99  	// so let's create a batch stdin and stdout
   100  	batchStdinReader, batchStdinWriter := io.Pipe()
   101  	batchStdoutReader, batchStdoutWriter := nio.Pipe(buffer.New(32 * 1024))
   102  	ctx, ctxCancel := context.WithCancel(ctx)
   103  	closed := make(chan struct{})
   104  	cancel := func() {
   105  		ctxCancel()
   106  		_ = batchStdinWriter.Close()
   107  		_ = batchStdoutReader.Close()
   108  		<-closed
   109  	}
   110  
   111  	// Ensure cancel is called as soon as the provided context is cancelled
   112  	go func() {
   113  		<-ctx.Done()
   114  		cancel()
   115  	}()
   116  
   117  	_, filename, line, _ := runtime.Caller(2)
   118  	filename = strings.TrimPrefix(filename, callerPrefix)
   119  
   120  	go func() {
   121  		stderr := strings.Builder{}
   122  		err := NewCommand(ctx, "cat-file", "--batch").
   123  			SetDescription(fmt.Sprintf("%s cat-file --batch [repo_path: %s] (%s:%d)", GitExecutable, repoPath, filename, line)).
   124  			Run(&RunOpts{
   125  				Dir:    repoPath,
   126  				Stdin:  batchStdinReader,
   127  				Stdout: batchStdoutWriter,
   128  				Stderr: &stderr,
   129  
   130  				UseContextTimeout: true,
   131  			})
   132  		if err != nil {
   133  			_ = batchStdoutWriter.CloseWithError(ConcatenateError(err, (&stderr).String()))
   134  			_ = batchStdinReader.CloseWithError(ConcatenateError(err, (&stderr).String()))
   135  		} else {
   136  			_ = batchStdoutWriter.Close()
   137  			_ = batchStdinReader.Close()
   138  		}
   139  		close(closed)
   140  	}()
   141  
   142  	// For simplicities sake we'll us a buffered reader to read from the cat-file --batch
   143  	batchReader := bufio.NewReaderSize(batchStdoutReader, 32*1024)
   144  
   145  	return batchStdinWriter, batchReader, cancel
   146  }
   147  
   148  // ReadBatchLine reads the header line from cat-file --batch
   149  // We expect:
   150  // <sha> SP <type> SP <size> LF
   151  // sha is a hex encoded here
   152  func ReadBatchLine(rd *bufio.Reader) (sha []byte, typ string, size int64, err error) {
   153  	typ, err = rd.ReadString('\n')
   154  	if err != nil {
   155  		return sha, typ, size, err
   156  	}
   157  	if len(typ) == 1 {
   158  		typ, err = rd.ReadString('\n')
   159  		if err != nil {
   160  			return sha, typ, size, err
   161  		}
   162  	}
   163  	idx := strings.IndexByte(typ, ' ')
   164  	if idx < 0 {
   165  		log.Debug("missing space typ: %s", typ)
   166  		return sha, typ, size, ErrNotExist{ID: string(sha)}
   167  	}
   168  	sha = []byte(typ[:idx])
   169  	typ = typ[idx+1:]
   170  
   171  	idx = strings.IndexByte(typ, ' ')
   172  	if idx < 0 {
   173  		return sha, typ, size, ErrNotExist{ID: string(sha)}
   174  	}
   175  
   176  	sizeStr := typ[idx+1 : len(typ)-1]
   177  	typ = typ[:idx]
   178  
   179  	size, err = strconv.ParseInt(sizeStr, 10, 64)
   180  	return sha, typ, size, err
   181  }
   182  
   183  // ReadTagObjectID reads a tag object ID hash from a cat-file --batch stream, throwing away the rest of the stream.
   184  func ReadTagObjectID(rd *bufio.Reader, size int64) (string, error) {
   185  	var id string
   186  	var n int64
   187  headerLoop:
   188  	for {
   189  		line, err := rd.ReadBytes('\n')
   190  		if err != nil {
   191  			return "", err
   192  		}
   193  		n += int64(len(line))
   194  		idx := bytes.Index(line, []byte{' '})
   195  		if idx < 0 {
   196  			continue
   197  		}
   198  
   199  		if string(line[:idx]) == "object" {
   200  			id = string(line[idx+1 : len(line)-1])
   201  			break headerLoop
   202  		}
   203  	}
   204  
   205  	// Discard the rest of the tag
   206  	return id, DiscardFull(rd, size-n+1)
   207  }
   208  
   209  // ReadTreeID reads a tree ID from a cat-file --batch stream, throwing away the rest of the stream.
   210  func ReadTreeID(rd *bufio.Reader, size int64) (string, error) {
   211  	var id string
   212  	var n int64
   213  headerLoop:
   214  	for {
   215  		line, err := rd.ReadBytes('\n')
   216  		if err != nil {
   217  			return "", err
   218  		}
   219  		n += int64(len(line))
   220  		idx := bytes.Index(line, []byte{' '})
   221  		if idx < 0 {
   222  			continue
   223  		}
   224  
   225  		if string(line[:idx]) == "tree" {
   226  			id = string(line[idx+1 : len(line)-1])
   227  			break headerLoop
   228  		}
   229  	}
   230  
   231  	// Discard the rest of the commit
   232  	return id, DiscardFull(rd, size-n+1)
   233  }
   234  
// git tree files are a list:
// <mode-in-ascii> SP <fname> NUL <binary Hash>
//
// Unfortunately this raw binary notation (20 bytes for SHA-1) is at odds with the hex notation used by all other git tools.
// Therefore we need some method to convert these binary hashes to hex hashes
   240  
   241  // constant hextable to help quickly convert between binary and hex representation
   242  const hextable = "0123456789abcdef"
   243  
   244  // BinToHexHeash converts a binary Hash into a hex encoded one. Input and output can be the
   245  // same byte slice to support in place conversion without allocations.
   246  // This is at least 100x quicker that hex.EncodeToString
   247  func BinToHex(objectFormat ObjectFormat, sha, out []byte) []byte {
   248  	for i := objectFormat.FullLength()/2 - 1; i >= 0; i-- {
   249  		v := sha[i]
   250  		vhi, vlo := v>>4, v&0x0f
   251  		shi, slo := hextable[vhi], hextable[vlo]
   252  		out[i*2], out[i*2+1] = shi, slo
   253  	}
   254  	return out
   255  }
   256  
   257  // ParseTreeLine reads an entry from a tree in a cat-file --batch stream
   258  // This carefully avoids allocations - except where fnameBuf is too small.
   259  // It is recommended therefore to pass in an fnameBuf large enough to avoid almost all allocations
   260  //
   261  // Each line is composed of:
   262  // <mode-in-ascii-dropping-initial-zeros> SP <fname> NUL <binary HASH>
   263  //
   264  // We don't attempt to convert the raw HASH to save a lot of time
   265  func ParseTreeLine(objectFormat ObjectFormat, rd *bufio.Reader, modeBuf, fnameBuf, shaBuf []byte) (mode, fname, sha []byte, n int, err error) {
   266  	var readBytes []byte
   267  
   268  	// Read the Mode & fname
   269  	readBytes, err = rd.ReadSlice('\x00')
   270  	if err != nil {
   271  		return mode, fname, sha, n, err
   272  	}
   273  	idx := bytes.IndexByte(readBytes, ' ')
   274  	if idx < 0 {
   275  		log.Debug("missing space in readBytes ParseTreeLine: %s", readBytes)
   276  		return mode, fname, sha, n, &ErrNotExist{}
   277  	}
   278  
   279  	n += idx + 1
   280  	copy(modeBuf, readBytes[:idx])
   281  	if len(modeBuf) >= idx {
   282  		modeBuf = modeBuf[:idx]
   283  	} else {
   284  		modeBuf = append(modeBuf, readBytes[len(modeBuf):idx]...)
   285  	}
   286  	mode = modeBuf
   287  
   288  	readBytes = readBytes[idx+1:]
   289  
   290  	// Deal with the fname
   291  	copy(fnameBuf, readBytes)
   292  	if len(fnameBuf) > len(readBytes) {
   293  		fnameBuf = fnameBuf[:len(readBytes)]
   294  	} else {
   295  		fnameBuf = append(fnameBuf, readBytes[len(fnameBuf):]...)
   296  	}
   297  	for err == bufio.ErrBufferFull {
   298  		readBytes, err = rd.ReadSlice('\x00')
   299  		fnameBuf = append(fnameBuf, readBytes...)
   300  	}
   301  	n += len(fnameBuf)
   302  	if err != nil {
   303  		return mode, fname, sha, n, err
   304  	}
   305  	fnameBuf = fnameBuf[:len(fnameBuf)-1]
   306  	fname = fnameBuf
   307  
   308  	// Deal with the binary hash
   309  	idx = 0
   310  	length := objectFormat.FullLength() / 2
   311  	for idx < length {
   312  		var read int
   313  		read, err = rd.Read(shaBuf[idx:length])
   314  		n += read
   315  		if err != nil {
   316  			return mode, fname, sha, n, err
   317  		}
   318  		idx += read
   319  	}
   320  	sha = shaBuf
   321  	return mode, fname, sha, n, err
   322  }
   323  
   324  var callerPrefix string
   325  
   326  func init() {
   327  	_, filename, _, _ := runtime.Caller(0)
   328  	callerPrefix = strings.TrimSuffix(filename, "modules/git/batch_reader.go")
   329  }
   330  
   331  func DiscardFull(rd *bufio.Reader, discard int64) error {
   332  	if discard > math.MaxInt32 {
   333  		n, err := rd.Discard(math.MaxInt32)
   334  		discard -= int64(n)
   335  		if err != nil {
   336  			return err
   337  		}
   338  	}
   339  	for discard > 0 {
   340  		n, err := rd.Discard(int(discard))
   341  		discard -= int64(n)
   342  		if err != nil {
   343  			return err
   344  		}
   345  	}
   346  	return nil
   347  }