code.gitea.io/gitea@v1.19.3/modules/git/batch_reader.go (about)

     1  // Copyright 2020 The Gitea Authors. All rights reserved.
     2  // SPDX-License-Identifier: MIT
     3  
     4  package git
     5  
     6  import (
     7  	"bufio"
     8  	"bytes"
     9  	"context"
    10  	"fmt"
    11  	"io"
    12  	"math"
    13  	"runtime"
    14  	"strconv"
    15  	"strings"
    16  
    17  	"code.gitea.io/gitea/modules/log"
    18  
    19  	"github.com/djherbis/buffer"
    20  	"github.com/djherbis/nio/v3"
    21  )
    22  
    23  // WriteCloserError wraps an io.WriteCloser with an additional CloseWithError function
    24  type WriteCloserError interface {
    25  	io.WriteCloser
    26  	CloseWithError(err error) error
    27  }
    28  
    29  // EnsureValidGitRepository runs git rev-parse in the repository path - thus ensuring that the repository is a valid repository.
    30  // Run before opening git cat-file.
    31  // This is needed otherwise the git cat-file will hang for invalid repositories.
    32  func EnsureValidGitRepository(ctx context.Context, repoPath string) error {
    33  	stderr := strings.Builder{}
    34  	err := NewCommand(ctx, "rev-parse").
    35  		SetDescription(fmt.Sprintf("%s rev-parse [repo_path: %s]", GitExecutable, repoPath)).
    36  		Run(&RunOpts{
    37  			Dir:    repoPath,
    38  			Stderr: &stderr,
    39  		})
    40  	if err != nil {
    41  		return ConcatenateError(err, (&stderr).String())
    42  	}
    43  	return nil
    44  }
    45  
    46  // CatFileBatchCheck opens git cat-file --batch-check in the provided repo and returns a stdin pipe, a stdout reader and cancel function
    47  func CatFileBatchCheck(ctx context.Context, repoPath string) (WriteCloserError, *bufio.Reader, func()) {
    48  	batchStdinReader, batchStdinWriter := io.Pipe()
    49  	batchStdoutReader, batchStdoutWriter := io.Pipe()
    50  	ctx, ctxCancel := context.WithCancel(ctx)
    51  	closed := make(chan struct{})
    52  	cancel := func() {
    53  		ctxCancel()
    54  		_ = batchStdoutReader.Close()
    55  		_ = batchStdinWriter.Close()
    56  		<-closed
    57  	}
    58  
    59  	// Ensure cancel is called as soon as the provided context is cancelled
    60  	go func() {
    61  		<-ctx.Done()
    62  		cancel()
    63  	}()
    64  
    65  	_, filename, line, _ := runtime.Caller(2)
    66  	filename = strings.TrimPrefix(filename, callerPrefix)
    67  
    68  	go func() {
    69  		stderr := strings.Builder{}
    70  		err := NewCommand(ctx, "cat-file", "--batch-check").
    71  			SetDescription(fmt.Sprintf("%s cat-file --batch-check [repo_path: %s] (%s:%d)", GitExecutable, repoPath, filename, line)).
    72  			Run(&RunOpts{
    73  				Dir:    repoPath,
    74  				Stdin:  batchStdinReader,
    75  				Stdout: batchStdoutWriter,
    76  				Stderr: &stderr,
    77  			})
    78  		if err != nil {
    79  			_ = batchStdoutWriter.CloseWithError(ConcatenateError(err, (&stderr).String()))
    80  			_ = batchStdinReader.CloseWithError(ConcatenateError(err, (&stderr).String()))
    81  		} else {
    82  			_ = batchStdoutWriter.Close()
    83  			_ = batchStdinReader.Close()
    84  		}
    85  		close(closed)
    86  	}()
    87  
    88  	// For simplicities sake we'll use a buffered reader to read from the cat-file --batch-check
    89  	batchReader := bufio.NewReader(batchStdoutReader)
    90  
    91  	return batchStdinWriter, batchReader, cancel
    92  }
    93  
    94  // CatFileBatch opens git cat-file --batch in the provided repo and returns a stdin pipe, a stdout reader and cancel function
    95  func CatFileBatch(ctx context.Context, repoPath string) (WriteCloserError, *bufio.Reader, func()) {
    96  	// We often want to feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary.
    97  	// so let's create a batch stdin and stdout
    98  	batchStdinReader, batchStdinWriter := io.Pipe()
    99  	batchStdoutReader, batchStdoutWriter := nio.Pipe(buffer.New(32 * 1024))
   100  	ctx, ctxCancel := context.WithCancel(ctx)
   101  	closed := make(chan struct{})
   102  	cancel := func() {
   103  		ctxCancel()
   104  		_ = batchStdinWriter.Close()
   105  		_ = batchStdoutReader.Close()
   106  		<-closed
   107  	}
   108  
   109  	// Ensure cancel is called as soon as the provided context is cancelled
   110  	go func() {
   111  		<-ctx.Done()
   112  		cancel()
   113  	}()
   114  
   115  	_, filename, line, _ := runtime.Caller(2)
   116  	filename = strings.TrimPrefix(filename, callerPrefix)
   117  
   118  	go func() {
   119  		stderr := strings.Builder{}
   120  		err := NewCommand(ctx, "cat-file", "--batch").
   121  			SetDescription(fmt.Sprintf("%s cat-file --batch [repo_path: %s] (%s:%d)", GitExecutable, repoPath, filename, line)).
   122  			Run(&RunOpts{
   123  				Dir:    repoPath,
   124  				Stdin:  batchStdinReader,
   125  				Stdout: batchStdoutWriter,
   126  				Stderr: &stderr,
   127  			})
   128  		if err != nil {
   129  			_ = batchStdoutWriter.CloseWithError(ConcatenateError(err, (&stderr).String()))
   130  			_ = batchStdinReader.CloseWithError(ConcatenateError(err, (&stderr).String()))
   131  		} else {
   132  			_ = batchStdoutWriter.Close()
   133  			_ = batchStdinReader.Close()
   134  		}
   135  		close(closed)
   136  	}()
   137  
   138  	// For simplicities sake we'll us a buffered reader to read from the cat-file --batch
   139  	batchReader := bufio.NewReaderSize(batchStdoutReader, 32*1024)
   140  
   141  	return batchStdinWriter, batchReader, cancel
   142  }
   143  
   144  // ReadBatchLine reads the header line from cat-file --batch
   145  // We expect:
   146  // <sha> SP <type> SP <size> LF
   147  // sha is a 40byte not 20byte here
   148  func ReadBatchLine(rd *bufio.Reader) (sha []byte, typ string, size int64, err error) {
   149  	typ, err = rd.ReadString('\n')
   150  	if err != nil {
   151  		return
   152  	}
   153  	if len(typ) == 1 {
   154  		typ, err = rd.ReadString('\n')
   155  		if err != nil {
   156  			return
   157  		}
   158  	}
   159  	idx := strings.IndexByte(typ, ' ')
   160  	if idx < 0 {
   161  		log.Debug("missing space typ: %s", typ)
   162  		err = ErrNotExist{ID: string(sha)}
   163  		return
   164  	}
   165  	sha = []byte(typ[:idx])
   166  	typ = typ[idx+1:]
   167  
   168  	idx = strings.IndexByte(typ, ' ')
   169  	if idx < 0 {
   170  		err = ErrNotExist{ID: string(sha)}
   171  		return
   172  	}
   173  
   174  	sizeStr := typ[idx+1 : len(typ)-1]
   175  	typ = typ[:idx]
   176  
   177  	size, err = strconv.ParseInt(sizeStr, 10, 64)
   178  	return sha, typ, size, err
   179  }
   180  
   181  // ReadTagObjectID reads a tag object ID hash from a cat-file --batch stream, throwing away the rest of the stream.
   182  func ReadTagObjectID(rd *bufio.Reader, size int64) (string, error) {
   183  	var id string
   184  	var n int64
   185  headerLoop:
   186  	for {
   187  		line, err := rd.ReadBytes('\n')
   188  		if err != nil {
   189  			return "", err
   190  		}
   191  		n += int64(len(line))
   192  		idx := bytes.Index(line, []byte{' '})
   193  		if idx < 0 {
   194  			continue
   195  		}
   196  
   197  		if string(line[:idx]) == "object" {
   198  			id = string(line[idx+1 : len(line)-1])
   199  			break headerLoop
   200  		}
   201  	}
   202  
   203  	// Discard the rest of the tag
   204  	discard := size - n + 1
   205  	for discard > math.MaxInt32 {
   206  		_, err := rd.Discard(math.MaxInt32)
   207  		if err != nil {
   208  			return id, err
   209  		}
   210  		discard -= math.MaxInt32
   211  	}
   212  	_, err := rd.Discard(int(discard))
   213  	return id, err
   214  }
   215  
   216  // ReadTreeID reads a tree ID from a cat-file --batch stream, throwing away the rest of the stream.
   217  func ReadTreeID(rd *bufio.Reader, size int64) (string, error) {
   218  	var id string
   219  	var n int64
   220  headerLoop:
   221  	for {
   222  		line, err := rd.ReadBytes('\n')
   223  		if err != nil {
   224  			return "", err
   225  		}
   226  		n += int64(len(line))
   227  		idx := bytes.Index(line, []byte{' '})
   228  		if idx < 0 {
   229  			continue
   230  		}
   231  
   232  		if string(line[:idx]) == "tree" {
   233  			id = string(line[idx+1 : len(line)-1])
   234  			break headerLoop
   235  		}
   236  	}
   237  
   238  	// Discard the rest of the commit
   239  	discard := size - n + 1
   240  	for discard > math.MaxInt32 {
   241  		_, err := rd.Discard(math.MaxInt32)
   242  		if err != nil {
   243  			return id, err
   244  		}
   245  		discard -= math.MaxInt32
   246  	}
   247  	_, err := rd.Discard(int(discard))
   248  	return id, err
   249  }
   250  
   251  // git tree files are a list:
   252  // <mode-in-ascii> SP <fname> NUL <20-byte SHA>
   253  //
   254  // Unfortunately this 20-byte notation is somewhat in conflict to all other git tools
   255  // Therefore we need some method to convert these 20-byte SHAs to a 40-byte SHA
   256  
   257  // constant hextable to help quickly convert between 20byte and 40byte hashes
   258  const hextable = "0123456789abcdef"
   259  
   260  // To40ByteSHA converts a 20-byte SHA into a 40-byte sha. Input and output can be the
   261  // same 40 byte slice to support in place conversion without allocations.
   262  // This is at least 100x quicker that hex.EncodeToString
   263  // NB This requires that out is a 40-byte slice
   264  func To40ByteSHA(sha, out []byte) []byte {
   265  	for i := 19; i >= 0; i-- {
   266  		v := sha[i]
   267  		vhi, vlo := v>>4, v&0x0f
   268  		shi, slo := hextable[vhi], hextable[vlo]
   269  		out[i*2], out[i*2+1] = shi, slo
   270  	}
   271  	return out
   272  }
   273  
   274  // ParseTreeLine reads an entry from a tree in a cat-file --batch stream
   275  // This carefully avoids allocations - except where fnameBuf is too small.
   276  // It is recommended therefore to pass in an fnameBuf large enough to avoid almost all allocations
   277  //
   278  // Each line is composed of:
   279  // <mode-in-ascii-dropping-initial-zeros> SP <fname> NUL <20-byte SHA>
   280  //
   281  // We don't attempt to convert the 20-byte SHA to 40-byte SHA to save a lot of time
   282  func ParseTreeLine(rd *bufio.Reader, modeBuf, fnameBuf, shaBuf []byte) (mode, fname, sha []byte, n int, err error) {
   283  	var readBytes []byte
   284  
   285  	// Read the Mode & fname
   286  	readBytes, err = rd.ReadSlice('\x00')
   287  	if err != nil {
   288  		return
   289  	}
   290  	idx := bytes.IndexByte(readBytes, ' ')
   291  	if idx < 0 {
   292  		log.Debug("missing space in readBytes ParseTreeLine: %s", readBytes)
   293  
   294  		err = &ErrNotExist{}
   295  		return
   296  	}
   297  
   298  	n += idx + 1
   299  	copy(modeBuf, readBytes[:idx])
   300  	if len(modeBuf) >= idx {
   301  		modeBuf = modeBuf[:idx]
   302  	} else {
   303  		modeBuf = append(modeBuf, readBytes[len(modeBuf):idx]...)
   304  	}
   305  	mode = modeBuf
   306  
   307  	readBytes = readBytes[idx+1:]
   308  
   309  	// Deal with the fname
   310  	copy(fnameBuf, readBytes)
   311  	if len(fnameBuf) > len(readBytes) {
   312  		fnameBuf = fnameBuf[:len(readBytes)]
   313  	} else {
   314  		fnameBuf = append(fnameBuf, readBytes[len(fnameBuf):]...)
   315  	}
   316  	for err == bufio.ErrBufferFull {
   317  		readBytes, err = rd.ReadSlice('\x00')
   318  		fnameBuf = append(fnameBuf, readBytes...)
   319  	}
   320  	n += len(fnameBuf)
   321  	if err != nil {
   322  		return
   323  	}
   324  	fnameBuf = fnameBuf[:len(fnameBuf)-1]
   325  	fname = fnameBuf
   326  
   327  	// Deal with the 20-byte SHA
   328  	idx = 0
   329  	for idx < 20 {
   330  		var read int
   331  		read, err = rd.Read(shaBuf[idx:20])
   332  		n += read
   333  		if err != nil {
   334  			return
   335  		}
   336  		idx += read
   337  	}
   338  	sha = shaBuf
   339  	return mode, fname, sha, n, err
   340  }
   341  
   342  var callerPrefix string
   343  
   344  func init() {
   345  	_, filename, _, _ := runtime.Caller(0)
   346  	callerPrefix = strings.TrimSuffix(filename, "modules/git/batch_reader.go")
   347  }