github.com/gitbundle/modules@v0.0.0-20231025071548-85b91c5c3b01/git/batch_reader.go (about)

     1  // Copyright 2023 The GitBundle Inc. All rights reserved.
     2  // Copyright 2017 The Gitea Authors. All rights reserved.
     3  // Use of this source code is governed by a MIT-style
     4  // license that can be found in the LICENSE file.
     5  
     6  package git
     7  
     8  import (
     9  	"bufio"
    10  	"bytes"
    11  	"context"
    12  	"fmt"
    13  	"io"
    14  	"math"
    15  	"runtime"
    16  	"strconv"
    17  	"strings"
    18  
    19  	"github.com/gitbundle/modules/log"
    20  
    21  	"github.com/djherbis/buffer"
    22  	"github.com/djherbis/nio/v3"
    23  )
    24  
    25  // WriteCloserError wraps an io.WriteCloser with an additional CloseWithError function
    26  type WriteCloserError interface {
    27  	io.WriteCloser
    28  	CloseWithError(err error) error
    29  }
    30  
    31  // EnsureValidGitRepository runs git rev-parse in the repository path - thus ensuring that the repository is a valid repository.
    32  // Run before opening git cat-file.
    33  // This is needed otherwise the git cat-file will hang for invalid repositories.
    34  func EnsureValidGitRepository(ctx context.Context, repoPath string) error {
    35  	stderr := strings.Builder{}
    36  	err := NewCommand(ctx, "rev-parse").
    37  		SetDescription(fmt.Sprintf("%s rev-parse [repo_path: %s]", GitExecutable, repoPath)).
    38  		Run(&RunOpts{
    39  			Dir:    repoPath,
    40  			Stderr: &stderr,
    41  		})
    42  	if err != nil {
    43  		return ConcatenateError(err, (&stderr).String())
    44  	}
    45  	return nil
    46  }
    47  
    48  // CatFileBatchCheck opens git cat-file --batch-check in the provided repo and returns a stdin pipe, a stdout reader and cancel function
    49  func CatFileBatchCheck(ctx context.Context, repoPath string) (WriteCloserError, *bufio.Reader, func()) {
    50  	batchStdinReader, batchStdinWriter := io.Pipe()
    51  	batchStdoutReader, batchStdoutWriter := io.Pipe()
    52  	ctx, ctxCancel := context.WithCancel(ctx)
    53  	closed := make(chan struct{})
    54  	cancel := func() {
    55  		ctxCancel()
    56  		_ = batchStdoutReader.Close()
    57  		_ = batchStdinWriter.Close()
    58  		<-closed
    59  	}
    60  
    61  	// Ensure cancel is called as soon as the provided context is cancelled
    62  	go func() {
    63  		<-ctx.Done()
    64  		cancel()
    65  	}()
    66  
    67  	_, filename, line, _ := runtime.Caller(2)
    68  	filename = strings.TrimPrefix(filename, callerPrefix)
    69  
    70  	go func() {
    71  		stderr := strings.Builder{}
    72  		err := NewCommand(ctx, "cat-file", "--batch-check").
    73  			SetDescription(fmt.Sprintf("%s cat-file --batch-check [repo_path: %s] (%s:%d)", GitExecutable, repoPath, filename, line)).
    74  			Run(&RunOpts{
    75  				Dir:    repoPath,
    76  				Stdin:  batchStdinReader,
    77  				Stdout: batchStdoutWriter,
    78  				Stderr: &stderr,
    79  			})
    80  		if err != nil {
    81  			_ = batchStdoutWriter.CloseWithError(ConcatenateError(err, (&stderr).String()))
    82  			_ = batchStdinReader.CloseWithError(ConcatenateError(err, (&stderr).String()))
    83  		} else {
    84  			_ = batchStdoutWriter.Close()
    85  			_ = batchStdinReader.Close()
    86  		}
    87  		close(closed)
    88  	}()
    89  
    90  	// For simplicities sake we'll use a buffered reader to read from the cat-file --batch-check
    91  	batchReader := bufio.NewReader(batchStdoutReader)
    92  
    93  	return batchStdinWriter, batchReader, cancel
    94  }
    95  
    96  // CatFileBatch opens git cat-file --batch in the provided repo and returns a stdin pipe, a stdout reader and cancel function
    97  func CatFileBatch(ctx context.Context, repoPath string) (WriteCloserError, *bufio.Reader, func()) {
    98  	// We often want to feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary.
    99  	// so let's create a batch stdin and stdout
   100  	batchStdinReader, batchStdinWriter := io.Pipe()
   101  	batchStdoutReader, batchStdoutWriter := nio.Pipe(buffer.New(32 * 1024))
   102  	ctx, ctxCancel := context.WithCancel(ctx)
   103  	closed := make(chan struct{})
   104  	cancel := func() {
   105  		ctxCancel()
   106  		_ = batchStdinWriter.Close()
   107  		_ = batchStdoutReader.Close()
   108  		<-closed
   109  	}
   110  
   111  	// Ensure cancel is called as soon as the provided context is cancelled
   112  	go func() {
   113  		<-ctx.Done()
   114  		cancel()
   115  	}()
   116  
   117  	_, filename, line, _ := runtime.Caller(2)
   118  	filename = strings.TrimPrefix(filename, callerPrefix)
   119  
   120  	go func() {
   121  		stderr := strings.Builder{}
   122  		err := NewCommand(ctx, "cat-file", "--batch").
   123  			SetDescription(fmt.Sprintf("%s cat-file --batch [repo_path: %s] (%s:%d)", GitExecutable, repoPath, filename, line)).
   124  			Run(&RunOpts{
   125  				Dir:    repoPath,
   126  				Stdin:  batchStdinReader,
   127  				Stdout: batchStdoutWriter,
   128  				Stderr: &stderr,
   129  			})
   130  		if err != nil {
   131  			_ = batchStdoutWriter.CloseWithError(ConcatenateError(err, (&stderr).String()))
   132  			_ = batchStdinReader.CloseWithError(ConcatenateError(err, (&stderr).String()))
   133  		} else {
   134  			_ = batchStdoutWriter.Close()
   135  			_ = batchStdinReader.Close()
   136  		}
   137  		close(closed)
   138  	}()
   139  
   140  	// For simplicities sake we'll us a buffered reader to read from the cat-file --batch
   141  	batchReader := bufio.NewReaderSize(batchStdoutReader, 32*1024)
   142  
   143  	return batchStdinWriter, batchReader, cancel
   144  }
   145  
   146  // ReadBatchLine reads the header line from cat-file --batch
   147  // We expect:
   148  // <sha> SP <type> SP <size> LF
   149  // sha is a 40byte not 20byte here
   150  func ReadBatchLine(rd *bufio.Reader) (sha []byte, typ string, size int64, err error) {
   151  	typ, err = rd.ReadString('\n')
   152  	if err != nil {
   153  		return
   154  	}
   155  	if len(typ) == 1 {
   156  		typ, err = rd.ReadString('\n')
   157  		if err != nil {
   158  			return
   159  		}
   160  	}
   161  	idx := strings.IndexByte(typ, ' ')
   162  	if idx < 0 {
   163  		log.Debug("missing space typ: %s", typ)
   164  		err = ErrNotExist{ID: string(sha)}
   165  		return
   166  	}
   167  	sha = []byte(typ[:idx])
   168  	typ = typ[idx+1:]
   169  
   170  	idx = strings.IndexByte(typ, ' ')
   171  	if idx < 0 {
   172  		err = ErrNotExist{ID: string(sha)}
   173  		return
   174  	}
   175  
   176  	sizeStr := typ[idx+1 : len(typ)-1]
   177  	typ = typ[:idx]
   178  
   179  	size, err = strconv.ParseInt(sizeStr, 10, 64)
   180  	return
   181  }
   182  
   183  // ReadTagObjectID reads a tag object ID hash from a cat-file --batch stream, throwing away the rest of the stream.
   184  func ReadTagObjectID(rd *bufio.Reader, size int64) (string, error) {
   185  	id := ""
   186  	var n int64
   187  headerLoop:
   188  	for {
   189  		line, err := rd.ReadBytes('\n')
   190  		if err != nil {
   191  			return "", err
   192  		}
   193  		n += int64(len(line))
   194  		idx := bytes.Index(line, []byte{' '})
   195  		if idx < 0 {
   196  			continue
   197  		}
   198  
   199  		if string(line[:idx]) == "object" {
   200  			id = string(line[idx+1 : len(line)-1])
   201  			break headerLoop
   202  		}
   203  	}
   204  
   205  	// Discard the rest of the tag
   206  	discard := size - n + 1
   207  	for discard > math.MaxInt32 {
   208  		_, err := rd.Discard(math.MaxInt32)
   209  		if err != nil {
   210  			return id, err
   211  		}
   212  		discard -= math.MaxInt32
   213  	}
   214  	_, err := rd.Discard(int(discard))
   215  	return id, err
   216  }
   217  
   218  // ReadTreeID reads a tree ID from a cat-file --batch stream, throwing away the rest of the stream.
   219  func ReadTreeID(rd *bufio.Reader, size int64) (string, error) {
   220  	id := ""
   221  	var n int64
   222  headerLoop:
   223  	for {
   224  		line, err := rd.ReadBytes('\n')
   225  		if err != nil {
   226  			return "", err
   227  		}
   228  		n += int64(len(line))
   229  		idx := bytes.Index(line, []byte{' '})
   230  		if idx < 0 {
   231  			continue
   232  		}
   233  
   234  		if string(line[:idx]) == "tree" {
   235  			id = string(line[idx+1 : len(line)-1])
   236  			break headerLoop
   237  		}
   238  	}
   239  
   240  	// Discard the rest of the commit
   241  	discard := size - n + 1
   242  	for discard > math.MaxInt32 {
   243  		_, err := rd.Discard(math.MaxInt32)
   244  		if err != nil {
   245  			return id, err
   246  		}
   247  		discard -= math.MaxInt32
   248  	}
   249  	_, err := rd.Discard(int(discard))
   250  	return id, err
   251  }
   252  
   253  // git tree files are a list:
   254  // <mode-in-ascii> SP <fname> NUL <20-byte SHA>
   255  //
   256  // Unfortunately this 20-byte notation is somewhat in conflict to all other git tools
   257  // Therefore we need some method to convert these 20-byte SHAs to a 40-byte SHA
   258  
   259  // constant hextable to help quickly convert between 20byte and 40byte hashes
   260  const hextable = "0123456789abcdef"
   261  
   262  // To40ByteSHA converts a 20-byte SHA into a 40-byte sha. Input and output can be the
   263  // same 40 byte slice to support in place conversion without allocations.
   264  // This is at least 100x quicker that hex.EncodeToString
   265  // NB This requires that out is a 40-byte slice
   266  func To40ByteSHA(sha, out []byte) []byte {
   267  	for i := 19; i >= 0; i-- {
   268  		v := sha[i]
   269  		vhi, vlo := v>>4, v&0x0f
   270  		shi, slo := hextable[vhi], hextable[vlo]
   271  		out[i*2], out[i*2+1] = shi, slo
   272  	}
   273  	return out
   274  }
   275  
   276  // ParseTreeLine reads an entry from a tree in a cat-file --batch stream
   277  // This carefully avoids allocations - except where fnameBuf is too small.
   278  // It is recommended therefore to pass in an fnameBuf large enough to avoid almost all allocations
   279  //
   280  // Each line is composed of:
   281  // <mode-in-ascii-dropping-initial-zeros> SP <fname> NUL <20-byte SHA>
   282  //
   283  // We don't attempt to convert the 20-byte SHA to 40-byte SHA to save a lot of time
   284  func ParseTreeLine(rd *bufio.Reader, modeBuf, fnameBuf, shaBuf []byte) (mode, fname, sha []byte, n int, err error) {
   285  	var readBytes []byte
   286  
   287  	// Read the Mode & fname
   288  	readBytes, err = rd.ReadSlice('\x00')
   289  	if err != nil {
   290  		return
   291  	}
   292  	idx := bytes.IndexByte(readBytes, ' ')
   293  	if idx < 0 {
   294  		log.Debug("missing space in readBytes ParseTreeLine: %s", readBytes)
   295  
   296  		err = &ErrNotExist{}
   297  		return
   298  	}
   299  
   300  	n += idx + 1
   301  	copy(modeBuf, readBytes[:idx])
   302  	if len(modeBuf) >= idx {
   303  		modeBuf = modeBuf[:idx]
   304  	} else {
   305  		modeBuf = append(modeBuf, readBytes[len(modeBuf):idx]...)
   306  	}
   307  	mode = modeBuf
   308  
   309  	readBytes = readBytes[idx+1:]
   310  
   311  	// Deal with the fname
   312  	copy(fnameBuf, readBytes)
   313  	if len(fnameBuf) > len(readBytes) {
   314  		fnameBuf = fnameBuf[:len(readBytes)]
   315  	} else {
   316  		fnameBuf = append(fnameBuf, readBytes[len(fnameBuf):]...)
   317  	}
   318  	for err == bufio.ErrBufferFull {
   319  		readBytes, err = rd.ReadSlice('\x00')
   320  		fnameBuf = append(fnameBuf, readBytes...)
   321  	}
   322  	n += len(fnameBuf)
   323  	if err != nil {
   324  		return
   325  	}
   326  	fnameBuf = fnameBuf[:len(fnameBuf)-1]
   327  	fname = fnameBuf
   328  
   329  	// Deal with the 20-byte SHA
   330  	idx = 0
   331  	for idx < 20 {
   332  		read := 0
   333  		read, err = rd.Read(shaBuf[idx:20])
   334  		n += read
   335  		if err != nil {
   336  			return
   337  		}
   338  		idx += read
   339  	}
   340  	sha = shaBuf
   341  	return
   342  }
   343  
   344  var callerPrefix string
   345  
   346  func init() {
   347  	_, filename, _, _ := runtime.Caller(0)
   348  	callerPrefix = strings.TrimSuffix(filename, "modules/git/batch_reader.go")
   349  }