github.com/driusan/dgit@v0.0.0-20221118233547-f39f0c15edbb/git/indexpack.go

github.com/driusan/dgit@v0.0.0-20221118233547-f39f0c15edbb/git/indexpack.go (about)

     1  package git
     2  
     3  import (
     4  	"bufio"
     5  	"bytes"
     6  	"fmt"
     7  	"io"
     8  	"io/ioutil"
     9  	"log"
    10  	"os"
    11  	"path/filepath"
    12  	//"runtime"
    13  	"sort"
    14  	"strings"
    15  	"time"
    16  	"unsafe"
    17  
    18  	"compress/flate"
    19  
    20  	"sync"
    21  	"sync/atomic"
    22  
    23  	"container/list"
    24  	"crypto/sha1"
    25  	"encoding/binary"
    26  
    27  	"github.com/driusan/dgit/git/delta"
    28  	"github.com/hashicorp/golang-lru"
    29  	// "hash/crc32"
    30  )
    31  
    32  type IndexPackOptions struct {
    33  	// Display progress information while indexing pack.
    34  	Verbose bool
    35  
    36  	// Output index to this writer. If nil, will be based on
    37  	// the filename.
    38  	Output io.Writer
    39  
    40  	// Fix a "thin" pack produced by git pack-objects --thin
    41  	// (not implemented)
    42  	FixThin bool
    43  
    44  	// A message to store in a .keep file. The string "none"
    45  	// will be interpreted as an empty file, the empty string
    46  	// will be interpreted as do not produce a .keep file.
    47  	Keep string
    48  
    49  	// Not implemented
    50  	IndexVersion int
    51  
    52  	// Die if the pack contains broken links. (Not implemented)
    53  	Strict bool
    54  
    55  	// A number of threads to use for resolving deltas.  The 0-value
    56  	// will use GOMAXPROCS.
    57  	Threads uint
    58  
    59  	// Act as if reading from a non-seekable stream, not a file.
    60  	Stdin bool
    61  }
    62  
    63  type PackfileIndex interface {
    64  	GetObject(i io.ReaderAt, s Sha1) (GitObject, error)
    65  	HasObject(s Sha1) bool
    66  	WriteIndex(w io.Writer) error
    67  	GetTrailer() (Packfile Sha1, Index Sha1)
    68  }
    69  
    70  type PackIndexFanout [256]uint32
    71  type PackfileIndexV2 struct {
    72  	magic   [4]byte // Must be \377tOc
    73  	Version uint32  // Must be 2
    74  
    75  	Fanout PackIndexFanout
    76  
    77  	Sha1Table []Sha1
    78  	CRC32     []uint32
    79  
    80  	// If the MSB is set, it's an index into the next
    81  	// table, otherwise it's an index into the packfile.
    82  	FourByteOffsets  []uint32
    83  	EightByteOffsets []uint64
    84  
    85  	// the objects stream goes here in the file
    86  
    87  	// The trailer from a V1 checksum
    88  	Packfile, IdxFile Sha1
    89  }
    90  
    91  // Gets a list of objects in a pack file according to the index.
    92  func v2PackObjectListFromIndex(idx io.Reader) []Sha1 {
    93  	var pack PackfileIndexV2
    94  	binary.Read(idx, binary.BigEndian, &pack.magic)
    95  	binary.Read(idx, binary.BigEndian, &pack.Version)
    96  	binary.Read(idx, binary.BigEndian, &pack.Fanout)
    97  	pack.Sha1Table = make([]Sha1, pack.Fanout[255])
    98  	// Load the tables. The first three are based on the number of
    99  	// objects in the packfile (stored in Fanout[255]), the last
   100  	// table is dynamicly sized.
   101  
   102  	for i := 0; i < len(pack.Sha1Table); i++ {
   103  		if err := binary.Read(idx, binary.BigEndian, &pack.Sha1Table[i]); err != nil {
   104  			panic(err)
   105  		}
   106  	}
   107  	return pack.Sha1Table
   108  }
   109  
   110  // reads a v2 pack file from r and tells if it has object inside it.
   111  func v2PackIndexHasSha1(c *Client, pfile File, r io.Reader, obj Sha1) bool {
   112  	var pack PackfileIndexV2
   113  	binary.Read(r, binary.BigEndian, &pack.magic)
   114  	binary.Read(r, binary.BigEndian, &pack.Version)
   115  	binary.Read(r, binary.BigEndian, &pack.Fanout)
   116  	pack.Sha1Table = make([]Sha1, pack.Fanout[255])
   117  	pack.CRC32 = make([]uint32, pack.Fanout[255])
   118  	pack.FourByteOffsets = make([]uint32, pack.Fanout[255])
   119  	// Load the tables. The first three are based on the number of
   120  	// objects in the packfile (stored in Fanout[255]), the last
   121  	// table is dynamicly sized.
   122  
   123  	for i := 0; i < len(pack.Sha1Table); i++ {
   124  		if err := binary.Read(r, binary.BigEndian, &pack.Sha1Table[i]); err != nil {
   125  			panic(err)
   126  		}
   127  	}
   128  	for i := 0; i < len(pack.CRC32); i++ {
   129  		if err := binary.Read(r, binary.BigEndian, &pack.CRC32[i]); err != nil {
   130  			panic(err)
   131  		}
   132  	}
   133  	for i := 0; i < len(pack.FourByteOffsets); i++ {
   134  		if err := binary.Read(r, binary.BigEndian, &pack.FourByteOffsets[i]); err != nil {
   135  			panic(err)
   136  		}
   137  		var offset int64
   138  		if pack.FourByteOffsets[i]&(1<<31) != 0 {
   139  			// clear out the MSB to get the offset
   140  			eightbyteOffset := pack.FourByteOffsets[i] ^ (1 << 31)
   141  			if eightbyteOffset&(1<<31) != 0 {
   142  				var val uint64
   143  				binary.Read(r, binary.BigEndian, &val)
   144  				pack.EightByteOffsets = append(pack.EightByteOffsets, val)
   145  				offset = int64(val)
   146  			}
   147  		} else {
   148  			offset = int64(pack.FourByteOffsets[i])
   149  		}
   150  		c.objectCache[pack.Sha1Table[i]] = objectLocation{false, pfile, &pack, offset}
   151  	}
   152  	return pack.HasObject(obj)
   153  }
   154  
   155  func (idx PackfileIndexV2) WriteIndex(w io.Writer) error {
   156  	return idx.writeIndex(w, true)
   157  }
   158  
   159  // Using the index, retrieve an object from the packfile represented by r
   160  // at offset. The index must be valid for this function to work, it can
   161  // not retrieve objects before the index is built (ie. during
   162  // `git index-pack`).
   163  func (idx PackfileIndexV2) getObjectAtOffset(r io.ReaderAt, offset int64, metaOnly bool) (rv GitObject, err error) {
   164  	var p PackfileHeader
   165  
   166  	// 4k should be enough for the header.
   167  	metareader := io.NewSectionReader(r, offset, 4096)
   168  	t, sz, ref, refoffset, rawheader := p.ReadHeaderSize(bufio.NewReader(metareader))
   169  	var rawdata []byte
   170  	// sz is the uncompressed size, so the total size should usually be
   171  	// less than sz for the compressed data. It might theoretically be a
   172  	// little more, but we're generous here since this doesn't allocate
   173  	// anything but just determines how much data the SectionReader will
   174  	// read before returning an EOF.
   175  	//
   176  	// There is still overhead if the underlying ReaderAt reads more data
   177  	// than it needs to and then discards it, so we assume that it won't
   178  	// compress to more than double its original size, and then add a floor
   179  	// of at least 1 disk sector since small objects are more likely to hit
   180  	// degenerate cases for compression, but also less affected by the
   181  	// multplication fudge factor, while a floor of 1 disk sector shouldn't
   182  	// have much effect on disk IO (hopefully.)
   183  	if sz != 0 {
   184  		worstdsize := sz * 2
   185  		if worstdsize < 512 {
   186  			worstdsize = 512
   187  		}
   188  		datareader := io.NewSectionReader(r, offset+int64(len(rawheader)), int64(worstdsize))
   189  		if !metaOnly || t == OBJ_OFS_DELTA || t == OBJ_REF_DELTA {
   190  			//raw, err := p.dataStream(bufio.NewReader(datareader))
   191  			raw, err := p.dataStream(bufio.NewReader(datareader))
   192  			if err != nil {
   193  				return nil, err
   194  			}
   195  			rawdata, err = ioutil.ReadAll(raw)
   196  			if err != nil {
   197  				return nil, err
   198  			}
   199  		}
   200  	} else {
   201  		// If it's size 0, sz*3 would immediately return io.EOF and cause
   202  		// panic, so we just directly make the rawdata slice.
   203  		rawdata = make([]byte, 0)
   204  	}
   205  
   206  	// The way we calculate the hash changes based on if it's a delta
   207  	// or not.
   208  	switch t {
   209  	case OBJ_COMMIT:
   210  		o := GitCommitObject{int(sz), rawdata}
   211  		return o, nil
   212  	case OBJ_TREE:
   213  		o := GitTreeObject{int(sz), rawdata}
   214  		return o, nil
   215  		return GitTreeObject{int(sz), rawdata}, nil
   216  	case OBJ_BLOB:
   217  		o := GitBlobObject{int(sz), rawdata}
   218  		return o, nil
   219  	case OBJ_TAG:
   220  		o := GitTagObject{int(sz), rawdata}
   221  		return o, nil
   222  	case OBJ_OFS_DELTA:
   223  		base, err := idx.getObjectAtOffset(r, offset-int64(refoffset), false)
   224  		if err != nil {
   225  			return nil, err
   226  		}
   227  
   228  		deltareader := delta.NewReader(bytes.NewBuffer(rawdata), bytes.NewReader(base.GetContent()))
   229  		resolved, err := ioutil.ReadAll(&deltareader)
   230  		if err != nil {
   231  			return nil, err
   232  		}
   233  		switch ty := base.GetType(); ty {
   234  		case "commit":
   235  			return GitCommitObject{deltareader.Len(), resolved}, nil
   236  		case "tree":
   237  			return GitTreeObject{deltareader.Len(), resolved}, nil
   238  		case "blob":
   239  			return GitBlobObject{deltareader.Len(), resolved}, nil
   240  		case "tag":
   241  			return GitTagObject{deltareader.Len(), resolved}, nil
   242  		default:
   243  			return nil, InvalidObject
   244  		}
   245  	case OBJ_REF_DELTA:
   246  		var base GitObject
   247  		// This function is only after the index is built, so
   248  		// it should have all referenced objects.
   249  		base, err := idx.GetObject(r, ref)
   250  		if err != nil {
   251  			return nil, err
   252  		}
   253  
   254  		deltareader := delta.NewReader(bytes.NewBuffer(rawdata), bytes.NewReader(base.GetContent()))
   255  		resolved, err := ioutil.ReadAll(&deltareader)
   256  		if err != nil {
   257  			return nil, err
   258  		}
   259  		switch ty := base.GetType(); ty {
   260  		case "commit":
   261  			return GitCommitObject{deltareader.Len(), resolved}, nil
   262  		case "tree":
   263  			return GitTreeObject{deltareader.Len(), resolved}, nil
   264  		case "blob":
   265  			return GitBlobObject{deltareader.Len(), resolved}, nil
   266  		case "tag":
   267  			return GitTagObject{deltareader.Len(), resolved}, nil
   268  		default:
   269  			return nil, InvalidObject
   270  		}
   271  	default:
   272  		return nil, fmt.Errorf("Unhandled object type.")
   273  	}
   274  }
   275  
   276  var ocache *lru.Cache
   277  
   278  func init() {
   279  	// This is a ridiculously small cache, but on large repos
   280  	// it leaks memory like a sieve since Go is GC'd. It's the
   281  	// largest cache I could use to successfully index the pack
   282  	// from https:/github.com/Perl/perl5 on a 2GB vultr node
   283  	// without running out of memory on Go 1.14.2
   284  	ocache, _ = lru.New(250)
   285  }
   286  
   287  type cachedObject struct {
   288  	ResolvedType PackEntryType
   289  	Data         []byte
   290  
   291  	RefOffset int
   292  	Ref       Sha1
   293  }
   294  
   295  // Retrieve an object from the packfile represented by r at offset.
   296  // This will use the specified caches to resolve the location of any
   297  // deltas, not the index itself. They must be maintained by the caller.
   298  var cachedn, cachemiss int
   299  
   300  func (idx PackfileIndexV2) resolveDeltaForIndexing(pack io.ReaderAt, deltat PackEntryType, rawdata []byte, location ObjectOffset, ref Sha1, refoffset int64, cache map[ObjectOffset]*packObject, refcache map[Sha1]*packObject) (t PackEntryType, data io.Reader, osz int64, err error) {
   301  	datareader := bytes.NewBuffer(rawdata)
   302  	switch deltat {
   303  	case OBJ_REF_DELTA:
   304  		parent := refcache[ref]
   305  		parent.deltasResolved++
   306  		t, r, _, err := idx.getObjectAtOffsetForIndexing(pack, int64(parent.location), false, cache, refcache)
   307  		if err != nil {
   308  			return 0, nil, 0, err
   309  		}
   310  		base, err := ioutil.ReadAll(r)
   311  		if err != nil {
   312  			return 0, nil, 0, err
   313  		}
   314  		if parent.deltasAgainst > 0 && parent.deltasAgainst < parent.deltasResolved {
   315  			ocache.Add(parent.location, cachedObject{t, base, 0, Sha1{}})
   316  		}
   317  		deltareader := delta.NewReader(datareader, bytes.NewReader(base))
   318  		return t, &deltareader, int64(deltareader.Len()), err
   319  	case OBJ_OFS_DELTA:
   320  		parent := cache[ObjectOffset(location)-ObjectOffset(refoffset)]
   321  		parent.deltasResolved++
   322  		t, r, _, err := idx.getObjectAtOffsetForIndexing(pack, int64(ObjectOffset(location)-ObjectOffset(refoffset)), false, cache, refcache)
   323  		if err != nil {
   324  			return 0, nil, 0, err
   325  		}
   326  		base, err := ioutil.ReadAll(r)
   327  		if err != nil {
   328  			return 0, nil, 0, err
   329  		}
   330  		if parent.deltasAgainst > 0 && parent.deltasAgainst < parent.deltasResolved {
   331  			ocache.Add(ObjectOffset(location)-ObjectOffset(refoffset), cachedObject{t, base, 0, Sha1{}})
   332  		}
   333  		deltareader := delta.NewReader(datareader, bytes.NewReader(base))
   334  		return t, &deltareader, int64(deltareader.Len()), err
   335  		//return t, &deltareader, int64(sz), err
   336  	default:
   337  		return 0, nil, 0, fmt.Errorf("Unhandled delta type %v: ", t)
   338  	}
   339  }
   340  
   341  func (idx PackfileIndexV2) getObjectAtOffsetForIndexing(r io.ReaderAt, offset int64, metaOnly bool, cache map[ObjectOffset]*packObject, refcache map[Sha1]*packObject) (t PackEntryType, data io.Reader, osz int64, err error) {
   342  	if val, ok := ocache.Get(ObjectOffset(offset)); ok {
   343  		o := val.(cachedObject)
   344  		cachedn++
   345  
   346  		if o.ResolvedType == OBJ_OFS_DELTA || o.ResolvedType == OBJ_REF_DELTA {
   347  			return idx.resolveDeltaForIndexing(r, o.ResolvedType, o.Data, ObjectOffset(offset), o.Ref, int64(o.RefOffset), cache, refcache)
   348  		}
   349  		return o.ResolvedType, bytes.NewReader(o.Data), int64(len(o.Data)), nil
   350  	} else {
   351  		cachemiss++
   352  	}
   353  
   354  	var p PackfileHeader
   355  
   356  	// 4k should be enough for the header.
   357  	var datareader flate.Reader
   358  	metareader := io.NewSectionReader(r, offset, 4096)
   359  	t, sz, ref, refoffset, rawheader := p.ReadHeaderSize(bufio.NewReader(metareader))
   360  	// sz is the uncompressed size, so the total size should usually be
   361  	// less than sz for the compressed data. It might theoretically be a
   362  	// little more, but we're generous here since this doesn't allocate
   363  	// anything but just determines how much data the SectionReader will
   364  	// read before returning an EOF.
   365  	//
   366  	// There is still overhead if the underlying ReaderAt reads more data
   367  	// than it needs to and then discards it, so we assume that it won't
   368  	// compress to more than double its original size, and then add a floor
   369  	// of at least 1 disk sector since small objects are more likely to hit
   370  	// degenerate cases for compression, but also less affected by the
   371  	// multplication fudge factor, while a floor of 1 disk sector shouldn't
   372  	// have much effect on disk IO (hopefully.)
   373  	if sz != 0 {
   374  		worstdsize := sz * 2
   375  		if worstdsize < 512 {
   376  			worstdsize = 512
   377  		}
   378  		if !metaOnly || t == OBJ_OFS_DELTA || t == OBJ_REF_DELTA {
   379  			// dataStream needs a ByteReader, so we wrap
   380  			// the reader in a bufio
   381  			dr, err := p.dataStream(bufio.NewReader(io.NewSectionReader(r, offset+int64(len(rawheader)), int64(worstdsize))))
   382  			if err != nil {
   383  				return 0, nil, 0, err
   384  			}
   385  			datareader = bufio.NewReader(dr)
   386  		}
   387  	} else {
   388  		// If it's size 0, sz*3 would immediately return io.EOF and cause
   389  		// panic, so we just directly make the rawdata slice.
   390  		datareader = bytes.NewBuffer(nil)
   391  	}
   392  
   393  	// The way we calculate the hash changes based on if it's a delta
   394  	// or not.
   395  	switch t {
   396  	case OBJ_COMMIT, OBJ_TREE, OBJ_BLOB, OBJ_TAG:
   397  		return t, datareader, int64(sz), nil
   398  	case OBJ_REF_DELTA, OBJ_OFS_DELTA:
   399  		rawdata, err := ioutil.ReadAll(datareader)
   400  		if err != nil {
   401  			return 0, nil, 0, err
   402  		}
   403  		return idx.resolveDeltaForIndexing(r, t, rawdata, ObjectOffset(offset), ref, int64(refoffset), cache, refcache)
   404  	default:
   405  		return 0, nil, 0, fmt.Errorf("Unhandled object type %v: ", t)
   406  	}
   407  }
   408  
   409  // Find the object in the table.
   410  func (idx PackfileIndexV2) GetObjectMetadata(r io.ReaderAt, s Sha1) (GitObject, error) {
   411  	foundIdx := -1
   412  	startIdx := idx.Fanout[s[0]]
   413  
   414  	// Packfiles are designed so that we could do a binary search here, but
   415  	// we don't need that optimization yet, so just do a linear search through
   416  	// the objects with the same first byte.
   417  	for i := startIdx - 1; idx.Sha1Table[i][0] == s[0]; i-- {
   418  		if s == idx.Sha1Table[i] {
   419  			foundIdx = int(i)
   420  			break
   421  		}
   422  	}
   423  	if foundIdx == -1 {
   424  		return nil, fmt.Errorf("Object not found: %v", s)
   425  	}
   426  
   427  	var offset int64
   428  	if idx.FourByteOffsets[foundIdx]&(1<<31) != 0 {
   429  		// clear out the MSB to get the offset
   430  		eightbyteOffset := idx.FourByteOffsets[foundIdx] ^ (1 << 31)
   431  		offset = int64(idx.EightByteOffsets[eightbyteOffset])
   432  	} else {
   433  		offset = int64(idx.FourByteOffsets[foundIdx])
   434  	}
   435  
   436  	// Now that we've figured out where the object lives, use the packfile
   437  	// to get the value from the packfile.
   438  	return idx.getObjectAtOffset(r, offset, true)
   439  }
   440  
   441  func (idx PackfileIndexV2) GetObject(r io.ReaderAt, s Sha1) (GitObject, error) {
   442  	foundIdx := -1
   443  	startIdx := idx.Fanout[s[0]]
   444  	if startIdx <= 0 {
   445  		// The fanout table holds the number of entries less than x, so we
   446  		// subtract 1 to make sure we don't miss the hash we're looking for,
   447  		// but we need a special case for s[0] == 0 to prevent underflow
   448  		startIdx = 1
   449  	}
   450  
   451  	// Packfiles are designed so that we could do a binary search here, but
   452  	// we don't need that optimization yet, so just do a linear search through
   453  	// the objects with the same first byte.
   454  	for i := startIdx - 1; idx.Sha1Table[i][0] == s[0]; i-- {
   455  		if s == idx.Sha1Table[i] {
   456  			foundIdx = int(i)
   457  			break
   458  		}
   459  	}
   460  
   461  	if foundIdx == -1 {
   462  		return nil, fmt.Errorf("Object not found: %v", s)
   463  	}
   464  
   465  	var offset int64
   466  	if idx.FourByteOffsets[foundIdx]&(1<<31) != 0 {
   467  		// clear out the MSB to get the offset
   468  		eightbyteOffset := idx.FourByteOffsets[foundIdx] ^ (1 << 31)
   469  		offset = int64(idx.EightByteOffsets[eightbyteOffset])
   470  	} else {
   471  		offset = int64(idx.FourByteOffsets[foundIdx])
   472  	}
   473  
   474  	// Now that we've figured out where the object lives, use the packfile
   475  	// to get the value from the packfile.
   476  	return idx.getObjectAtOffset(r, offset, false)
   477  }
   478  
   479  func getPackFileObject(idx io.Reader, packfile io.ReaderAt, s Sha1, metaOnly bool) (GitObject, error) {
   480  	var pack PackfileIndexV2
   481  	if err := binary.Read(idx, binary.BigEndian, &pack.magic); err != nil {
   482  		return nil, err
   483  	}
   484  	if err := binary.Read(idx, binary.BigEndian, &pack.Version); err != nil {
   485  		return nil, err
   486  	}
   487  	if err := binary.Read(idx, binary.BigEndian, &pack.Fanout); err != nil {
   488  		return nil, err
   489  	}
   490  	pack.Sha1Table = make([]Sha1, pack.Fanout[255])
   491  	pack.CRC32 = make([]uint32, pack.Fanout[255])
   492  	pack.FourByteOffsets = make([]uint32, pack.Fanout[255])
   493  	// Load the tables. The first three are based on the number of
   494  	// objects in the packfile (stored in Fanout[255]), the last
   495  	// table is dynamicly sized.
   496  
   497  	for i := 0; i < len(pack.Sha1Table); i++ {
   498  		if err := binary.Read(idx, binary.BigEndian, &pack.Sha1Table[i]); err != nil {
   499  			return nil, err
   500  		}
   501  	}
   502  	for i := 0; i < len(pack.CRC32); i++ {
   503  		if err := binary.Read(idx, binary.BigEndian, &pack.CRC32[i]); err != nil {
   504  			return nil, err
   505  		}
   506  	}
   507  	for i := 0; i < len(pack.FourByteOffsets); i++ {
   508  		if err := binary.Read(idx, binary.BigEndian, &pack.FourByteOffsets[i]); err != nil {
   509  			return nil, err
   510  		}
   511  	}
   512  
   513  	// The number of eight byte offsets is dynamic, based on how many
   514  	// four byte offsets have the MSB set.
   515  	for _, offset := range pack.FourByteOffsets {
   516  		if offset&(1<<31) != 0 {
   517  			var val uint64
   518  			binary.Read(idx, binary.BigEndian, &val)
   519  			pack.EightByteOffsets = append(pack.EightByteOffsets, val)
   520  		}
   521  	}
   522  	if metaOnly {
   523  		return pack.GetObjectMetadata(packfile, s)
   524  	}
   525  	return pack.GetObject(packfile, s)
   526  }
   527  
   528  func (idx PackfileIndexV2) GetTrailer() (Sha1, Sha1) {
   529  	return idx.Packfile, idx.IdxFile
   530  }
   531  
   532  func (idx PackfileIndexV2) writeIndex(w io.Writer, withTrailer bool) error {
   533  	if err := binary.Write(w, binary.BigEndian, idx.magic); err != nil {
   534  		return err
   535  	}
   536  	if err := binary.Write(w, binary.BigEndian, idx.Version); err != nil {
   537  		return err
   538  	}
   539  	for _, fanout := range idx.Fanout {
   540  		if err := binary.Write(w, binary.BigEndian, fanout); err != nil {
   541  			return err
   542  		}
   543  	}
   544  	for _, sha := range idx.Sha1Table {
   545  		if err := binary.Write(w, binary.BigEndian, sha); err != nil {
   546  			return err
   547  		}
   548  	}
   549  	for _, crc32 := range idx.CRC32 {
   550  		if err := binary.Write(w, binary.BigEndian, crc32); err != nil {
   551  			return err
   552  		}
   553  	}
   554  	for _, offset := range idx.FourByteOffsets {
   555  		if err := binary.Write(w, binary.BigEndian, offset); err != nil {
   556  			return err
   557  		}
   558  	}
   559  	for _, offset := range idx.EightByteOffsets {
   560  		if err := binary.Write(w, binary.BigEndian, offset); err != nil {
   561  			return err
   562  		}
   563  	}
   564  	if err := binary.Write(w, binary.BigEndian, idx.Packfile); err != nil {
   565  		return err
   566  	}
   567  	if withTrailer {
   568  		if err := binary.Write(w, binary.BigEndian, idx.IdxFile); err != nil {
   569  			return err
   570  		}
   571  	}
   572  	return nil
   573  }
   574  func (idx PackfileIndexV2) HasObject(s Sha1) bool {
   575  	startIdx := idx.Fanout[s[0]]
   576  	if startIdx <= 0 {
   577  		// The fanout table holds the number of entries less than x, so we
   578  		// subtract 1 to make sure we don't miss the hash we're looking for,
   579  		// but we need a special case for s[0] == 0 to prevent underflow
   580  		startIdx = 1
   581  	}
   582  
   583  	// Packfiles are designed so that we could do a binary search here, but
   584  	// we don't need that optimization yet, so just do a linear search through
   585  	// the objects with the same first byte.
   586  	for i := int(startIdx - 1); i >= 0 && idx.Sha1Table[i][0] == s[0]; i-- {
   587  		if s == idx.Sha1Table[i] {
   588  			return true
   589  		}
   590  	}
   591  	return false
   592  }
   593  
   594  // Implements the Sorter interface on PackfileIndexV2, in order to sort the
   595  // Sha1, CRC32, and
   596  func (p *PackfileIndexV2) Len() int {
   597  	return int(p.Fanout[255])
   598  }
   599  
   600  func (p *PackfileIndexV2) Swap(i, j int) {
   601  	p.Sha1Table[i], p.Sha1Table[j] = p.Sha1Table[j], p.Sha1Table[i]
   602  	p.CRC32[i], p.CRC32[j] = p.CRC32[j], p.CRC32[i]
   603  	p.FourByteOffsets[i], p.FourByteOffsets[j] = p.FourByteOffsets[j], p.FourByteOffsets[i]
   604  }
   605  
   606  func (p *PackfileIndexV2) Less(i, j int) bool {
   607  	for k := 0; k < 20; k++ {
   608  		if p.Sha1Table[i][k] < p.Sha1Table[j][k] {
   609  			return true
   610  		} else if p.Sha1Table[i][k] > p.Sha1Table[j][k] {
   611  			return false
   612  		}
   613  	}
   614  	return false
   615  }
   616  
   617  // calculates and stores the trailer into the packfile.
   618  func (p *PackfileIndexV2) calculateTrailer() error {
   619  	trailer := sha1.New()
   620  	if err := p.writeIndex(trailer, false); err != nil {
   621  		return err
   622  	}
   623  	t, err := Sha1FromSlice(trailer.Sum(nil))
   624  	if err != nil {
   625  		return err
   626  	}
   627  	p.IdxFile = t
   628  	return nil
   629  }
   630  
   631  // Update both the fanout table and Sha1Table for this index.
   632  func (idx *PackfileIndexV2) updateFanout(i int, val Sha1) {
   633  	for j := int(val[0]); j < 256; j++ {
   634  		atomic.AddUint32(&idx.Fanout[j], 1)
   635  	}
   636  
   637  	// SHA1 is 160 bits.. since we know no one else is writing here,
   638  	// we pretend it's 2 64 bit ints and a 32 bit int so that we can
   639  	// use atomic writes instead of a lock.
   640  	atomic.StoreUint64((*uint64)(unsafe.Pointer(&idx.Sha1Table[i][0])), *(*uint64)(unsafe.Pointer(&val[0])))
   641  	atomic.StoreUint64((*uint64)(unsafe.Pointer(&idx.Sha1Table[i][8])), *(*uint64)(unsafe.Pointer(&val[8])))
   642  	atomic.StoreUint32((*uint32)(unsafe.Pointer(&idx.Sha1Table[i][16])), *(*uint32)(unsafe.Pointer(&val[16])))
   643  }
   644  
   645  func IndexPack(c *Client, opts IndexPackOptions, r io.Reader) (idx PackfileIndex, rerr error) {
   646  	isfile := false
   647  	if f, ok := r.(*os.File); ok && !opts.Stdin {
   648  		// os.Stdin isn *os.File, but we want to consider it a stream.
   649  		isfile = (f != os.Stdin)
   650  	}
   651  
   652  	// If --verbose is set, keep track of the time to output
   653  	// a x kb/s in the output.
   654  	var startTime time.Time
   655  	if opts.Verbose {
   656  		startTime = time.Now()
   657  	}
   658  
   659  	deltas := list.New()
   660  	indexfile, initcb, icb, crc32cb, priorObjects, priorLocations := indexClosure(c, opts, deltas)
   661  
   662  	cb := func(r io.ReaderAt, i, n int, loc int64, t PackEntryType, sz PackEntrySize, ref Sha1, offset ObjectOffset, data []byte) error {
   663  		if !isfile && opts.Verbose {
   664  			now := time.Now()
   665  			elapsed := now.Unix() - startTime.Unix()
   666  			if elapsed == 0 {
   667  				progressF("Receiving objects: %2.f%% (%d/%d)", i+1 == n, (float32(i+1) / float32(n) * 100), i+1, n)
   668  			} else {
   669  				bps := loc / elapsed
   670  				progressF("Receiving objects: %2.f%% (%d/%d), %v | %v/s", i+1 == n, (float32(i+1) / float32(n) * 100), i+1, n, formatBytes(loc), formatBytes(bps))
   671  
   672  			}
   673  		}
   674  		return icb(r, i, n, loc, t, sz, ref, offset, data)
   675  	}
   676  
   677  	trailerCB := func(r io.ReaderAt, n int, trailer Sha1) error {
   678  		i := 0
   679  		for e := deltas.Front(); e != nil; e = e.Next() {
   680  			i++
   681  			delta := e.Value.(*packObject)
   682  			if opts.Verbose {
   683  				progressF("Resolving deltas: %2.f%% (%d/%d)", i+1 == deltas.Len(), (float32(i+1) / float32(deltas.Len()) * 100), i+1, deltas.Len())
   684  			}
   685  
   686  			t, r, sz, err := indexfile.getObjectAtOffsetForIndexing(r, int64(delta.location), false, priorLocations, priorObjects)
   687  			if err != nil {
   688  				return err
   689  			}
   690  
   691  			var buf bytes.Buffer
   692  			if delta.deltasAgainst > 0 && delta.deltasAgainst < delta.deltasResolved {
   693  				r = io.TeeReader(r, &buf)
   694  			}
   695  			sha1, err := HashReaderWithSize(t.String(), sz, r)
   696  			if err != nil {
   697  				return err
   698  			}
   699  			if delta.deltasAgainst > 0 && delta.deltasAgainst < delta.deltasResolved {
   700  				ocache.Add(ObjectOffset(delta.location), cachedObject{t, buf.Bytes(), 0, Sha1{}})
   701  			}
   702  			delta.oid = sha1
   703  			priorObjects[sha1] = delta
   704  			indexfile.updateFanout(delta.idx, sha1)
   705  		}
   706  
   707  		indexfile.Packfile = trailer
   708  
   709  		//	println("Cached reads", cachedn, " Cache misses", cachemiss)
   710  		sort.Sort(indexfile)
   711  		// The sorting may have changed things, so as a final pass, hash
   712  		// everything in the index to get the trailer (instead of doing it
   713  		// while we were calculating it.)
   714  		if err := indexfile.calculateTrailer(); err != nil {
   715  			return err
   716  		}
   717  		return nil
   718  	}
   719  
   720  	pack, err := iteratePack(c, r, initcb, cb, trailerCB, crc32cb)
   721  	if err != nil {
   722  		//	println("err: Cached reads", cachedn, " Cache misses", cachemiss)
   723  		return nil, err
   724  	}
   725  	defer pack.Close()
   726  
   727  	// Write the index to disk and return
   728  	var basename, idxname string
   729  	if f, ok := r.(*os.File); ok && isfile && !opts.Stdin {
   730  		basename = pack.Name()
   731  		basename = strings.TrimSuffix(f.Name(), ".pack")
   732  		idxname = basename + ".idx"
   733  	} else {
   734  		packhash, _ := indexfile.GetTrailer()
   735  		basename := filepath.Join(c.ObjectDir, "pack", fmt.Sprintf("pack-%s", packhash))
   736  		idxname = basename + ".idx"
   737  
   738  		if opts.Keep != "" {
   739  			if err := ioutil.WriteFile(basename+".keep", []byte(opts.Keep+"\n"), 0755); err != nil {
   740  				return indexfile, err
   741  			}
   742  		}
   743  
   744  		pack.Close()
   745  		if err := os.Rename(pack.Name(), basename+".pack"); err != nil {
   746  			return indexfile, err
   747  		}
   748  	}
   749  
   750  	if opts.Output == nil {
   751  		o, err := os.Create(idxname)
   752  		if err != nil {
   753  			return indexfile, err
   754  		}
   755  		defer o.Close()
   756  		opts.Output = o
   757  	}
   758  
   759  	if err := indexfile.WriteIndex(opts.Output); err != nil {
   760  		return indexfile, err
   761  	}
   762  	return indexfile, err
   763  }
   764  
   765  type packObject struct {
   766  	idx                           int
   767  	oid                           Sha1
   768  	location                      ObjectOffset
   769  	deltasAgainst, deltasResolved int
   770  	baselocation                  ObjectOffset
   771  	typ                           PackEntryType
   772  }
   773  
   774  func indexClosure(c *Client, opts IndexPackOptions, deltas *list.List) (*PackfileIndexV2, func(int), packIterator, func(int, uint32) error, map[Sha1]*packObject, map[ObjectOffset]*packObject) {
   775  	var indexfile PackfileIndexV2
   776  
   777  	indexfile.magic = [4]byte{0377, 't', 'O', 'c'}
   778  	indexfile.Version = 2
   779  
   780  	var mu sync.Mutex
   781  
   782  	// For REF_DELTA to resolve
   783  	priorObjects := make(map[Sha1]*packObject)
   784  	// For OFS_DELTA to resolve
   785  	priorLocations := make(map[ObjectOffset]*packObject)
   786  
   787  	icb := func(n int) {
   788  		indexfile.Sha1Table = make([]Sha1, n)
   789  		indexfile.CRC32 = make([]uint32, n)
   790  		indexfile.FourByteOffsets = make([]uint32, n)
   791  
   792  		// See note in init function about the LRU causing
   793  		// memory leaks. If it's a small pack, we don't care,
   794  		// but on very large repos we need to use a small
   795  		// cache to avoid running out of memory.
   796  		//
   797  		// Chains of deltas tend to be close together it does
   798  		// still give us some benefit even with a small cache.
   799  		if n > 100000 {
   800  			ocache, _ = lru.New(250)
   801  		} else {
   802  			ocache, _ = lru.New(5000)
   803  		}
   804  	}
   805  
   806  	cb := func(r io.ReaderAt, i, n int, location int64, t PackEntryType, sz PackEntrySize, ref Sha1, offset ObjectOffset, rawdata []byte) error {
   807  		if opts.Verbose {
   808  			progressF("Indexing objects: %2.f%% (%d/%d)", i+1 == n, (float32(i+1) / float32(n) * 100), i+1, n)
   809  		}
   810  
   811  		if location < (1 << 31) {
   812  			atomic.StoreUint32(&indexfile.FourByteOffsets[i], uint32(location))
   813  		} else {
   814  			atomic.StoreUint32(&indexfile.FourByteOffsets[i], uint32(len(indexfile.EightByteOffsets))|(1<<31))
   815  			mu.Lock()
   816  			indexfile.EightByteOffsets = append(indexfile.EightByteOffsets, uint64(location))
   817  			mu.Unlock()
   818  		}
   819  
   820  		switch t {
   821  		case OBJ_COMMIT, OBJ_TREE, OBJ_BLOB, OBJ_TAG:
   822  			ocache.Add(ObjectOffset(location), cachedObject{t, rawdata, 0, Sha1{}})
   823  			sha1, err := HashReaderWithSize(t.String(), int64(len(rawdata)), bytes.NewReader(rawdata))
   824  			if err != nil && opts.Strict {
   825  				return err
   826  			}
   827  
   828  			indexfile.updateFanout(i, sha1)
   829  			// Maintain the list of references for delta chains.
   830  			// There's a possibility a delta refers to a reference
   831  			// before the reference in packs inflated from thin packs,
   832  			// so we need to check if it exists before blindly
   833  			// setting it.
   834  			// If it's been already been referenced, cache it.
   835  			// Otherwise don't to save memory and only cache if
   836  			// there are references to it.
   837  			mu.Lock()
   838  			objCache := &packObject{
   839  				idx:      i,
   840  				oid:      sha1,
   841  				location: ObjectOffset(location),
   842  			}
   843  			if o, ok := priorObjects[sha1]; !ok {
   844  				priorObjects[sha1] = objCache
   845  			} else {
   846  				// We have the lock and we know no one is reading
   847  				// these until we're done the first round of
   848  				// indexing anyways, so we don't bother to use
   849  				// the atomic package.
   850  				o.location = ObjectOffset(location)
   851  				o.idx = i
   852  			}
   853  			priorLocations[objCache.location] = objCache
   854  			mu.Unlock()
   855  		case OBJ_REF_DELTA:
   856  			log.Printf("Noting REF_DELTA to resolve: %v\n", ref)
   857  			mu.Lock()
   858  			o, ok := priorObjects[ref]
   859  			if !ok {
   860  				// It hasn't been seen yet, so just note
   861  				// that there's a a delta against it for
   862  				// later.
   863  				// Since we haven't seen it yet, we don't
   864  				// have a location.
   865  				objCache := &packObject{
   866  					oid:            ref,
   867  					deltasAgainst:  1,
   868  					deltasResolved: 0,
   869  				}
   870  				priorObjects[ref] = objCache
   871  			} else {
   872  				o.deltasAgainst += 1
   873  			}
   874  			self := &packObject{
   875  				idx:            i,
   876  				location:       ObjectOffset(location),
   877  				deltasAgainst:  0,
   878  				deltasResolved: 0,
   879  				typ:            t,
   880  			}
   881  			priorLocations[ObjectOffset(location)] = self
   882  			deltas.PushBack(self)
   883  			mu.Unlock()
   884  		case OBJ_OFS_DELTA:
   885  			log.Printf("Noting OFS_DELTA to resolve from %v\n", location-int64(offset))
   886  			mu.Lock()
   887  			// Adjust the number of deltas against the parent
   888  			// priorLocations should always be populated with
   889  			// the prior objects (even if some fields aren't
   890  			// populated), and offets are always looking back
   891  			// into the packfile, so this shouldn't happen.
   892  			if o, ok := priorLocations[ObjectOffset(location-int64(offset))]; !ok {
   893  				panic("Can not determine delta base")
   894  			} else {
   895  				o.deltasAgainst += 1
   896  			}
   897  
   898  			// Add ourselves to the map for future deltas
   899  			self := &packObject{
   900  				idx:            i,
   901  				location:       ObjectOffset(location),
   902  				deltasAgainst:  0,
   903  				deltasResolved: 0,
   904  				baselocation:   ObjectOffset(location) - ObjectOffset(offset),
   905  				typ:            t,
   906  			}
   907  			priorLocations[ObjectOffset(location)] = self
   908  			deltas.PushBack(self)
   909  			mu.Unlock()
   910  		default:
   911  			panic("Unhandled type in IndexPack: " + t.String())
   912  		}
   913  		return nil
   914  	}
   915  	crc32cb := func(i int, crc uint32) error {
   916  		indexfile.CRC32[i] = crc
   917  		return nil
   918  	}
   919  	return &indexfile, icb, cb, crc32cb, priorObjects, priorLocations
   920  }
   921  
   922  // Indexes the pack, and stores a copy in Client's .git/objects/pack directory as it's
   923  // doing so. This is the equivalent of "git index-pack --stdin", but works with any
   924  // reader.
   925  func IndexAndCopyPack(c *Client, opts IndexPackOptions, r io.Reader) (PackfileIndex, error) {
   926  	return IndexPack(c, opts, r)
   927  }
   928  
   929  func formatBytes(n int64) string {
   930  	if n <= 1024 {
   931  		return fmt.Sprintf("%v B", n)
   932  	} else if n <= 1024*1024 {
   933  		return fmt.Sprintf("%.2f KiB", float64(n)/float64(1024))
   934  	} else if n <= 1024*1024*1024 {
   935  		return fmt.Sprintf("%.2f MiB", float64(n)/float64(1024*1024))
   936  	}
   937  	return fmt.Sprintf("%.2f GiB", float64(n)/float64(1024*1024*1024))
   938  }