github.com/driusan/dgit@v0.0.0-20221118233547-f39f0c15edbb/git/indexpack.go (about) 1 package git 2 3 import ( 4 "bufio" 5 "bytes" 6 "fmt" 7 "io" 8 "io/ioutil" 9 "log" 10 "os" 11 "path/filepath" 12 //"runtime" 13 "sort" 14 "strings" 15 "time" 16 "unsafe" 17 18 "compress/flate" 19 20 "sync" 21 "sync/atomic" 22 23 "container/list" 24 "crypto/sha1" 25 "encoding/binary" 26 27 "github.com/driusan/dgit/git/delta" 28 "github.com/hashicorp/golang-lru" 29 // "hash/crc32" 30 ) 31 32 type IndexPackOptions struct { 33 // Display progress information while indexing pack. 34 Verbose bool 35 36 // Output index to this writer. If nil, will be based on 37 // the filename. 38 Output io.Writer 39 40 // Fix a "thin" pack produced by git pack-objects --thin 41 // (not implemented) 42 FixThin bool 43 44 // A message to store in a .keep file. The string "none" 45 // will be interpreted as an empty file, the empty string 46 // will be interpreted as do not produce a .keep file. 47 Keep string 48 49 // Not implemented 50 IndexVersion int 51 52 // Die if the pack contains broken links. (Not implemented) 53 Strict bool 54 55 // A number of threads to use for resolving deltas. The 0-value 56 // will use GOMAXPROCS. 57 Threads uint 58 59 // Act as if reading from a non-seekable stream, not a file. 60 Stdin bool 61 } 62 63 type PackfileIndex interface { 64 GetObject(i io.ReaderAt, s Sha1) (GitObject, error) 65 HasObject(s Sha1) bool 66 WriteIndex(w io.Writer) error 67 GetTrailer() (Packfile Sha1, Index Sha1) 68 } 69 70 type PackIndexFanout [256]uint32 71 type PackfileIndexV2 struct { 72 magic [4]byte // Must be \377tOc 73 Version uint32 // Must be 2 74 75 Fanout PackIndexFanout 76 77 Sha1Table []Sha1 78 CRC32 []uint32 79 80 // If the MSB is set, it's an index into the next 81 // table, otherwise it's an index into the packfile. 82 FourByteOffsets []uint32 83 EightByteOffsets []uint64 84 85 // the objects stream goes here in the file 86 87 // The trailer from a V1 checksum 88 Packfile, IdxFile Sha1 89 } 90 91 // Gets a list of objects in a pack file according to the index. 92 func v2PackObjectListFromIndex(idx io.Reader) []Sha1 { 93 var pack PackfileIndexV2 94 binary.Read(idx, binary.BigEndian, &pack.magic) 95 binary.Read(idx, binary.BigEndian, &pack.Version) 96 binary.Read(idx, binary.BigEndian, &pack.Fanout) 97 pack.Sha1Table = make([]Sha1, pack.Fanout[255]) 98 // Load the tables. The first three are based on the number of 99 // objects in the packfile (stored in Fanout[255]), the last 100 // table is dynamicly sized. 101 102 for i := 0; i < len(pack.Sha1Table); i++ { 103 if err := binary.Read(idx, binary.BigEndian, &pack.Sha1Table[i]); err != nil { 104 panic(err) 105 } 106 } 107 return pack.Sha1Table 108 } 109 110 // reads a v2 pack file from r and tells if it has object inside it. 111 func v2PackIndexHasSha1(c *Client, pfile File, r io.Reader, obj Sha1) bool { 112 var pack PackfileIndexV2 113 binary.Read(r, binary.BigEndian, &pack.magic) 114 binary.Read(r, binary.BigEndian, &pack.Version) 115 binary.Read(r, binary.BigEndian, &pack.Fanout) 116 pack.Sha1Table = make([]Sha1, pack.Fanout[255]) 117 pack.CRC32 = make([]uint32, pack.Fanout[255]) 118 pack.FourByteOffsets = make([]uint32, pack.Fanout[255]) 119 // Load the tables. The first three are based on the number of 120 // objects in the packfile (stored in Fanout[255]), the last 121 // table is dynamicly sized. 122 123 for i := 0; i < len(pack.Sha1Table); i++ { 124 if err := binary.Read(r, binary.BigEndian, &pack.Sha1Table[i]); err != nil { 125 panic(err) 126 } 127 } 128 for i := 0; i < len(pack.CRC32); i++ { 129 if err := binary.Read(r, binary.BigEndian, &pack.CRC32[i]); err != nil { 130 panic(err) 131 } 132 } 133 for i := 0; i < len(pack.FourByteOffsets); i++ { 134 if err := binary.Read(r, binary.BigEndian, &pack.FourByteOffsets[i]); err != nil { 135 panic(err) 136 } 137 var offset int64 138 if pack.FourByteOffsets[i]&(1<<31) != 0 { 139 // clear out the MSB to get the offset 140 eightbyteOffset := pack.FourByteOffsets[i] ^ (1 << 31) 141 if eightbyteOffset&(1<<31) != 0 { 142 var val uint64 143 binary.Read(r, binary.BigEndian, &val) 144 pack.EightByteOffsets = append(pack.EightByteOffsets, val) 145 offset = int64(val) 146 } 147 } else { 148 offset = int64(pack.FourByteOffsets[i]) 149 } 150 c.objectCache[pack.Sha1Table[i]] = objectLocation{false, pfile, &pack, offset} 151 } 152 return pack.HasObject(obj) 153 } 154 155 func (idx PackfileIndexV2) WriteIndex(w io.Writer) error { 156 return idx.writeIndex(w, true) 157 } 158 159 // Using the index, retrieve an object from the packfile represented by r 160 // at offset. The index must be valid for this function to work, it can 161 // not retrieve objects before the index is built (ie. during 162 // `git index-pack`). 163 func (idx PackfileIndexV2) getObjectAtOffset(r io.ReaderAt, offset int64, metaOnly bool) (rv GitObject, err error) { 164 var p PackfileHeader 165 166 // 4k should be enough for the header. 167 metareader := io.NewSectionReader(r, offset, 4096) 168 t, sz, ref, refoffset, rawheader := p.ReadHeaderSize(bufio.NewReader(metareader)) 169 var rawdata []byte 170 // sz is the uncompressed size, so the total size should usually be 171 // less than sz for the compressed data. It might theoretically be a 172 // little more, but we're generous here since this doesn't allocate 173 // anything but just determines how much data the SectionReader will 174 // read before returning an EOF. 175 // 176 // There is still overhead if the underlying ReaderAt reads more data 177 // than it needs to and then discards it, so we assume that it won't 178 // compress to more than double its original size, and then add a floor 179 // of at least 1 disk sector since small objects are more likely to hit 180 // degenerate cases for compression, but also less affected by the 181 // multplication fudge factor, while a floor of 1 disk sector shouldn't 182 // have much effect on disk IO (hopefully.) 183 if sz != 0 { 184 worstdsize := sz * 2 185 if worstdsize < 512 { 186 worstdsize = 512 187 } 188 datareader := io.NewSectionReader(r, offset+int64(len(rawheader)), int64(worstdsize)) 189 if !metaOnly || t == OBJ_OFS_DELTA || t == OBJ_REF_DELTA { 190 //raw, err := p.dataStream(bufio.NewReader(datareader)) 191 raw, err := p.dataStream(bufio.NewReader(datareader)) 192 if err != nil { 193 return nil, err 194 } 195 rawdata, err = ioutil.ReadAll(raw) 196 if err != nil { 197 return nil, err 198 } 199 } 200 } else { 201 // If it's size 0, sz*3 would immediately return io.EOF and cause 202 // panic, so we just directly make the rawdata slice. 203 rawdata = make([]byte, 0) 204 } 205 206 // The way we calculate the hash changes based on if it's a delta 207 // or not. 208 switch t { 209 case OBJ_COMMIT: 210 o := GitCommitObject{int(sz), rawdata} 211 return o, nil 212 case OBJ_TREE: 213 o := GitTreeObject{int(sz), rawdata} 214 return o, nil 215 return GitTreeObject{int(sz), rawdata}, nil 216 case OBJ_BLOB: 217 o := GitBlobObject{int(sz), rawdata} 218 return o, nil 219 case OBJ_TAG: 220 o := GitTagObject{int(sz), rawdata} 221 return o, nil 222 case OBJ_OFS_DELTA: 223 base, err := idx.getObjectAtOffset(r, offset-int64(refoffset), false) 224 if err != nil { 225 return nil, err 226 } 227 228 deltareader := delta.NewReader(bytes.NewBuffer(rawdata), bytes.NewReader(base.GetContent())) 229 resolved, err := ioutil.ReadAll(&deltareader) 230 if err != nil { 231 return nil, err 232 } 233 switch ty := base.GetType(); ty { 234 case "commit": 235 return GitCommitObject{deltareader.Len(), resolved}, nil 236 case "tree": 237 return GitTreeObject{deltareader.Len(), resolved}, nil 238 case "blob": 239 return GitBlobObject{deltareader.Len(), resolved}, nil 240 case "tag": 241 return GitTagObject{deltareader.Len(), resolved}, nil 242 default: 243 return nil, InvalidObject 244 } 245 case OBJ_REF_DELTA: 246 var base GitObject 247 // This function is only after the index is built, so 248 // it should have all referenced objects. 249 base, err := idx.GetObject(r, ref) 250 if err != nil { 251 return nil, err 252 } 253 254 deltareader := delta.NewReader(bytes.NewBuffer(rawdata), bytes.NewReader(base.GetContent())) 255 resolved, err := ioutil.ReadAll(&deltareader) 256 if err != nil { 257 return nil, err 258 } 259 switch ty := base.GetType(); ty { 260 case "commit": 261 return GitCommitObject{deltareader.Len(), resolved}, nil 262 case "tree": 263 return GitTreeObject{deltareader.Len(), resolved}, nil 264 case "blob": 265 return GitBlobObject{deltareader.Len(), resolved}, nil 266 case "tag": 267 return GitTagObject{deltareader.Len(), resolved}, nil 268 default: 269 return nil, InvalidObject 270 } 271 default: 272 return nil, fmt.Errorf("Unhandled object type.") 273 } 274 } 275 276 var ocache *lru.Cache 277 278 func init() { 279 // This is a ridiculously small cache, but on large repos 280 // it leaks memory like a sieve since Go is GC'd. It's the 281 // largest cache I could use to successfully index the pack 282 // from https:/github.com/Perl/perl5 on a 2GB vultr node 283 // without running out of memory on Go 1.14.2 284 ocache, _ = lru.New(250) 285 } 286 287 type cachedObject struct { 288 ResolvedType PackEntryType 289 Data []byte 290 291 RefOffset int 292 Ref Sha1 293 } 294 295 // Retrieve an object from the packfile represented by r at offset. 296 // This will use the specified caches to resolve the location of any 297 // deltas, not the index itself. They must be maintained by the caller. 298 var cachedn, cachemiss int 299 300 func (idx PackfileIndexV2) resolveDeltaForIndexing(pack io.ReaderAt, deltat PackEntryType, rawdata []byte, location ObjectOffset, ref Sha1, refoffset int64, cache map[ObjectOffset]*packObject, refcache map[Sha1]*packObject) (t PackEntryType, data io.Reader, osz int64, err error) { 301 datareader := bytes.NewBuffer(rawdata) 302 switch deltat { 303 case OBJ_REF_DELTA: 304 parent := refcache[ref] 305 parent.deltasResolved++ 306 t, r, _, err := idx.getObjectAtOffsetForIndexing(pack, int64(parent.location), false, cache, refcache) 307 if err != nil { 308 return 0, nil, 0, err 309 } 310 base, err := ioutil.ReadAll(r) 311 if err != nil { 312 return 0, nil, 0, err 313 } 314 if parent.deltasAgainst > 0 && parent.deltasAgainst < parent.deltasResolved { 315 ocache.Add(parent.location, cachedObject{t, base, 0, Sha1{}}) 316 } 317 deltareader := delta.NewReader(datareader, bytes.NewReader(base)) 318 return t, &deltareader, int64(deltareader.Len()), err 319 case OBJ_OFS_DELTA: 320 parent := cache[ObjectOffset(location)-ObjectOffset(refoffset)] 321 parent.deltasResolved++ 322 t, r, _, err := idx.getObjectAtOffsetForIndexing(pack, int64(ObjectOffset(location)-ObjectOffset(refoffset)), false, cache, refcache) 323 if err != nil { 324 return 0, nil, 0, err 325 } 326 base, err := ioutil.ReadAll(r) 327 if err != nil { 328 return 0, nil, 0, err 329 } 330 if parent.deltasAgainst > 0 && parent.deltasAgainst < parent.deltasResolved { 331 ocache.Add(ObjectOffset(location)-ObjectOffset(refoffset), cachedObject{t, base, 0, Sha1{}}) 332 } 333 deltareader := delta.NewReader(datareader, bytes.NewReader(base)) 334 return t, &deltareader, int64(deltareader.Len()), err 335 //return t, &deltareader, int64(sz), err 336 default: 337 return 0, nil, 0, fmt.Errorf("Unhandled delta type %v: ", t) 338 } 339 } 340 341 func (idx PackfileIndexV2) getObjectAtOffsetForIndexing(r io.ReaderAt, offset int64, metaOnly bool, cache map[ObjectOffset]*packObject, refcache map[Sha1]*packObject) (t PackEntryType, data io.Reader, osz int64, err error) { 342 if val, ok := ocache.Get(ObjectOffset(offset)); ok { 343 o := val.(cachedObject) 344 cachedn++ 345 346 if o.ResolvedType == OBJ_OFS_DELTA || o.ResolvedType == OBJ_REF_DELTA { 347 return idx.resolveDeltaForIndexing(r, o.ResolvedType, o.Data, ObjectOffset(offset), o.Ref, int64(o.RefOffset), cache, refcache) 348 } 349 return o.ResolvedType, bytes.NewReader(o.Data), int64(len(o.Data)), nil 350 } else { 351 cachemiss++ 352 } 353 354 var p PackfileHeader 355 356 // 4k should be enough for the header. 357 var datareader flate.Reader 358 metareader := io.NewSectionReader(r, offset, 4096) 359 t, sz, ref, refoffset, rawheader := p.ReadHeaderSize(bufio.NewReader(metareader)) 360 // sz is the uncompressed size, so the total size should usually be 361 // less than sz for the compressed data. It might theoretically be a 362 // little more, but we're generous here since this doesn't allocate 363 // anything but just determines how much data the SectionReader will 364 // read before returning an EOF. 365 // 366 // There is still overhead if the underlying ReaderAt reads more data 367 // than it needs to and then discards it, so we assume that it won't 368 // compress to more than double its original size, and then add a floor 369 // of at least 1 disk sector since small objects are more likely to hit 370 // degenerate cases for compression, but also less affected by the 371 // multplication fudge factor, while a floor of 1 disk sector shouldn't 372 // have much effect on disk IO (hopefully.) 373 if sz != 0 { 374 worstdsize := sz * 2 375 if worstdsize < 512 { 376 worstdsize = 512 377 } 378 if !metaOnly || t == OBJ_OFS_DELTA || t == OBJ_REF_DELTA { 379 // dataStream needs a ByteReader, so we wrap 380 // the reader in a bufio 381 dr, err := p.dataStream(bufio.NewReader(io.NewSectionReader(r, offset+int64(len(rawheader)), int64(worstdsize)))) 382 if err != nil { 383 return 0, nil, 0, err 384 } 385 datareader = bufio.NewReader(dr) 386 } 387 } else { 388 // If it's size 0, sz*3 would immediately return io.EOF and cause 389 // panic, so we just directly make the rawdata slice. 390 datareader = bytes.NewBuffer(nil) 391 } 392 393 // The way we calculate the hash changes based on if it's a delta 394 // or not. 395 switch t { 396 case OBJ_COMMIT, OBJ_TREE, OBJ_BLOB, OBJ_TAG: 397 return t, datareader, int64(sz), nil 398 case OBJ_REF_DELTA, OBJ_OFS_DELTA: 399 rawdata, err := ioutil.ReadAll(datareader) 400 if err != nil { 401 return 0, nil, 0, err 402 } 403 return idx.resolveDeltaForIndexing(r, t, rawdata, ObjectOffset(offset), ref, int64(refoffset), cache, refcache) 404 default: 405 return 0, nil, 0, fmt.Errorf("Unhandled object type %v: ", t) 406 } 407 } 408 409 // Find the object in the table. 410 func (idx PackfileIndexV2) GetObjectMetadata(r io.ReaderAt, s Sha1) (GitObject, error) { 411 foundIdx := -1 412 startIdx := idx.Fanout[s[0]] 413 414 // Packfiles are designed so that we could do a binary search here, but 415 // we don't need that optimization yet, so just do a linear search through 416 // the objects with the same first byte. 417 for i := startIdx - 1; idx.Sha1Table[i][0] == s[0]; i-- { 418 if s == idx.Sha1Table[i] { 419 foundIdx = int(i) 420 break 421 } 422 } 423 if foundIdx == -1 { 424 return nil, fmt.Errorf("Object not found: %v", s) 425 } 426 427 var offset int64 428 if idx.FourByteOffsets[foundIdx]&(1<<31) != 0 { 429 // clear out the MSB to get the offset 430 eightbyteOffset := idx.FourByteOffsets[foundIdx] ^ (1 << 31) 431 offset = int64(idx.EightByteOffsets[eightbyteOffset]) 432 } else { 433 offset = int64(idx.FourByteOffsets[foundIdx]) 434 } 435 436 // Now that we've figured out where the object lives, use the packfile 437 // to get the value from the packfile. 438 return idx.getObjectAtOffset(r, offset, true) 439 } 440 441 func (idx PackfileIndexV2) GetObject(r io.ReaderAt, s Sha1) (GitObject, error) { 442 foundIdx := -1 443 startIdx := idx.Fanout[s[0]] 444 if startIdx <= 0 { 445 // The fanout table holds the number of entries less than x, so we 446 // subtract 1 to make sure we don't miss the hash we're looking for, 447 // but we need a special case for s[0] == 0 to prevent underflow 448 startIdx = 1 449 } 450 451 // Packfiles are designed so that we could do a binary search here, but 452 // we don't need that optimization yet, so just do a linear search through 453 // the objects with the same first byte. 454 for i := startIdx - 1; idx.Sha1Table[i][0] == s[0]; i-- { 455 if s == idx.Sha1Table[i] { 456 foundIdx = int(i) 457 break 458 } 459 } 460 461 if foundIdx == -1 { 462 return nil, fmt.Errorf("Object not found: %v", s) 463 } 464 465 var offset int64 466 if idx.FourByteOffsets[foundIdx]&(1<<31) != 0 { 467 // clear out the MSB to get the offset 468 eightbyteOffset := idx.FourByteOffsets[foundIdx] ^ (1 << 31) 469 offset = int64(idx.EightByteOffsets[eightbyteOffset]) 470 } else { 471 offset = int64(idx.FourByteOffsets[foundIdx]) 472 } 473 474 // Now that we've figured out where the object lives, use the packfile 475 // to get the value from the packfile. 476 return idx.getObjectAtOffset(r, offset, false) 477 } 478 479 func getPackFileObject(idx io.Reader, packfile io.ReaderAt, s Sha1, metaOnly bool) (GitObject, error) { 480 var pack PackfileIndexV2 481 if err := binary.Read(idx, binary.BigEndian, &pack.magic); err != nil { 482 return nil, err 483 } 484 if err := binary.Read(idx, binary.BigEndian, &pack.Version); err != nil { 485 return nil, err 486 } 487 if err := binary.Read(idx, binary.BigEndian, &pack.Fanout); err != nil { 488 return nil, err 489 } 490 pack.Sha1Table = make([]Sha1, pack.Fanout[255]) 491 pack.CRC32 = make([]uint32, pack.Fanout[255]) 492 pack.FourByteOffsets = make([]uint32, pack.Fanout[255]) 493 // Load the tables. The first three are based on the number of 494 // objects in the packfile (stored in Fanout[255]), the last 495 // table is dynamicly sized. 496 497 for i := 0; i < len(pack.Sha1Table); i++ { 498 if err := binary.Read(idx, binary.BigEndian, &pack.Sha1Table[i]); err != nil { 499 return nil, err 500 } 501 } 502 for i := 0; i < len(pack.CRC32); i++ { 503 if err := binary.Read(idx, binary.BigEndian, &pack.CRC32[i]); err != nil { 504 return nil, err 505 } 506 } 507 for i := 0; i < len(pack.FourByteOffsets); i++ { 508 if err := binary.Read(idx, binary.BigEndian, &pack.FourByteOffsets[i]); err != nil { 509 return nil, err 510 } 511 } 512 513 // The number of eight byte offsets is dynamic, based on how many 514 // four byte offsets have the MSB set. 515 for _, offset := range pack.FourByteOffsets { 516 if offset&(1<<31) != 0 { 517 var val uint64 518 binary.Read(idx, binary.BigEndian, &val) 519 pack.EightByteOffsets = append(pack.EightByteOffsets, val) 520 } 521 } 522 if metaOnly { 523 return pack.GetObjectMetadata(packfile, s) 524 } 525 return pack.GetObject(packfile, s) 526 } 527 528 func (idx PackfileIndexV2) GetTrailer() (Sha1, Sha1) { 529 return idx.Packfile, idx.IdxFile 530 } 531 532 func (idx PackfileIndexV2) writeIndex(w io.Writer, withTrailer bool) error { 533 if err := binary.Write(w, binary.BigEndian, idx.magic); err != nil { 534 return err 535 } 536 if err := binary.Write(w, binary.BigEndian, idx.Version); err != nil { 537 return err 538 } 539 for _, fanout := range idx.Fanout { 540 if err := binary.Write(w, binary.BigEndian, fanout); err != nil { 541 return err 542 } 543 } 544 for _, sha := range idx.Sha1Table { 545 if err := binary.Write(w, binary.BigEndian, sha); err != nil { 546 return err 547 } 548 } 549 for _, crc32 := range idx.CRC32 { 550 if err := binary.Write(w, binary.BigEndian, crc32); err != nil { 551 return err 552 } 553 } 554 for _, offset := range idx.FourByteOffsets { 555 if err := binary.Write(w, binary.BigEndian, offset); err != nil { 556 return err 557 } 558 } 559 for _, offset := range idx.EightByteOffsets { 560 if err := binary.Write(w, binary.BigEndian, offset); err != nil { 561 return err 562 } 563 } 564 if err := binary.Write(w, binary.BigEndian, idx.Packfile); err != nil { 565 return err 566 } 567 if withTrailer { 568 if err := binary.Write(w, binary.BigEndian, idx.IdxFile); err != nil { 569 return err 570 } 571 } 572 return nil 573 } 574 func (idx PackfileIndexV2) HasObject(s Sha1) bool { 575 startIdx := idx.Fanout[s[0]] 576 if startIdx <= 0 { 577 // The fanout table holds the number of entries less than x, so we 578 // subtract 1 to make sure we don't miss the hash we're looking for, 579 // but we need a special case for s[0] == 0 to prevent underflow 580 startIdx = 1 581 } 582 583 // Packfiles are designed so that we could do a binary search here, but 584 // we don't need that optimization yet, so just do a linear search through 585 // the objects with the same first byte. 586 for i := int(startIdx - 1); i >= 0 && idx.Sha1Table[i][0] == s[0]; i-- { 587 if s == idx.Sha1Table[i] { 588 return true 589 } 590 } 591 return false 592 } 593 594 // Implements the Sorter interface on PackfileIndexV2, in order to sort the 595 // Sha1, CRC32, and 596 func (p *PackfileIndexV2) Len() int { 597 return int(p.Fanout[255]) 598 } 599 600 func (p *PackfileIndexV2) Swap(i, j int) { 601 p.Sha1Table[i], p.Sha1Table[j] = p.Sha1Table[j], p.Sha1Table[i] 602 p.CRC32[i], p.CRC32[j] = p.CRC32[j], p.CRC32[i] 603 p.FourByteOffsets[i], p.FourByteOffsets[j] = p.FourByteOffsets[j], p.FourByteOffsets[i] 604 } 605 606 func (p *PackfileIndexV2) Less(i, j int) bool { 607 for k := 0; k < 20; k++ { 608 if p.Sha1Table[i][k] < p.Sha1Table[j][k] { 609 return true 610 } else if p.Sha1Table[i][k] > p.Sha1Table[j][k] { 611 return false 612 } 613 } 614 return false 615 } 616 617 // calculates and stores the trailer into the packfile. 618 func (p *PackfileIndexV2) calculateTrailer() error { 619 trailer := sha1.New() 620 if err := p.writeIndex(trailer, false); err != nil { 621 return err 622 } 623 t, err := Sha1FromSlice(trailer.Sum(nil)) 624 if err != nil { 625 return err 626 } 627 p.IdxFile = t 628 return nil 629 } 630 631 // Update both the fanout table and Sha1Table for this index. 632 func (idx *PackfileIndexV2) updateFanout(i int, val Sha1) { 633 for j := int(val[0]); j < 256; j++ { 634 atomic.AddUint32(&idx.Fanout[j], 1) 635 } 636 637 // SHA1 is 160 bits.. since we know no one else is writing here, 638 // we pretend it's 2 64 bit ints and a 32 bit int so that we can 639 // use atomic writes instead of a lock. 640 atomic.StoreUint64((*uint64)(unsafe.Pointer(&idx.Sha1Table[i][0])), *(*uint64)(unsafe.Pointer(&val[0]))) 641 atomic.StoreUint64((*uint64)(unsafe.Pointer(&idx.Sha1Table[i][8])), *(*uint64)(unsafe.Pointer(&val[8]))) 642 atomic.StoreUint32((*uint32)(unsafe.Pointer(&idx.Sha1Table[i][16])), *(*uint32)(unsafe.Pointer(&val[16]))) 643 } 644 645 func IndexPack(c *Client, opts IndexPackOptions, r io.Reader) (idx PackfileIndex, rerr error) { 646 isfile := false 647 if f, ok := r.(*os.File); ok && !opts.Stdin { 648 // os.Stdin isn *os.File, but we want to consider it a stream. 649 isfile = (f != os.Stdin) 650 } 651 652 // If --verbose is set, keep track of the time to output 653 // a x kb/s in the output. 654 var startTime time.Time 655 if opts.Verbose { 656 startTime = time.Now() 657 } 658 659 deltas := list.New() 660 indexfile, initcb, icb, crc32cb, priorObjects, priorLocations := indexClosure(c, opts, deltas) 661 662 cb := func(r io.ReaderAt, i, n int, loc int64, t PackEntryType, sz PackEntrySize, ref Sha1, offset ObjectOffset, data []byte) error { 663 if !isfile && opts.Verbose { 664 now := time.Now() 665 elapsed := now.Unix() - startTime.Unix() 666 if elapsed == 0 { 667 progressF("Receiving objects: %2.f%% (%d/%d)", i+1 == n, (float32(i+1) / float32(n) * 100), i+1, n) 668 } else { 669 bps := loc / elapsed 670 progressF("Receiving objects: %2.f%% (%d/%d), %v | %v/s", i+1 == n, (float32(i+1) / float32(n) * 100), i+1, n, formatBytes(loc), formatBytes(bps)) 671 672 } 673 } 674 return icb(r, i, n, loc, t, sz, ref, offset, data) 675 } 676 677 trailerCB := func(r io.ReaderAt, n int, trailer Sha1) error { 678 i := 0 679 for e := deltas.Front(); e != nil; e = e.Next() { 680 i++ 681 delta := e.Value.(*packObject) 682 if opts.Verbose { 683 progressF("Resolving deltas: %2.f%% (%d/%d)", i+1 == deltas.Len(), (float32(i+1) / float32(deltas.Len()) * 100), i+1, deltas.Len()) 684 } 685 686 t, r, sz, err := indexfile.getObjectAtOffsetForIndexing(r, int64(delta.location), false, priorLocations, priorObjects) 687 if err != nil { 688 return err 689 } 690 691 var buf bytes.Buffer 692 if delta.deltasAgainst > 0 && delta.deltasAgainst < delta.deltasResolved { 693 r = io.TeeReader(r, &buf) 694 } 695 sha1, err := HashReaderWithSize(t.String(), sz, r) 696 if err != nil { 697 return err 698 } 699 if delta.deltasAgainst > 0 && delta.deltasAgainst < delta.deltasResolved { 700 ocache.Add(ObjectOffset(delta.location), cachedObject{t, buf.Bytes(), 0, Sha1{}}) 701 } 702 delta.oid = sha1 703 priorObjects[sha1] = delta 704 indexfile.updateFanout(delta.idx, sha1) 705 } 706 707 indexfile.Packfile = trailer 708 709 // println("Cached reads", cachedn, " Cache misses", cachemiss) 710 sort.Sort(indexfile) 711 // The sorting may have changed things, so as a final pass, hash 712 // everything in the index to get the trailer (instead of doing it 713 // while we were calculating it.) 714 if err := indexfile.calculateTrailer(); err != nil { 715 return err 716 } 717 return nil 718 } 719 720 pack, err := iteratePack(c, r, initcb, cb, trailerCB, crc32cb) 721 if err != nil { 722 // println("err: Cached reads", cachedn, " Cache misses", cachemiss) 723 return nil, err 724 } 725 defer pack.Close() 726 727 // Write the index to disk and return 728 var basename, idxname string 729 if f, ok := r.(*os.File); ok && isfile && !opts.Stdin { 730 basename = pack.Name() 731 basename = strings.TrimSuffix(f.Name(), ".pack") 732 idxname = basename + ".idx" 733 } else { 734 packhash, _ := indexfile.GetTrailer() 735 basename := filepath.Join(c.ObjectDir, "pack", fmt.Sprintf("pack-%s", packhash)) 736 idxname = basename + ".idx" 737 738 if opts.Keep != "" { 739 if err := ioutil.WriteFile(basename+".keep", []byte(opts.Keep+"\n"), 0755); err != nil { 740 return indexfile, err 741 } 742 } 743 744 pack.Close() 745 if err := os.Rename(pack.Name(), basename+".pack"); err != nil { 746 return indexfile, err 747 } 748 } 749 750 if opts.Output == nil { 751 o, err := os.Create(idxname) 752 if err != nil { 753 return indexfile, err 754 } 755 defer o.Close() 756 opts.Output = o 757 } 758 759 if err := indexfile.WriteIndex(opts.Output); err != nil { 760 return indexfile, err 761 } 762 return indexfile, err 763 } 764 765 type packObject struct { 766 idx int 767 oid Sha1 768 location ObjectOffset 769 deltasAgainst, deltasResolved int 770 baselocation ObjectOffset 771 typ PackEntryType 772 } 773 774 func indexClosure(c *Client, opts IndexPackOptions, deltas *list.List) (*PackfileIndexV2, func(int), packIterator, func(int, uint32) error, map[Sha1]*packObject, map[ObjectOffset]*packObject) { 775 var indexfile PackfileIndexV2 776 777 indexfile.magic = [4]byte{0377, 't', 'O', 'c'} 778 indexfile.Version = 2 779 780 var mu sync.Mutex 781 782 // For REF_DELTA to resolve 783 priorObjects := make(map[Sha1]*packObject) 784 // For OFS_DELTA to resolve 785 priorLocations := make(map[ObjectOffset]*packObject) 786 787 icb := func(n int) { 788 indexfile.Sha1Table = make([]Sha1, n) 789 indexfile.CRC32 = make([]uint32, n) 790 indexfile.FourByteOffsets = make([]uint32, n) 791 792 // See note in init function about the LRU causing 793 // memory leaks. If it's a small pack, we don't care, 794 // but on very large repos we need to use a small 795 // cache to avoid running out of memory. 796 // 797 // Chains of deltas tend to be close together it does 798 // still give us some benefit even with a small cache. 799 if n > 100000 { 800 ocache, _ = lru.New(250) 801 } else { 802 ocache, _ = lru.New(5000) 803 } 804 } 805 806 cb := func(r io.ReaderAt, i, n int, location int64, t PackEntryType, sz PackEntrySize, ref Sha1, offset ObjectOffset, rawdata []byte) error { 807 if opts.Verbose { 808 progressF("Indexing objects: %2.f%% (%d/%d)", i+1 == n, (float32(i+1) / float32(n) * 100), i+1, n) 809 } 810 811 if location < (1 << 31) { 812 atomic.StoreUint32(&indexfile.FourByteOffsets[i], uint32(location)) 813 } else { 814 atomic.StoreUint32(&indexfile.FourByteOffsets[i], uint32(len(indexfile.EightByteOffsets))|(1<<31)) 815 mu.Lock() 816 indexfile.EightByteOffsets = append(indexfile.EightByteOffsets, uint64(location)) 817 mu.Unlock() 818 } 819 820 switch t { 821 case OBJ_COMMIT, OBJ_TREE, OBJ_BLOB, OBJ_TAG: 822 ocache.Add(ObjectOffset(location), cachedObject{t, rawdata, 0, Sha1{}}) 823 sha1, err := HashReaderWithSize(t.String(), int64(len(rawdata)), bytes.NewReader(rawdata)) 824 if err != nil && opts.Strict { 825 return err 826 } 827 828 indexfile.updateFanout(i, sha1) 829 // Maintain the list of references for delta chains. 830 // There's a possibility a delta refers to a reference 831 // before the reference in packs inflated from thin packs, 832 // so we need to check if it exists before blindly 833 // setting it. 834 // If it's been already been referenced, cache it. 835 // Otherwise don't to save memory and only cache if 836 // there are references to it. 837 mu.Lock() 838 objCache := &packObject{ 839 idx: i, 840 oid: sha1, 841 location: ObjectOffset(location), 842 } 843 if o, ok := priorObjects[sha1]; !ok { 844 priorObjects[sha1] = objCache 845 } else { 846 // We have the lock and we know no one is reading 847 // these until we're done the first round of 848 // indexing anyways, so we don't bother to use 849 // the atomic package. 850 o.location = ObjectOffset(location) 851 o.idx = i 852 } 853 priorLocations[objCache.location] = objCache 854 mu.Unlock() 855 case OBJ_REF_DELTA: 856 log.Printf("Noting REF_DELTA to resolve: %v\n", ref) 857 mu.Lock() 858 o, ok := priorObjects[ref] 859 if !ok { 860 // It hasn't been seen yet, so just note 861 // that there's a a delta against it for 862 // later. 863 // Since we haven't seen it yet, we don't 864 // have a location. 865 objCache := &packObject{ 866 oid: ref, 867 deltasAgainst: 1, 868 deltasResolved: 0, 869 } 870 priorObjects[ref] = objCache 871 } else { 872 o.deltasAgainst += 1 873 } 874 self := &packObject{ 875 idx: i, 876 location: ObjectOffset(location), 877 deltasAgainst: 0, 878 deltasResolved: 0, 879 typ: t, 880 } 881 priorLocations[ObjectOffset(location)] = self 882 deltas.PushBack(self) 883 mu.Unlock() 884 case OBJ_OFS_DELTA: 885 log.Printf("Noting OFS_DELTA to resolve from %v\n", location-int64(offset)) 886 mu.Lock() 887 // Adjust the number of deltas against the parent 888 // priorLocations should always be populated with 889 // the prior objects (even if some fields aren't 890 // populated), and offets are always looking back 891 // into the packfile, so this shouldn't happen. 892 if o, ok := priorLocations[ObjectOffset(location-int64(offset))]; !ok { 893 panic("Can not determine delta base") 894 } else { 895 o.deltasAgainst += 1 896 } 897 898 // Add ourselves to the map for future deltas 899 self := &packObject{ 900 idx: i, 901 location: ObjectOffset(location), 902 deltasAgainst: 0, 903 deltasResolved: 0, 904 baselocation: ObjectOffset(location) - ObjectOffset(offset), 905 typ: t, 906 } 907 priorLocations[ObjectOffset(location)] = self 908 deltas.PushBack(self) 909 mu.Unlock() 910 default: 911 panic("Unhandled type in IndexPack: " + t.String()) 912 } 913 return nil 914 } 915 crc32cb := func(i int, crc uint32) error { 916 indexfile.CRC32[i] = crc 917 return nil 918 } 919 return &indexfile, icb, cb, crc32cb, priorObjects, priorLocations 920 } 921 922 // Indexes the pack, and stores a copy in Client's .git/objects/pack directory as it's 923 // doing so. This is the equivalent of "git index-pack --stdin", but works with any 924 // reader. 925 func IndexAndCopyPack(c *Client, opts IndexPackOptions, r io.Reader) (PackfileIndex, error) { 926 return IndexPack(c, opts, r) 927 } 928 929 func formatBytes(n int64) string { 930 if n <= 1024 { 931 return fmt.Sprintf("%v B", n) 932 } else if n <= 1024*1024 { 933 return fmt.Sprintf("%.2f KiB", float64(n)/float64(1024)) 934 } else if n <= 1024*1024*1024 { 935 return fmt.Sprintf("%.2f MiB", float64(n)/float64(1024*1024)) 936 } 937 return fmt.Sprintf("%.2f GiB", float64(n)/float64(1024*1024*1024)) 938 }