github.com/pingcap/badger@v1.5.1-0.20230103063557-828f39b09b6d/blob.go (about)

     1  package badger
     2  
     3  import (
     4  	"encoding/binary"
     5  	"fmt"
     6  	"io/ioutil"
     7  	"math"
     8  	"os"
     9  	"path/filepath"
    10  	"reflect"
    11  	"sort"
    12  	"strconv"
    13  	"strings"
    14  	"sync"
    15  	"sync/atomic"
    16  	"unsafe"
    17  
    18  	"github.com/pingcap/badger/directio"
    19  	"github.com/pingcap/badger/epoch"
    20  	"github.com/pingcap/badger/fileutil"
    21  	"github.com/pingcap/badger/y"
    22  	"github.com/pingcap/errors"
    23  	"github.com/pingcap/log"
    24  	"go.uber.org/zap"
    25  )
    26  
// blobFileSuffix is the extension newBlobFileName appends to every blob file name.
const blobFileSuffix = ".blob"
    28  
// blobPointer locates one value stored in a blob file: the embedded
// logicalAddr says where the value was written and length is the value size in
// bytes.
//
// The field order (fid, offset, length) matches the 12-byte encoding produced
// by blobFileBuilder.append; do not reorder the fields.
type blobPointer struct {
	logicalAddr
	length uint32
}
    33  
    34  func (bp *blobPointer) decode(val []byte) {
    35  	ptr := (*blobPointer)(unsafe.Pointer(&val[0]))
    36  	*bp = *ptr
    37  }
    38  
// mappingEntry is one 12-byte record of a blob file's address mapping table:
// it maps the logical address a value was originally written at to the
// physical offset of that value inside this file.
//
// loadOffsetMap views the mmapped table directly as []mappingEntry, so the
// field order and sizes must match the on-disk record layout.
type mappingEntry struct {
	logicalAddr
	physicalOffset uint32
}
    43  
/*
blobFile is the in-memory handle of one blob file on disk.

Data format of a blob file:
	/ addrMappingLength(4) / addrMappingEntry(12) ... / entry ... / zero (4) / discardInfo ... /

addrMappingLength is 0 for a file written by blobFileBuilder (no mapping
table); for a file written by GC it is the size of the whole mapping section
including these 4 bytes.

addrMappingEntry:
	/ logicalAddr (8) / physicalOffset(4) /

logicalAddr:
	/ logicalFid(4) / logicalOffset(4) /

entry:
	/ value len(4) / value(value len) /

discard info (appended repeatedly at the end of the file):
	/ logicalAddr(8) ... / totalDiscard(4) / discardInfoLength(4) /
*/
type blobFile struct {
	// path is the file system path of the blob file.
	path string
	// fid is the physical file ID.
	fid uint32
	// fd is the open file, positioned at the end so that writes append.
	fd *os.File
	// fileSize is the size of the file in bytes, including appended discard info.
	fileSize uint32
	// mappingSize is the addrMappingLength read from the file header.
	mappingSize uint32
	// mmap is the mapped mapping section; nil when mappingSize is 0.
	mmap []byte
	// mappingEntries is a view over mmap, sorted by logical address.
	mappingEntries []mappingEntry

	// only accessed by gcHandler
	totalDiscard uint32
}
    72  
    73  func (bf *blobFile) getID() uint32 {
    74  	if bf == nil {
    75  		return math.MaxUint32
    76  	}
    77  	return bf.fid
    78  }
    79  
    80  func (bf *blobFile) loadOffsetMap() error {
    81  	var headBuf [4]byte
    82  	_, err := bf.fd.ReadAt(headBuf[:], 0)
    83  	if err != nil {
    84  		return err
    85  	}
    86  	bf.mappingSize = binary.LittleEndian.Uint32(headBuf[:])
    87  	if bf.mappingSize == 0 {
    88  		return nil
    89  	}
    90  	bf.mmap, err = y.Mmap(bf.fd, false, int64(bf.mappingSize))
    91  	if err != nil {
    92  		return err
    93  	}
    94  	hdr := (*reflect.SliceHeader)(unsafe.Pointer(&bf.mappingEntries))
    95  	hdr.Len = int(bf.mappingSize-4) / 12
    96  	hdr.Cap = hdr.Len
    97  	hdr.Data = uintptr(unsafe.Pointer(&bf.mmap[4]))
    98  	return nil
    99  }
   100  
   101  func (bf *blobFile) loadDiscards() error {
   102  	var footBuf [8]byte
   103  	_, err := bf.fd.ReadAt(footBuf[:], int64(bf.fileSize-8))
   104  	if err != nil {
   105  		return err
   106  	}
   107  	bf.totalDiscard = binary.LittleEndian.Uint32(footBuf[:])
   108  	return nil
   109  }
   110  
   111  func (bf *blobFile) read(bp blobPointer, s *y.Slice) (buf []byte, err error) {
   112  	physicalOff := int64(bf.getPhysicalOffset(bp.logicalAddr))
   113  	buf = s.Resize(int(bp.length))
   114  	_, err = bf.fd.ReadAt(buf, physicalOff) // skip the 4 bytes length.
   115  	return buf, err
   116  }
   117  
   118  func (bf *blobFile) getPhysicalOffset(addr logicalAddr) uint32 {
   119  	if bf.fid == addr.fid {
   120  		return addr.offset
   121  	}
   122  	n := sort.Search(len(bf.mappingEntries), func(i int) bool {
   123  		entry := bf.mappingEntries[i]
   124  		return !entry.logicalAddr.Less(addr)
   125  	})
   126  	return bf.mappingEntries[n].physicalOffset
   127  }
   128  
   129  func (bf *blobFile) Delete() error {
   130  	if bf.mmap != nil {
   131  		y.Munmap(bf.mmap)
   132  	}
   133  	bf.fd.Close()
   134  	return os.Remove(bf.path)
   135  }
   136  
// blobFileBuilder writes a new blob file: a 4-byte zero header, then one
// length-prefixed entry per append call, then a 4-byte zero footer on finish.
type blobFileBuilder struct {
	// fid is the ID of the file being built; it is embedded in every pointer
	// returned by append.
	fid uint32
	// file is the underlying file opened through directio.
	file *os.File
	// writer buffers the appended bytes and writes them to file.
	writer *fileutil.DirectWriter
}
   142  
   143  func newBlobFileBuilder(fid uint32, dir string, writeBufferSize int) (*blobFileBuilder, error) {
   144  	fileName := newBlobFileName(fid, dir)
   145  	file, err := directio.OpenFile(fileName, os.O_CREATE|os.O_RDWR, 0666)
   146  	if err != nil {
   147  		return nil, err
   148  	}
   149  	writer := fileutil.NewDirectWriter(file, writeBufferSize, nil)
   150  	// Write 4 bytes 0 header.
   151  	err = writer.Append(make([]byte, 4))
   152  	if err != nil {
   153  		return nil, err
   154  	}
   155  	return &blobFileBuilder{
   156  		fid:    uint32(fid),
   157  		file:   file,
   158  		writer: writer,
   159  	}, nil
   160  }
   161  
   162  func (bfb *blobFileBuilder) append(value []byte) (bp []byte, err error) {
   163  	var lenBuf [4]byte
   164  	binary.LittleEndian.PutUint32(lenBuf[:], uint32(len(value)))
   165  	err = bfb.writer.Append(lenBuf[:])
   166  	if err != nil {
   167  		return
   168  	}
   169  	offset := uint32(bfb.writer.Offset())
   170  	err = bfb.writer.Append(value)
   171  	if err != nil {
   172  		return
   173  	}
   174  	bp = make([]byte, 12)
   175  	binary.LittleEndian.PutUint32(bp, bfb.fid)
   176  	binary.LittleEndian.PutUint32(bp[4:], offset)
   177  	binary.LittleEndian.PutUint32(bp[8:], uint32(len(value)))
   178  	return
   179  }
   180  
   181  func (bfb *blobFileBuilder) finish() (*blobFile, error) {
   182  	// Write 4 bytes footer
   183  	err := bfb.writer.Append(make([]byte, 4))
   184  	if err != nil {
   185  		return nil, err
   186  	}
   187  	err = bfb.writer.Finish()
   188  	if err != nil {
   189  		return nil, err
   190  	}
   191  	_ = bfb.file.Close()
   192  	return newBlobFile(bfb.file.Name(), bfb.fid, uint32(bfb.writer.Offset()))
   193  }
   194  
   195  func newBlobFile(path string, fid, fileSize uint32) (*blobFile, error) {
   196  	file, err := os.OpenFile(path, os.O_RDWR, 0666)
   197  	if err != nil {
   198  		return nil, err
   199  	}
   200  	_, err = file.Seek(0, 2)
   201  	if err != nil {
   202  		return nil, err
   203  	}
   204  	return &blobFile{
   205  		path:     path,
   206  		fid:      fid,
   207  		fd:       file,
   208  		fileSize: fileSize,
   209  	}, nil
   210  }
   211  
   212  func newBlobFileName(id uint32, dir string) string {
   213  	return filepath.Join(dir, fmt.Sprintf("%08x", id)+blobFileSuffix)
   214  }
   215  
// blobManager tracks the blob files of a DB and the mapping from the logical
// file IDs stored in blob pointers to the physical files that currently hold
// their values.
type blobManager struct {
	// filesLock guards physicalFiles and logicalToPhysical.
	filesLock     sync.RWMutex
	physicalFiles map[uint32]*blobFile
	// logicalToPhysical maps logical fid to physical fid.
	logicalToPhysical map[uint32]uint32
	// changeLog is the append-only "blob_change.log" file holding 8-byte
	// (from fid, to fid) records.
	changeLog *os.File
	// dirPath is the directory scanned for blob files and the change log.
	dirPath string
	kv      *DB
	// discardCh is the send side of the channel drained by the GC handler.
	discardCh chan<- *DiscardStats
	// maxFileID is the largest file ID handed out so far; allocFileID
	// increments it atomically.
	maxFileID uint32
}
   226  
   227  func (bm *blobManager) Open(kv *DB, opt Options) error {
   228  	bm.physicalFiles = map[uint32]*blobFile{}
   229  	bm.dirPath = opt.ValueDir
   230  	bm.kv = kv
   231  	validFids, err := bm.loadChangeLogs()
   232  	if err != nil {
   233  		return err
   234  	}
   235  	fileInfos, err := ioutil.ReadDir(bm.dirPath)
   236  	if err != nil {
   237  		return errors.Wrapf(err, "Error while opening blob files")
   238  	}
   239  	for _, fileInfo := range fileInfos {
   240  		if !strings.HasSuffix(fileInfo.Name(), ".blob") {
   241  			continue
   242  		}
   243  		fsz := len(fileInfo.Name())
   244  		fid64, err := strconv.ParseUint(fileInfo.Name()[:fsz-5], 16, 64)
   245  		if err != nil {
   246  			return errors.Wrapf(err, "Error while parsing blob file id for file: %q", fileInfo.Name())
   247  		}
   248  		fid := uint32(fid64)
   249  		path := filepath.Join(bm.dirPath, fileInfo.Name())
   250  		if _, ok := validFids[fid]; !ok {
   251  			_ = os.Remove(path)
   252  			continue
   253  		}
   254  		if _, ok := bm.physicalFiles[fid]; ok {
   255  			return errors.Errorf("Found the same blob file twice: %d", fid)
   256  		}
   257  		blobFile, err := newBlobFile(path, fid, uint32(fileInfo.Size()))
   258  		if err != nil {
   259  			return err
   260  		}
   261  		err = blobFile.loadOffsetMap()
   262  		if err != nil {
   263  			return err
   264  		}
   265  		err = blobFile.loadDiscards()
   266  		if err != nil {
   267  			return err
   268  		}
   269  		bm.physicalFiles[fid] = blobFile
   270  	}
   271  	for _, to := range bm.logicalToPhysical {
   272  		if _, ok := bm.physicalFiles[to]; !ok {
   273  			return errors.Errorf("File %d not found", to)
   274  		}
   275  	}
   276  	discardCh := make(chan *DiscardStats, 1024)
   277  	bm.discardCh = discardCh
   278  	gcHandler := &blobGCHandler{
   279  		bm:                bm,
   280  		discardCh:         discardCh,
   281  		gcCandidate:       map[*blobFile]struct{}{},
   282  		physicalCache:     make(map[uint32]*blobFile, len(bm.physicalFiles)),
   283  		logicalToPhysical: map[uint32]uint32{},
   284  	}
   285  	for k, v := range bm.logicalToPhysical {
   286  		gcHandler.logicalToPhysical[k] = v
   287  	}
   288  	for k, v := range bm.physicalFiles {
   289  		gcHandler.physicalCache[k] = v
   290  	}
   291  	kv.closers.blobManager = y.NewCloser(1)
   292  	go gcHandler.run(kv.closers.blobManager)
   293  	return nil
   294  }
   295  
   296  func (bm *blobManager) allocFileID() uint32 {
   297  	return atomic.AddUint32(&bm.maxFileID, 1)
   298  }
   299  
   300  func (bm *blobManager) read(ptr []byte, s *y.Slice, cache map[uint32]*blobCache) ([]byte, error) {
   301  	var bp blobPointer
   302  	bp.decode(ptr)
   303  	bc, ok := cache[bp.fid]
   304  	if !ok {
   305  		bf := bm.getFile(bp.fid)
   306  		bc = &blobCache{
   307  			file: bf,
   308  		}
   309  		cache[bf.fid] = bc
   310  	}
   311  	return bc.read(bp, s)
   312  }
   313  
   314  func (bm *blobManager) getFile(fid uint32) *blobFile {
   315  	bm.filesLock.RLock()
   316  	file, ok := bm.physicalFiles[fid]
   317  	if !ok {
   318  		var physicalID uint32
   319  		physicalID, ok = bm.logicalToPhysical[fid]
   320  		if ok {
   321  			file = bm.physicalFiles[physicalID]
   322  		}
   323  	}
   324  	if file == nil {
   325  		log.Error("failed to get file", zap.Uint32("id", fid))
   326  	}
   327  	bm.filesLock.RUnlock()
   328  	return file
   329  }
   330  
   331  func (bm *blobManager) addFile(file *blobFile) error {
   332  	buf := make([]byte, 8)
   333  	binary.LittleEndian.PutUint32(buf, file.fid)
   334  	binary.LittleEndian.PutUint32(buf[4:], file.fid)
   335  	_, err := bm.changeLog.Write(buf)
   336  	if err != nil {
   337  		return err
   338  	}
   339  	err = bm.changeLog.Sync()
   340  	if err != nil {
   341  		return err
   342  	}
   343  	bm.filesLock.Lock()
   344  	bm.physicalFiles[file.fid] = file
   345  	bm.filesLock.Unlock()
   346  	return nil
   347  }
   348  
   349  func (bm *blobManager) addGCFile(oldFiles []*blobFile, newFile *blobFile, logicalFiles map[uint32]struct{}, guard *epoch.Guard) error {
   350  	oldFids := make([]uint32, len(oldFiles))
   351  	for i, v := range oldFiles {
   352  		oldFids[i] = v.fid
   353  	}
   354  	log.Info("addGCFile", zap.Uint32s("old files", oldFids), zap.Uint32("new file id", newFile.getID()), zap.String("logical files", fmt.Sprintf("%v", logicalFiles)))
   355  	buf := make([]byte, len(oldFiles)*8)
   356  	for i, oldFile := range oldFiles {
   357  		offset := i * 8
   358  		binary.LittleEndian.PutUint32(buf[offset:], oldFile.fid)
   359  		binary.LittleEndian.PutUint32(buf[offset+4:], newFile.getID())
   360  	}
   361  	_, err := bm.changeLog.Write(buf)
   362  	if err != nil {
   363  		return err
   364  	}
   365  	err = bm.changeLog.Sync()
   366  	if err != nil {
   367  		return err
   368  	}
   369  	bm.filesLock.Lock()
   370  	if newFile != nil {
   371  		bm.physicalFiles[newFile.fid] = newFile
   372  		for logicalFid := range logicalFiles {
   373  			bm.logicalToPhysical[logicalFid] = newFile.fid
   374  		}
   375  	} else {
   376  		for logicalFid := range logicalFiles {
   377  			delete(bm.logicalToPhysical, logicalFid)
   378  		}
   379  	}
   380  	for _, old := range oldFiles {
   381  		delete(bm.physicalFiles, old.fid)
   382  	}
   383  	bm.filesLock.Unlock()
   384  	del := make([]epoch.Resource, len(oldFids))
   385  	for i := range oldFiles {
   386  		del[i] = oldFiles[i]
   387  	}
   388  	guard.Delete(del)
   389  	return nil
   390  }
   391  
// fidNode is a singly linked list node carrying a file ID.
// NOTE(review): nothing in the visible part of this file references it —
// confirm whether it is still needed.
type fidNode struct {
	fid  uint32
	next *fidNode
}
   396  
   397  func (bm *blobManager) loadChangeLogs() (validFids map[uint32]struct{}, err error) {
   398  	changeLogFileName := filepath.Join(bm.dirPath, "blob_change.log")
   399  	data, err := ioutil.ReadFile(changeLogFileName)
   400  	if err != nil && !os.IsNotExist(err) {
   401  		return nil, err
   402  	}
   403  	validFids = bm.buildLogicalToPhysical(data)
   404  	bm.changeLog, err = os.OpenFile(changeLogFileName, os.O_CREATE|os.O_RDWR, 0666)
   405  	if err != nil {
   406  		return nil, err
   407  	}
   408  	_, err = bm.changeLog.Seek(0, 2)
   409  	if err != nil {
   410  		return nil, err
   411  	}
   412  	return validFids, nil
   413  }
   414  
   415  func (bm *blobManager) buildLogicalToPhysical(data []byte) (validFids map[uint32]struct{}) {
   416  	changeLogMap := map[uint32]uint32{} // maps old fid to a new fid.
   417  	logicalFids := map[uint32]struct{}{}
   418  	for i := 0; i < len(data); i += 8 {
   419  		fromFid := binary.LittleEndian.Uint32(data[i:])
   420  		toFid := binary.LittleEndian.Uint32(data[i+4:])
   421  		changeLogMap[fromFid] = toFid
   422  		if fromFid == toFid {
   423  			logicalFids[fromFid] = struct{}{}
   424  		}
   425  	}
   426  	bm.logicalToPhysical = map[uint32]uint32{}
   427  	validFids = map[uint32]struct{}{}
   428  	for fid := range logicalFids {
   429  		toFid := getToFid(changeLogMap, fid)
   430  		if toFid != math.MaxUint32 {
   431  			bm.logicalToPhysical[fid] = toFid
   432  			validFids[toFid] = struct{}{}
   433  			if bm.maxFileID < toFid {
   434  				bm.maxFileID = toFid
   435  			}
   436  		}
   437  	}
   438  	return
   439  }
   440  
   441  func getToFid(logicalMap map[uint32]uint32, toFid uint32) uint32 {
   442  	for {
   443  		nextToFid, ok := logicalMap[toFid]
   444  		if !ok {
   445  			return toFid
   446  		}
   447  		if nextToFid == toFid {
   448  			return toFid
   449  		}
   450  		toFid = nextToFid
   451  	}
   452  }
   453  
// blobGCHandler consumes discard stats in its own goroutine (see run), records
// them in the affected blob files and rewrites files once enough of their
// content has been discarded.
type blobGCHandler struct {
	bm *blobManager
	// discardCh is the receive side of blobManager.discardCh.
	discardCh <-chan *DiscardStats
	// physicalCache is the handler's own physical fid -> file map; it starts as
	// a copy of blobManager.physicalFiles.
	physicalCache map[uint32]*blobFile
	// logicalToPhysical is the handler's own copy of the logical fid mapping.
	logicalToPhysical map[uint32]uint32

	// gcCandidate is the set of files whose discarded bytes exceed half of
	// their size.
	gcCandidate map[*blobFile]struct{}
	// candidateValidSize and candidateDiscardSize accumulate the valid and
	// discarded byte counts of candidates; doGCIfNeeded compares them against
	// the GC thresholds.
	candidateValidSize   uint32
	candidateDiscardSize uint64
}
   464  
   465  func (h *blobGCHandler) run(c *y.Closer) {
   466  	defer c.Done()
   467  	for {
   468  		select {
   469  		case discardInfo := <-h.discardCh:
   470  			h.handleDiscardInfo(discardInfo)
   471  			err := h.doGCIfNeeded()
   472  			if err != nil {
   473  				log.Error("handle discardInfo", zap.Error(err))
   474  			}
   475  		case <-c.HasBeenClosed():
   476  			return
   477  		}
   478  	}
   479  }
   480  
   481  func (h *blobGCHandler) handleDiscardInfo(discardStats *DiscardStats) {
   482  	physicalDiscards := make(map[uint32][]blobPointer)
   483  	for _, ptr := range discardStats.ptrs {
   484  		physicalFid := h.getLogicalToPhysical(ptr.fid)
   485  		ptrs := physicalDiscards[physicalFid]
   486  		physicalDiscards[physicalFid] = append(ptrs, ptr)
   487  	}
   488  	for physicalFid, ptrs := range physicalDiscards {
   489  		err := h.writeDiscardToFile(physicalFid, ptrs)
   490  		if err != nil {
   491  			log.Error("handleDiscardInfo", zap.Uint32("physicalFid", physicalFid), zap.Error(err))
   492  			continue
   493  		}
   494  	}
   495  }
   496  
   497  func (h *blobGCHandler) getPhysicalFile(physicalFid uint32) *blobFile {
   498  	file := h.physicalCache[physicalFid]
   499  	if file == nil {
   500  		file = h.bm.getFile(physicalFid)
   501  		h.physicalCache[physicalFid] = file
   502  	}
   503  	return file
   504  }
   505  
   506  func (h *blobGCHandler) getLogicalToPhysical(logicalFid uint32) uint32 {
   507  	physicalFid, ok := h.logicalToPhysical[logicalFid]
   508  	if !ok {
   509  		// must be newly added L0 blob
   510  		h.logicalToPhysical[logicalFid] = logicalFid
   511  		physicalFid = logicalFid
   512  	}
   513  	return physicalFid
   514  }
   515  
   516  func (h *blobGCHandler) writeDiscardToFile(physicalFid uint32, ptrs []blobPointer) error {
   517  	file := h.getPhysicalFile(physicalFid)
   518  	discardInfo := make([]byte, uint32(len(ptrs)*8+8))
   519  	totalDiscard := file.totalDiscard + uint32(len(discardInfo))
   520  	for i, ptr := range ptrs {
   521  		binary.LittleEndian.PutUint32(discardInfo[i*8:], ptr.fid)
   522  		binary.LittleEndian.PutUint32(discardInfo[i*8+4:], ptr.offset)
   523  		totalDiscard += ptr.length
   524  	}
   525  	binary.LittleEndian.PutUint32(discardInfo[len(discardInfo)-8:], totalDiscard)
   526  	binary.LittleEndian.PutUint32(discardInfo[len(discardInfo)-4:], uint32(len(discardInfo)))
   527  	_, err := file.fd.Write(discardInfo)
   528  	if err != nil {
   529  		return err
   530  	}
   531  	file.totalDiscard = totalDiscard
   532  	file.fileSize += uint32(len(discardInfo))
   533  	if file.totalDiscard > file.fileSize/2 {
   534  		h.gcCandidate[file] = struct{}{}
   535  		h.candidateValidSize += file.fileSize - file.mappingSize - file.totalDiscard
   536  		h.candidateDiscardSize += uint64(file.totalDiscard)
   537  	}
   538  	return nil
   539  }
   540  
// Thresholds consulted by doGCIfNeeded, in bytes.
var (
	// GC is skipped while the accumulated candidate valid size is below this
	// value and the accumulated discard size is below maxCandidateDiscardSize.
	minCandidateValidSize uint32 = 32 * 1024 * 1024
	// Upper bound on the valid bytes of the files picked for a single GC run.
	maxCandidateValidSize uint32 = 128 * 1024 * 1024
	// Accumulated candidate discard size at which GC runs regardless of the
	// valid size.
	maxCandidateDiscardSize uint64 = 512 * 1024 * 1024
)
   546  
   547  func (h *blobGCHandler) doGCIfNeeded() error {
   548  	guard := h.bm.kv.resourceMgr.Acquire()
   549  	defer guard.Done()
   550  
   551  	if len(h.gcCandidate) == 0 {
   552  		return nil
   553  	}
   554  	if h.candidateValidSize < minCandidateValidSize && h.candidateDiscardSize < maxCandidateDiscardSize {
   555  		return nil
   556  	}
   557  	var oldFiles []*blobFile
   558  	var totalValidSize uint32
   559  	for candidate := range h.gcCandidate {
   560  		validSize := candidate.fileSize - candidate.mappingSize - candidate.totalDiscard
   561  		if totalValidSize+validSize > maxCandidateValidSize {
   562  			break
   563  		}
   564  		totalValidSize += validSize
   565  		oldFiles = append(oldFiles, candidate)
   566  		delete(h.gcCandidate, candidate)
   567  	}
   568  	var validEntries []validEntry
   569  	for _, blobFile := range oldFiles {
   570  		blobBytes, err := ioutil.ReadFile(blobFile.path)
   571  		if err != nil {
   572  			return err
   573  		}
   574  		validEntries = h.extractValidEntries(validEntries, blobFile, blobBytes)
   575  	}
   576  	if len(validEntries) == 0 {
   577  		for _, oldFile := range oldFiles {
   578  			delete(h.physicalCache, oldFile.fid)
   579  		}
   580  		return h.bm.addGCFile(oldFiles, nil, nil, guard)
   581  	}
   582  	sort.Slice(validEntries, func(i, j int) bool {
   583  		return validEntries[i].logicalAddr.Less(validEntries[j].logicalAddr)
   584  	})
   585  	newFid := h.bm.allocFileID()
   586  	fileName := newBlobFileName(newFid, h.bm.kv.opt.Dir)
   587  	file, err := directio.OpenFile(fileName, os.O_CREATE|os.O_RDWR, 0666)
   588  	if err != nil {
   589  		return err
   590  	}
   591  	writer := fileutil.NewDirectWriter(file, 1024*1024, nil)
   592  	// 4 bytes addrMapping length
   593  	mappingSize := 4 + uint32(len(validEntries))*12
   594  	lenBuf := make([]byte, 4)
   595  	binary.LittleEndian.PutUint32(lenBuf, mappingSize)
   596  	err = writer.Append(lenBuf)
   597  	if err != nil {
   598  		return err
   599  	}
   600  	mappingEntryBuf := make([]byte, 12)
   601  	newOffset := 4 + uint32(len(validEntries))*12 + 4
   602  	logicalFids := make(map[uint32]struct{})
   603  	for _, entry := range validEntries {
   604  		logicalFids[entry.fid] = struct{}{}
   605  		binary.LittleEndian.PutUint32(mappingEntryBuf, entry.fid)
   606  		binary.LittleEndian.PutUint32(mappingEntryBuf[4:], entry.offset)
   607  		binary.LittleEndian.PutUint32(mappingEntryBuf[8:], newOffset)
   608  		newOffset += uint32(len(entry.value)) + 4
   609  		err = writer.Append(mappingEntryBuf)
   610  		if err != nil {
   611  			return err
   612  		}
   613  	}
   614  	for _, entry := range validEntries {
   615  		binary.LittleEndian.PutUint32(lenBuf, uint32(len(entry.value)))
   616  		err = writer.Append(lenBuf)
   617  		if err != nil {
   618  			return err
   619  		}
   620  		err = writer.Append(entry.value)
   621  		if err != nil {
   622  			return err
   623  		}
   624  	}
   625  	// 4 bytes 0 discard length
   626  	err = writer.Append(make([]byte, 4))
   627  	if err != nil {
   628  		return err
   629  	}
   630  	err = writer.Finish()
   631  	if err != nil {
   632  		return err
   633  	}
   634  	file.Close()
   635  	blobFile, err := newBlobFile(file.Name(), newFid, uint32(writer.Offset()))
   636  	if err != nil {
   637  		return err
   638  	}
   639  	err = blobFile.loadOffsetMap()
   640  	if err != nil {
   641  		return err
   642  	}
   643  	h.physicalCache[newFid] = blobFile
   644  	for _, oldFile := range oldFiles {
   645  		delete(h.physicalCache, oldFile.fid)
   646  	}
   647  	for logicalFid := range logicalFids {
   648  		h.logicalToPhysical[logicalFid] = newFid
   649  	}
   650  	return h.bm.addGCFile(oldFiles, blobFile, logicalFids, guard)
   651  }
   652  
// logicalAddr is the address a value was given when it was first written: the
// ID of the file it was written to and the offset of the value in that file.
// It is stored on disk as / fid(4) / offset(4) /.
type logicalAddr struct {
	fid    uint32
	offset uint32
}
   657  
   658  func (a logicalAddr) Less(b logicalAddr) bool {
   659  	if a.fid == b.fid {
   660  		return a.offset < b.offset
   661  	}
   662  	return a.fid < b.fid
   663  }
   664  
// validEntry is a value that survived GC, together with its logical address.
// value aliases the bytes of the old file as read into memory by doGCIfNeeded.
type validEntry struct {
	logicalAddr
	value []byte
}
   669  
   670  func (h *blobGCHandler) extractValidEntries(validEntries []validEntry, file *blobFile, blobBytes []byte) []validEntry {
   671  	physicalToLogical := make(map[uint32]logicalAddr, len(file.mappingEntries))
   672  	for _, mappingEntry := range file.mappingEntries {
   673  		physicalToLogical[mappingEntry.physicalOffset] = mappingEntry.logicalAddr
   674  	}
   675  	discardedPhysicalOffsets, endOff := h.buildDiscardPhysicalOffsets(file, blobBytes)
   676  	cursor := file.mappingSize
   677  	for cursor < endOff {
   678  		valLen := binary.LittleEndian.Uint32(blobBytes[cursor:])
   679  		cursor += 4
   680  		physicalOff := cursor
   681  		cursor += valLen
   682  		_, isDiscarded := discardedPhysicalOffsets[physicalOff]
   683  		if isDiscarded {
   684  			continue
   685  		}
   686  		var logical logicalAddr
   687  		if len(file.mappingEntries) == 0 {
   688  			logical.fid = file.fid
   689  			logical.offset = physicalOff
   690  		} else {
   691  			logical = physicalToLogical[physicalOff]
   692  		}
   693  		validEntries = append(validEntries, validEntry{
   694  			value:       blobBytes[physicalOff : physicalOff+valLen],
   695  			logicalAddr: logical,
   696  		})
   697  	}
   698  	return validEntries
   699  }
   700  
   701  func (h *blobGCHandler) buildDiscardPhysicalOffsets(file *blobFile, blobBytes []byte) (discards map[uint32]struct{}, endOff uint32) {
   702  	discards = make(map[uint32]struct{})
   703  	blobBytesOff := uint32(len(blobBytes))
   704  	for {
   705  		discardLength := binary.LittleEndian.Uint32(blobBytes[blobBytesOff-4:])
   706  		if discardLength == 0 {
   707  			break
   708  		}
   709  		discardAddrs := blobBytes[blobBytesOff-discardLength : blobBytesOff-8]
   710  		blobBytesOff -= discardLength
   711  		for i := 0; i < len(discardAddrs); i += 8 {
   712  			var addr logicalAddr
   713  			addr.fid = binary.LittleEndian.Uint32(discardAddrs[i:])
   714  			addr.offset = binary.LittleEndian.Uint32(discardAddrs[i+4:])
   715  			physicalOffset := file.getPhysicalOffset(addr)
   716  			discards[physicalOffset] = struct{}{}
   717  		}
   718  	}
   719  	return discards, blobBytesOff
   720  }
   721  
// blobCache is a small read-ahead buffer in front of one blob file, used by
// blobManager.read.
type blobCache struct {
	file *blobFile
	// cacheData is the buffer, allocated with cacheSize bytes on first use.
	cacheData []byte
	// cacheOffset is the physical file offset that cacheData[0] corresponds to.
	cacheOffset uint32
	// lastPhysical is the physical offset of the previous read; while it is
	// still 0, read bypasses the buffer.
	lastPhysical uint32
}
   728  
// cacheSize is the size in bytes of a blobCache buffer; values longer than this
// are always read directly from the file.
const cacheSize = 8 * 1024
   730  
   731  func (bc *blobCache) read(bp blobPointer, slice *y.Slice) ([]byte, error) {
   732  	physicalOffset := bc.file.getPhysicalOffset(bp.logicalAddr)
   733  	lastPhysical := bc.lastPhysical
   734  	bc.lastPhysical = physicalOffset
   735  	if lastPhysical == 0 || bp.length > cacheSize {
   736  		return bc.file.read(bp, slice)
   737  	}
   738  	if physicalOffset >= bc.cacheOffset && physicalOffset+bp.length < bc.cacheOffset+uint32(len(bc.cacheData)) {
   739  		off := physicalOffset - bc.cacheOffset
   740  		return bc.cacheData[off : off+bp.length], nil
   741  	}
   742  	if bc.cacheData == nil {
   743  		bc.cacheData = make([]byte, cacheSize)
   744  	}
   745  	readLen := uint32(len(bc.cacheData))
   746  	if readLen > bc.file.fileSize-physicalOffset {
   747  		readLen = bc.file.fileSize - physicalOffset
   748  	}
   749  	_, err := bc.file.fd.ReadAt(bc.cacheData[:readLen], int64(physicalOffset))
   750  	if err != nil {
   751  		return nil, err
   752  	}
   753  	bc.cacheOffset = physicalOffset
   754  	return bc.cacheData[:bp.length], nil
   755  }