github.com/ledgerwatch/erigon-lib@v1.0.0/state/locality_index.go (about)

     1  /*
     2     Copyright 2022 Erigon contributors
     3  
     4     Licensed under the Apache License, Version 2.0 (the "License");
     5     you may not use this file except in compliance with the License.
     6     You may obtain a copy of the License at
     7  
     8         http://www.apache.org/licenses/LICENSE-2.0
     9  
    10     Unless required by applicable law or agreed to in writing, software
    11     distributed under the License is distributed on an "AS IS" BASIS,
    12     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13     See the License for the specific language governing permissions and
    14     limitations under the License.
    15  */
    16  
    17  package state
    18  
    19  import (
    20  	"bytes"
    21  	"container/heap"
    22  	"context"
    23  	"fmt"
    24  	"os"
    25  	"path/filepath"
    26  	"regexp"
    27  	"strconv"
    28  	"sync/atomic"
    29  	"time"
    30  
    31  	"github.com/ledgerwatch/erigon-lib/common/assert"
    32  	"github.com/ledgerwatch/erigon-lib/common/dir"
    33  	"github.com/ledgerwatch/erigon-lib/kv/bitmapdb"
    34  	"github.com/ledgerwatch/erigon-lib/recsplit"
    35  	"github.com/ledgerwatch/log/v3"
    36  )
    37  
    38  const LocalityIndexUint64Limit = 64 //bitmap spend 1 bit per file, stored as uint64
    39  
    40  // LocalityIndex - has info in which .ef files exists given key
    41  // Format: key -> bitmap(step_number_list)
    42  // step_number_list is list of .ef files where exists given key
    43  type LocalityIndex struct {
    44  	filenameBase    string
    45  	dir, tmpdir     string // Directory where static files are created
    46  	aggregationStep uint64 // immutable
    47  
    48  	file *filesItem
    49  	bm   *bitmapdb.FixedSizeBitmaps
    50  
    51  	roFiles  atomic.Pointer[ctxItem]
    52  	roBmFile atomic.Pointer[bitmapdb.FixedSizeBitmaps]
    53  	logger   log.Logger
    54  }
    55  
    56  func NewLocalityIndex(
    57  	dir, tmpdir string,
    58  	aggregationStep uint64,
    59  	filenameBase string,
    60  	logger log.Logger,
    61  ) (*LocalityIndex, error) {
    62  	li := &LocalityIndex{
    63  		dir:             dir,
    64  		tmpdir:          tmpdir,
    65  		aggregationStep: aggregationStep,
    66  		filenameBase:    filenameBase,
    67  		logger:          logger,
    68  	}
    69  	return li, nil
    70  }
    71  func (li *LocalityIndex) closeWhatNotInList(fNames []string) {
    72  	if li == nil || li.bm == nil {
    73  		return
    74  	}
    75  
    76  	for _, protectName := range fNames {
    77  		if li.bm.FileName() == protectName {
    78  			return
    79  		}
    80  	}
    81  	li.closeFiles()
    82  }
    83  
    84  func (li *LocalityIndex) OpenList(fNames []string) error {
    85  	if li == nil {
    86  		return nil
    87  	}
    88  	li.closeWhatNotInList(fNames)
    89  	_ = li.scanStateFiles(fNames)
    90  	if err := li.openFiles(); err != nil {
    91  		return fmt.Errorf("NewHistory.openFiles: %s, %w", li.filenameBase, err)
    92  	}
    93  	return nil
    94  }
    95  
    96  func (li *LocalityIndex) scanStateFiles(fNames []string) (uselessFiles []*filesItem) {
    97  	if li == nil {
    98  		return nil
    99  	}
   100  
   101  	re := regexp.MustCompile("^" + li.filenameBase + ".([0-9]+)-([0-9]+).li$")
   102  	var err error
   103  	for _, name := range fNames {
   104  		subs := re.FindStringSubmatch(name)
   105  		if len(subs) != 3 {
   106  			if len(subs) != 0 {
   107  				li.logger.Warn("File ignored by inverted index scan, more than 3 submatches", "name", name, "submatches", len(subs))
   108  			}
   109  			continue
   110  		}
   111  		var startStep, endStep uint64
   112  		if startStep, err = strconv.ParseUint(subs[1], 10, 64); err != nil {
   113  			li.logger.Warn("File ignored by inverted index scan, parsing startTxNum", "error", err, "name", name)
   114  			continue
   115  		}
   116  		if endStep, err = strconv.ParseUint(subs[2], 10, 64); err != nil {
   117  			li.logger.Warn("File ignored by inverted index scan, parsing endTxNum", "error", err, "name", name)
   118  			continue
   119  		}
   120  		if startStep > endStep {
   121  			li.logger.Warn("File ignored by inverted index scan, startTxNum > endTxNum", "name", name)
   122  			continue
   123  		}
   124  
   125  		if startStep != 0 {
   126  			li.logger.Warn("LocalityIndex must always starts from step 0")
   127  			continue
   128  		}
   129  		if endStep > StepsInBiggestFile*LocalityIndexUint64Limit {
   130  			li.logger.Warn("LocalityIndex does store bitmaps as uint64, means it can't handle > 2048 steps. But it's possible to implement")
   131  			continue
   132  		}
   133  
   134  		startTxNum, endTxNum := startStep*li.aggregationStep, endStep*li.aggregationStep
   135  		if li.file == nil {
   136  			li.file = newFilesItem(startTxNum, endTxNum, li.aggregationStep)
   137  			li.file.frozen = false // LocalityIndex files are never frozen
   138  		} else if li.file.endTxNum < endTxNum {
   139  			uselessFiles = append(uselessFiles, li.file)
   140  			li.file = newFilesItem(startTxNum, endTxNum, li.aggregationStep)
   141  			li.file.frozen = false // LocalityIndex files are never frozen
   142  		}
   143  	}
   144  	return uselessFiles
   145  }
   146  
   147  func (li *LocalityIndex) openFiles() (err error) {
   148  	if li == nil || li.file == nil {
   149  		return nil
   150  	}
   151  
   152  	fromStep, toStep := li.file.startTxNum/li.aggregationStep, li.file.endTxNum/li.aggregationStep
   153  	if li.bm == nil {
   154  		dataPath := filepath.Join(li.dir, fmt.Sprintf("%s.%d-%d.l", li.filenameBase, fromStep, toStep))
   155  		if dir.FileExist(dataPath) {
   156  			li.bm, err = bitmapdb.OpenFixedSizeBitmaps(dataPath, int((toStep-fromStep)/StepsInBiggestFile))
   157  			if err != nil {
   158  				return err
   159  			}
   160  		}
   161  	}
   162  	if li.file.index == nil {
   163  		idxPath := filepath.Join(li.dir, fmt.Sprintf("%s.%d-%d.li", li.filenameBase, fromStep, toStep))
   164  		if dir.FileExist(idxPath) {
   165  			li.file.index, err = recsplit.OpenIndex(idxPath)
   166  			if err != nil {
   167  				return fmt.Errorf("LocalityIndex.openFiles: %w, %s", err, idxPath)
   168  			}
   169  		}
   170  	}
   171  	li.reCalcRoFiles()
   172  	return nil
   173  }
   174  
   175  func (li *LocalityIndex) closeFiles() {
   176  	if li == nil {
   177  		return
   178  	}
   179  	if li.file != nil && li.file.index != nil {
   180  		li.file.index.Close()
   181  		li.file = nil
   182  	}
   183  	if li.bm != nil {
   184  		li.bm.Close()
   185  		li.bm = nil
   186  	}
   187  }
   188  func (li *LocalityIndex) reCalcRoFiles() {
   189  	if li == nil || li.file == nil {
   190  		return
   191  	}
   192  	li.roFiles.Store(&ctxItem{
   193  		startTxNum: li.file.startTxNum,
   194  		endTxNum:   li.file.endTxNum,
   195  		i:          0,
   196  		src:        li.file,
   197  	})
   198  	li.roBmFile.Store(li.bm)
   199  }
   200  
   201  func (li *LocalityIndex) MakeContext() *ctxLocalityIdx {
   202  	if li == nil {
   203  		return nil
   204  	}
   205  	x := &ctxLocalityIdx{
   206  		file: li.roFiles.Load(),
   207  		bm:   li.roBmFile.Load(),
   208  	}
   209  	if x.file != nil && x.file.src != nil {
   210  		x.file.src.refcount.Add(1)
   211  	}
   212  	return x
   213  }
   214  
   215  func (out *ctxLocalityIdx) Close(logger log.Logger) {
   216  	if out == nil || out.file == nil || out.file.src == nil {
   217  		return
   218  	}
   219  	refCnt := out.file.src.refcount.Add(-1)
   220  	if refCnt == 0 && out.file.src.canDelete.Load() {
   221  		closeLocalityIndexFilesAndRemove(out, logger)
   222  	}
   223  }
   224  
   225  func closeLocalityIndexFilesAndRemove(i *ctxLocalityIdx, logger log.Logger) {
   226  	if i.file.src != nil {
   227  		i.file.src.closeFilesAndRemove()
   228  		i.file.src = nil
   229  	}
   230  	if i.bm != nil {
   231  		i.bm.Close()
   232  		if err := os.Remove(i.bm.FilePath()); err != nil {
   233  			logger.Trace("os.Remove", "err", err, "file", i.bm.FileName())
   234  		}
   235  		i.bm = nil
   236  	}
   237  }
   238  
   239  func (li *LocalityIndex) Close() {
   240  	li.closeWhatNotInList([]string{})
   241  	li.reCalcRoFiles()
   242  }
   243  func (li *LocalityIndex) Files() (res []string) { return res }
   244  func (li *LocalityIndex) NewIdxReader() *recsplit.IndexReader {
   245  	if li != nil && li.file != nil && li.file.index != nil {
   246  		return recsplit.NewIndexReader(li.file.index)
   247  	}
   248  	return nil
   249  }
   250  
   251  // LocalityIndex return exactly 2 file (step)
   252  // prevents searching key in many files
   253  func (li *LocalityIndex) lookupIdxFiles(loc *ctxLocalityIdx, key []byte, fromTxNum uint64) (exactShard1, exactShard2 uint64, lastIndexedTxNum uint64, ok1, ok2 bool) {
   254  	if li == nil || loc == nil || loc.bm == nil {
   255  		return 0, 0, 0, false, false
   256  	}
   257  	if loc.reader == nil {
   258  		loc.reader = recsplit.NewIndexReader(loc.file.src.index)
   259  	}
   260  
   261  	if fromTxNum >= loc.file.endTxNum {
   262  		return 0, 0, fromTxNum, false, false
   263  	}
   264  
   265  	fromFileNum := fromTxNum / li.aggregationStep / StepsInBiggestFile
   266  	fn1, fn2, ok1, ok2, err := loc.bm.First2At(loc.reader.Lookup(key), fromFileNum)
   267  	if err != nil {
   268  		panic(err)
   269  	}
   270  	return fn1 * StepsInBiggestFile, fn2 * StepsInBiggestFile, loc.file.endTxNum, ok1, ok2
   271  }
   272  
   273  func (li *LocalityIndex) missedIdxFiles(ii *InvertedIndexContext) (toStep uint64, idxExists bool) {
   274  	if len(ii.files) == 0 {
   275  		return 0, true
   276  	}
   277  	var item *ctxItem
   278  	for i := len(ii.files) - 1; i >= 0; i-- {
   279  		if ii.files[i].src.frozen {
   280  			item = &ii.files[i]
   281  			break
   282  		}
   283  	}
   284  	if item != nil {
   285  		toStep = item.endTxNum / li.aggregationStep
   286  	}
   287  	fName := fmt.Sprintf("%s.%d-%d.li", li.filenameBase, 0, toStep)
   288  	return toStep, dir.FileExist(filepath.Join(li.dir, fName))
   289  }
   290  func (li *LocalityIndex) buildFiles(ctx context.Context, ic *InvertedIndexContext, toStep uint64) (files *LocalityIndexFiles, err error) {
   291  	defer ic.ii.EnableMadvNormalReadAhead().DisableReadAhead()
   292  
   293  	logEvery := time.NewTicker(30 * time.Second)
   294  	defer logEvery.Stop()
   295  
   296  	fromStep := uint64(0)
   297  	count := 0
   298  	it := ic.iterateKeysLocality(toStep * li.aggregationStep)
   299  	for it.HasNext() {
   300  		_, _ = it.Next()
   301  		count++
   302  	}
   303  
   304  	fName := fmt.Sprintf("%s.%d-%d.li", li.filenameBase, fromStep, toStep)
   305  	idxPath := filepath.Join(li.dir, fName)
   306  	filePath := filepath.Join(li.dir, fmt.Sprintf("%s.%d-%d.l", li.filenameBase, fromStep, toStep))
   307  
   308  	rs, err := recsplit.NewRecSplit(recsplit.RecSplitArgs{
   309  		KeyCount:   count,
   310  		Enums:      false,
   311  		BucketSize: 2000,
   312  		LeafSize:   8,
   313  		TmpDir:     li.tmpdir,
   314  		IndexFile:  idxPath,
   315  	}, li.logger)
   316  	if err != nil {
   317  		return nil, fmt.Errorf("create recsplit: %w", err)
   318  	}
   319  	defer rs.Close()
   320  	rs.LogLvl(log.LvlTrace)
   321  
   322  	i := uint64(0)
   323  	for {
   324  		dense, err := bitmapdb.NewFixedSizeBitmapsWriter(filePath, int(it.FilesAmount()), uint64(count), li.logger)
   325  		if err != nil {
   326  			return nil, err
   327  		}
   328  		defer dense.Close()
   329  
   330  		it = ic.iterateKeysLocality(toStep * li.aggregationStep)
   331  		for it.HasNext() {
   332  			k, inFiles := it.Next()
   333  			if err := dense.AddArray(i, inFiles); err != nil {
   334  				return nil, err
   335  			}
   336  			if err = rs.AddKey(k, 0); err != nil {
   337  				return nil, err
   338  			}
   339  			i++
   340  
   341  			select {
   342  			case <-ctx.Done():
   343  				return nil, ctx.Err()
   344  			case <-logEvery.C:
   345  				li.logger.Info("[LocalityIndex] build", "name", li.filenameBase, "progress", fmt.Sprintf("%.2f%%", 50+it.Progress()/2))
   346  			default:
   347  			}
   348  		}
   349  
   350  		if err := dense.Build(); err != nil {
   351  			return nil, err
   352  		}
   353  
   354  		if err = rs.Build(ctx); err != nil {
   355  			if rs.Collision() {
   356  				li.logger.Debug("Building recsplit. Collision happened. It's ok. Restarting...")
   357  				rs.ResetNextSalt()
   358  			} else {
   359  				return nil, fmt.Errorf("build idx: %w", err)
   360  			}
   361  		} else {
   362  			break
   363  		}
   364  	}
   365  
   366  	idx, err := recsplit.OpenIndex(idxPath)
   367  	if err != nil {
   368  		return nil, err
   369  	}
   370  	bm, err := bitmapdb.OpenFixedSizeBitmaps(filePath, int(it.FilesAmount()))
   371  	if err != nil {
   372  		return nil, err
   373  	}
   374  	return &LocalityIndexFiles{index: idx, bm: bm}, nil
   375  }
   376  
   377  func (li *LocalityIndex) integrateFiles(sf LocalityIndexFiles, txNumFrom, txNumTo uint64) {
   378  	if li.file != nil {
   379  		li.file.canDelete.Store(true)
   380  	}
   381  	li.file = &filesItem{
   382  		startTxNum: txNumFrom,
   383  		endTxNum:   txNumTo,
   384  		index:      sf.index,
   385  		frozen:     false,
   386  	}
   387  	li.bm = sf.bm
   388  	li.reCalcRoFiles()
   389  }
   390  
   391  func (li *LocalityIndex) BuildMissedIndices(ctx context.Context, ii *InvertedIndexContext) error {
   392  	if li == nil {
   393  		return nil
   394  	}
   395  	toStep, idxExists := li.missedIdxFiles(ii)
   396  	if idxExists || toStep == 0 {
   397  		return nil
   398  	}
   399  	fromStep := uint64(0)
   400  	f, err := li.buildFiles(ctx, ii, toStep)
   401  	if err != nil {
   402  		return err
   403  	}
   404  	li.integrateFiles(*f, fromStep*li.aggregationStep, toStep*li.aggregationStep)
   405  	return nil
   406  }
   407  
   408  type LocalityIndexFiles struct {
   409  	index *recsplit.Index
   410  	bm    *bitmapdb.FixedSizeBitmaps
   411  }
   412  
   413  func (sf LocalityIndexFiles) Close() {
   414  	if sf.index != nil {
   415  		sf.index.Close()
   416  	}
   417  	if sf.bm != nil {
   418  		sf.bm.Close()
   419  	}
   420  }
   421  
   422  type LocalityIterator struct {
   423  	hc               *InvertedIndexContext
   424  	h                ReconHeapOlderFirst
   425  	files, nextFiles []uint64
   426  	key, nextKey     []byte
   427  	progress         uint64
   428  	hasNext          bool
   429  
   430  	totalOffsets, filesAmount uint64
   431  }
   432  
   433  func (si *LocalityIterator) advance() {
   434  	for si.h.Len() > 0 {
   435  		top := heap.Pop(&si.h).(*ReconItem)
   436  		key := top.key
   437  		_, offset := top.g.NextUncompressed()
   438  		si.progress += offset - top.lastOffset
   439  		top.lastOffset = offset
   440  		inStep := uint32(top.startTxNum / si.hc.ii.aggregationStep)
   441  		if top.g.HasNext() {
   442  			top.key, _ = top.g.NextUncompressed()
   443  			heap.Push(&si.h, top)
   444  		}
   445  
   446  		inFile := inStep / StepsInBiggestFile
   447  
   448  		if !bytes.Equal(key, si.key) {
   449  			if si.key == nil {
   450  				si.key = key
   451  				si.files = append(si.files, uint64(inFile))
   452  				continue
   453  			}
   454  
   455  			si.nextFiles, si.files = si.files, si.nextFiles[:0]
   456  			si.nextKey = si.key
   457  
   458  			si.files = append(si.files, uint64(inFile))
   459  			si.key = key
   460  			si.hasNext = true
   461  			return
   462  		}
   463  		si.files = append(si.files, uint64(inFile))
   464  	}
   465  	si.nextFiles, si.files = si.files, si.nextFiles[:0]
   466  	si.nextKey = si.key
   467  	si.hasNext = false
   468  }
   469  
   470  func (si *LocalityIterator) HasNext() bool { return si.hasNext }
   471  func (si *LocalityIterator) Progress() float64 {
   472  	return (float64(si.progress) / float64(si.totalOffsets)) * 100
   473  }
   474  func (si *LocalityIterator) FilesAmount() uint64 { return si.filesAmount }
   475  
   476  func (si *LocalityIterator) Next() ([]byte, []uint64) {
   477  	si.advance()
   478  	return si.nextKey, si.nextFiles
   479  }
   480  
   481  func (ic *InvertedIndexContext) iterateKeysLocality(uptoTxNum uint64) *LocalityIterator {
   482  	si := &LocalityIterator{hc: ic}
   483  	for _, item := range ic.files {
   484  		if !item.src.frozen || item.startTxNum > uptoTxNum {
   485  			continue
   486  		}
   487  		if assert.Enable {
   488  			if (item.endTxNum-item.startTxNum)/ic.ii.aggregationStep != StepsInBiggestFile {
   489  				panic(fmt.Errorf("frozen file of small size: %s", item.src.decompressor.FileName()))
   490  			}
   491  		}
   492  		g := item.src.decompressor.MakeGetter()
   493  		if g.HasNext() {
   494  			key, offset := g.NextUncompressed()
   495  
   496  			heapItem := &ReconItem{startTxNum: item.startTxNum, endTxNum: item.endTxNum, g: g, txNum: ^item.endTxNum, key: key, startOffset: offset, lastOffset: offset}
   497  			heap.Push(&si.h, heapItem)
   498  		}
   499  		si.totalOffsets += uint64(g.Size())
   500  		si.filesAmount++
   501  	}
   502  	si.advance()
   503  	return si
   504  }