github.com/TrueBlocks/trueblocks-core/src/apps/chifra@v0.0.0-20241022031540-b362680128f7/pkg/walk/walker.go (about)

     1  package walk
     2  
     3  import (
     4  	"context"
     5  
     6  	"github.com/TrueBlocks/trueblocks-core/src/apps/chifra/pkg/base"
     7  	"github.com/TrueBlocks/trueblocks-core/src/apps/chifra/pkg/logger"
     8  )
     9  
    10  type walkerFunc func(walker *CacheWalker, path string, first bool) (bool, error)
    11  type ForEveryFunc func(path string, vP any) (bool, error)
    12  
    13  type CacheWalker struct {
    14  	chain      string
    15  	testMode   bool
    16  	maxTests   int
    17  	visitFunc1 walkerFunc
    18  }
    19  
    20  func NewCacheWalker(chain string, testMode bool, maxTests int, visitFunc walkerFunc) CacheWalker {
    21  	return CacheWalker{
    22  		chain:      chain,
    23  		testMode:   testMode,
    24  		maxTests:   maxTests,
    25  		visitFunc1: visitFunc,
    26  	}
    27  }
    28  
    29  func (walker *CacheWalker) MaxTests() int {
    30  	return walker.maxTests
    31  }
    32  
    33  func ForEveryFileInFolder(path string, forEvery ForEveryFunc, vP any) error {
    34  	visitFunc := func(walker *CacheWalker, path string, first bool) (bool, error) {
    35  		return forEvery(path, vP)
    36  	}
    37  	walker := NewCacheWalker("", false, int(base.NOPOSI), visitFunc)
    38  	return walker.WalkRegularFolder(path)
    39  }
    40  
    41  func (walker *CacheWalker) WalkRegularFolder(path string) error {
    42  	filenameChan := make(chan CacheFileInfo)
    43  
    44  	var nRoutines int = 1
    45  	go WalkFolder(context.Background(), path, nil, filenameChan)
    46  
    47  	cnt := 0
    48  	for result := range filenameChan {
    49  		switch result.Type {
    50  		case Regular:
    51  			ok, err := walker.visitFunc1(walker, result.Path, cnt == 0)
    52  			if err != nil {
    53  				return err
    54  			}
    55  			if ok {
    56  				cnt++
    57  			} else {
    58  				return nil
    59  			}
    60  		case Cache_NotACache:
    61  			nRoutines--
    62  			if nRoutines == 0 {
    63  				close(filenameChan)
    64  				continue
    65  			}
    66  		default:
    67  			logger.Fatal("should not happen ==> in WalkRegularFolder")
    68  		}
    69  	}
    70  
    71  	return nil
    72  }
    73  
    74  func (walker *CacheWalker) WalkBloomFilters(blockNums []base.Blknum) error {
    75  	filenameChan := make(chan CacheFileInfo)
    76  
    77  	// TODO: changing this will probably create data races because we append to slices and/or modify maps
    78  	// in the visitFunc. We need to make sure that we don't modify the same data structure in two different
    79  	// goroutines.
    80  	var nRoutines int = 1
    81  	go WalkCacheFolder(context.Background(), walker.chain, Index_Bloom, nil, filenameChan)
    82  
    83  	cnt := 0
    84  	for result := range filenameChan {
    85  		switch result.Type {
    86  		case Index_Bloom:
    87  			if walker.shouldDisplay(result, cnt, blockNums) {
    88  				ok, err := walker.visitFunc1(walker, result.Path, cnt == 0)
    89  				if err != nil {
    90  					return err
    91  				}
    92  				if ok {
    93  					cnt++
    94  				} else {
    95  					return nil
    96  				}
    97  			}
    98  		case Cache_NotACache:
    99  			nRoutines--
   100  			if nRoutines == 0 {
   101  				close(filenameChan)
   102  				continue
   103  			}
   104  		default:
   105  			logger.Fatal("should not happen ==> you may only traverse the bloom folder")
   106  		}
   107  	}
   108  
   109  	return nil
   110  }
   111  
// TODO: This should accept unresolved block ranges, not lists of block numbers.
// This routine accepts the 'resolved' block numbers. If, instead, it received the unresolved block ranges
// from the command line, it would be much more efficient. Using resolved block numbers means we have to
// provide a block range that is fine-grained enough to hit on every file inside the range. For example, if
// there are 100 files in the range 100000-200000, we need to create block numbers that cover every
// eventuality. If one of the files has a two-block range, we need to generate 50,000 block numbers. If we
// used the range on the command line instead, we'd only have to intersect one range.
   119  
   120  func (walker *CacheWalker) shouldDisplay(result CacheFileInfo, cnt int, blockNums []base.Blknum) bool {
   121  	if !IsCacheType(result.Path, result.Type, true /* checkExt */) {
   122  		return false
   123  	}
   124  
   125  	if walker.testMode && cnt > walker.maxTests {
   126  		return false
   127  	}
   128  
   129  	if len(blockNums) == 0 {
   130  		return true
   131  	}
   132  
   133  	hit := false
   134  	for _, bn := range blockNums {
   135  		h := result.FileRange.IntersectsB(bn)
   136  		hit = hit || h
   137  		if hit {
   138  			break
   139  		}
   140  	}
   141  
   142  	return hit
   143  }