github.com/klaytn/klaytn@v1.12.1/blockchain/bloombits/matcher.go (about)

     1  // Modifications Copyright 2018 The klaytn Authors
     2  // Copyright 2017 The go-ethereum Authors
     3  // This file is part of the go-ethereum library.
     4  //
     5  // The go-ethereum library is free software: you can redistribute it and/or modify
     6  // it under the terms of the GNU Lesser General Public License as published by
     7  // the Free Software Foundation, either version 3 of the License, or
     8  // (at your option) any later version.
     9  //
    10  // The go-ethereum library is distributed in the hope that it will be useful,
    11  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    12  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    13  // GNU Lesser General Public License for more details.
    14  //
    15  // You should have received a copy of the GNU Lesser General Public License
    16  // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
    17  //
    18  // This file is derived from core/bloombits/matcher.go (2018/06/04).
    19  // Modified and improved for the klaytn development.
    20  
    21  package bloombits
    22  
    23  import (
    24  	"bytes"
    25  	"context"
    26  	"errors"
    27  	"math"
    28  	"sort"
    29  	"sync"
    30  	"sync/atomic"
    31  	"time"
    32  
    33  	"github.com/klaytn/klaytn/common/bitutil"
    34  	"github.com/klaytn/klaytn/crypto"
    35  )
    36  
// bloomIndexes represents the bit indexes inside the bloom filter that belong
// to some key. Every key maps to exactly three bit positions.
type bloomIndexes [3]uint
    40  
    41  // calcBloomIndexes returns the bloom filter bit indexes belonging to the given key.
    42  func calcBloomIndexes(b []byte) bloomIndexes {
    43  	b = crypto.Keccak256(b)
    44  
    45  	var idxs bloomIndexes
    46  	for i := 0; i < len(idxs); i++ {
    47  		idxs[i] = (uint(b[2*i])<<8)&2047 + uint(b[2*i+1])
    48  	}
    49  	return idxs
    50  }
    51  
// partialMatches with a non-nil bitset represents a section in which some sub-
// matchers have already found potential matches. Subsequent sub-matchers will
// binary AND their matches with this bitset. If the bitset is nil, it represents
// a section to be processed by the first sub-matcher.
type partialMatches struct {
	section uint64 // Index of the section the bitset belongs to
	bitset  []byte // Potential matches inside the section, one bit per block
}
    60  
// Retrieval represents a request for retrieval task assignments for a given
// bit with the given number of fetch elements, or a response for such a request.
// It can also have the actual results set to be used as a delivery data struct.
//
// The context and error fields are used by the light client to terminate matching
// early if an error is encountered on some path of the pipeline.
type Retrieval struct {
	Bit      uint     // Bloom bit index the retrieval is about
	Sections []uint64 // Sections requested (or delivered) for the bit
	Bitsets  [][]byte // Retrieved bit vectors, matched index-by-index with Sections

	Context context.Context // Context of the session that issued the request
	Error   error           // Failure reported by the retriever, if any
}
    75  
// Matcher is a pipelined system of schedulers and logic matchers which perform
// binary AND/OR operations on the bit-streams, creating a stream of potential
// blocks to inspect for data content.
//
// The channel fields are the rendezvous points between the distributor
// goroutine of a running session and the retriever processes servicing it.
type Matcher struct {
	sectionSize uint64 // Size of the data batches to filter on

	filters    [][]bloomIndexes    // Filter the system is matching for
	schedulers map[uint]*scheduler // Retrieval schedulers for loading bloom bits

	retrievers chan chan uint       // Retriever processes waiting for bit allocations
	counters   chan chan uint       // Retriever processes waiting for task count reports
	retrievals chan chan *Retrieval // Retriever processes waiting for task allocations
	deliveries chan *Retrieval      // Retriever processes waiting for task response deliveries

	running uint32 // Atomic flag whether a session is live or not
}
    92  
    93  // NewMatcher creates a new pipeline for retrieving bloom bit streams and doing
    94  // address and topic filtering on them. Setting a filter component to `nil` is
    95  // allowed and will result in that filter rule being skipped (OR 0x11...1).
    96  func NewMatcher(sectionSize uint64, filters [][][]byte) *Matcher {
    97  	// Create the matcher instance
    98  	m := &Matcher{
    99  		sectionSize: sectionSize,
   100  		schedulers:  make(map[uint]*scheduler),
   101  		retrievers:  make(chan chan uint),
   102  		counters:    make(chan chan uint),
   103  		retrievals:  make(chan chan *Retrieval),
   104  		deliveries:  make(chan *Retrieval),
   105  	}
   106  	// Calculate the bloom bit indexes for the groups we're interested in
   107  	m.filters = nil
   108  
   109  	for _, filter := range filters {
   110  		// Gather the bit indexes of the filter rule, special casing the nil filter
   111  		if len(filter) == 0 {
   112  			continue
   113  		}
   114  		bloomBits := make([]bloomIndexes, len(filter))
   115  		for i, clause := range filter {
   116  			if clause == nil {
   117  				bloomBits = nil
   118  				break
   119  			}
   120  			bloomBits[i] = calcBloomIndexes(clause)
   121  		}
   122  		// Accumulate the filter rules if no nil rule was within
   123  		if bloomBits != nil {
   124  			m.filters = append(m.filters, bloomBits)
   125  		}
   126  	}
   127  	// For every bit, create a scheduler to load/download the bit vectors
   128  	for _, bloomIndexLists := range m.filters {
   129  		for _, bloomIndexList := range bloomIndexLists {
   130  			for _, bloomIndex := range bloomIndexList {
   131  				m.addScheduler(bloomIndex)
   132  			}
   133  		}
   134  	}
   135  	return m
   136  }
   137  
   138  // addScheduler adds a bit stream retrieval scheduler for the given bit index if
   139  // it has not existed before. If the bit is already selected for filtering, the
   140  // existing scheduler can be used.
   141  func (m *Matcher) addScheduler(idx uint) {
   142  	if _, ok := m.schedulers[idx]; ok {
   143  		return
   144  	}
   145  	m.schedulers[idx] = newScheduler(idx)
   146  }
   147  
   148  // Start starts the matching process and returns a stream of bloom matches in
   149  // a given range of blocks. If there are no more matches in the range, the result
   150  // channel is closed.
   151  func (m *Matcher) Start(ctx context.Context, begin, end uint64, results chan uint64) (*MatcherSession, error) {
   152  	// Make sure we're not creating concurrent sessions
   153  	if atomic.SwapUint32(&m.running, 1) == 1 {
   154  		return nil, errors.New("matcher already running")
   155  	}
   156  	defer atomic.StoreUint32(&m.running, 0)
   157  
   158  	// Initiate a new matching round
   159  	session := &MatcherSession{
   160  		matcher: m,
   161  		quit:    make(chan struct{}),
   162  		kill:    make(chan struct{}),
   163  		ctx:     ctx,
   164  	}
   165  	for _, scheduler := range m.schedulers {
   166  		scheduler.reset()
   167  	}
   168  	sink := m.run(begin, end, cap(results), session)
   169  
   170  	// Read the output from the result sink and deliver to the user
   171  	session.pend.Add(1)
   172  	go func() {
   173  		defer session.pend.Done()
   174  		defer close(results)
   175  
   176  		for {
   177  			select {
   178  			case <-session.quit:
   179  				return
   180  
   181  			case res, ok := <-sink:
   182  				// New match result found
   183  				if !ok {
   184  					return
   185  				}
   186  				// Calculate the first and last blocks of the section
   187  				sectionStart := res.section * m.sectionSize
   188  
   189  				first := sectionStart
   190  				if begin > first {
   191  					first = begin
   192  				}
   193  				last := sectionStart + m.sectionSize - 1
   194  				if end < last {
   195  					last = end
   196  				}
   197  				// Iterate over all the blocks in the section and return the matching ones
   198  				for i := first; i <= last; i++ {
   199  					// Skip the entire byte if no matches are found inside (and we're processing an entire byte!)
   200  					next := res.bitset[(i-sectionStart)/8]
   201  					if next == 0 {
   202  						if i%8 == 0 {
   203  							i += 7
   204  						}
   205  						continue
   206  					}
   207  					// Some bit it set, do the actual submatching
   208  					if bit := 7 - i%8; next&(1<<bit) != 0 {
   209  						select {
   210  						case <-session.quit:
   211  							return
   212  						case results <- i:
   213  						}
   214  					}
   215  				}
   216  			}
   217  		}
   218  	}()
   219  	return session, nil
   220  }
   221  
   222  // run creates a daisy-chain of sub-matchers, one for the address set and one
   223  // for each topic set, each sub-matcher receiving a section only if the previous
   224  // ones have all found a potential match in one of the blocks of the section,
   225  // then binary AND-ing its own matches and forwarding the result to the next one.
   226  //
   227  // The method starts feeding the section indexes into the first sub-matcher on a
   228  // new goroutine and returns a sink channel receiving the results.
   229  func (m *Matcher) run(begin, end uint64, buffer int, session *MatcherSession) chan *partialMatches {
   230  	// Create the source channel and feed section indexes into
   231  	source := make(chan *partialMatches, buffer)
   232  
   233  	session.pend.Add(1)
   234  	go func() {
   235  		defer session.pend.Done()
   236  		defer close(source)
   237  
   238  		for i := begin / m.sectionSize; i <= end/m.sectionSize; i++ {
   239  			select {
   240  			case <-session.quit:
   241  				return
   242  			case source <- &partialMatches{i, bytes.Repeat([]byte{0xff}, int(m.sectionSize/8))}:
   243  			}
   244  		}
   245  	}()
   246  	// Assemble the daisy-chained filtering pipeline
   247  	next := source
   248  	dist := make(chan *request, buffer)
   249  
   250  	for _, bloom := range m.filters {
   251  		next = m.subMatch(next, dist, bloom, session)
   252  	}
   253  	// Start the request distribution
   254  	session.pend.Add(1)
   255  	go m.distributor(dist, session)
   256  
   257  	return next
   258  }
   259  
// subMatch creates a sub-matcher that filters for a set of addresses or topics, binary OR-s those matches, then
// binary AND-s the result to the daisy-chain input (source) and forwards it to the daisy-chain output.
// The matches of each address/topic are calculated by fetching the given sections of the three bloom bit indexes belonging to
// that address/topic, and binary AND-ing those vectors together.
//
// Two goroutines are started and registered with the session's wait group:
// one multiplexes incoming section indexes to the bit schedulers, the other
// collects the retrieved bit vectors and merges them. Sections in which the
// merged vector has no bit left are not forwarded. The returned channel is
// closed when the collecting goroutine terminates.
func (m *Matcher) subMatch(source chan *partialMatches, dist chan *request, bloom []bloomIndexes, session *MatcherSession) chan *partialMatches {
	// Start the concurrent schedulers for each bit required by the bloom filter
	sectionSources := make([][3]chan uint64, len(bloom))
	sectionSinks := make([][3]chan []byte, len(bloom))
	for i, bits := range bloom {
		for j, bit := range bits {
			// Every per-bit channel gets the same capacity as the input channel
			sectionSources[i][j] = make(chan uint64, cap(source))
			sectionSinks[i][j] = make(chan []byte, cap(source))

			m.schedulers[bit].run(sectionSources[i][j], dist, sectionSinks[i][j], session.quit, &session.pend)
		}
	}

	process := make(chan *partialMatches, cap(source)) // entries from source are forwarded here after fetches have been initiated
	results := make(chan *partialMatches, cap(source))

	session.pend.Add(2)
	go func() {
		// Tear down the goroutine and terminate all source channels
		// (deferred calls run in reverse order: the bit sources are closed
		// first, then process, and only then is the wait group released)
		defer session.pend.Done()
		defer close(process)

		defer func() {
			for _, bloomSources := range sectionSources {
				for _, bitSource := range bloomSources {
					close(bitSource)
				}
			}
		}()
		// Read sections from the source channel and multiplex into all bit-schedulers
		for {
			select {
			case <-session.quit:
				return

			case subres, ok := <-source:
				// New subresult from previous link
				if !ok {
					return
				}
				// Multiplex the section index to all bit-schedulers
				for _, bloomSources := range sectionSources {
					for _, bitSource := range bloomSources {
						select {
						case <-session.quit:
							return
						case bitSource <- subres.section:
						}
					}
				}
				// Notify the processor that this section will become available
				select {
				case <-session.quit:
					return
				case process <- subres:
				}
			}
		}
	}()

	go func() {
		// Tear down the goroutine and terminate the final sink channel
		defer session.pend.Done()
		defer close(results)

		// Read the source notifications and collect the delivered results
		for {
			select {
			case <-session.quit:
				return

			case subres, ok := <-process:
				// Notified of a section being retrieved
				if !ok {
					return
				}
				// Gather all the sub-results and merge them together
				var orVector []byte
				for _, bloomSinks := range sectionSinks {
					// AND together the three bit vectors of one address/topic
					var andVector []byte
					for _, bitSink := range bloomSinks {
						var data []byte
						select {
						case <-session.quit:
							return
						case data = <-bitSink:
						}
						if andVector == nil {
							// Work on a private copy; the received slice itself is never written to here
							andVector = make([]byte, int(m.sectionSize/8))
							copy(andVector, data)
						} else {
							bitutil.ANDBytes(andVector, andVector, data)
						}
					}
					// OR the per-address/topic results into the accumulated vector
					if orVector == nil {
						orVector = andVector
					} else {
						bitutil.ORBytes(orVector, orVector, andVector)
					}
				}

				// Only reached with an empty bloom list: fall back to an all-zero vector
				if orVector == nil {
					orVector = make([]byte, int(m.sectionSize/8))
				}
				// Restrict the matches to what the previous links have let through
				if subres.bitset != nil {
					bitutil.ANDBytes(orVector, orVector, subres.bitset)
				}
				// Forward the section only if the test on the merged vector succeeds
				// (presumably: at least one bit is still set)
				if bitutil.TestBytes(orVector) {
					select {
					case <-session.quit:
						return
					case results <- &partialMatches{subres.section, orVector}:
					}
				}
			}
		}
	}()
	return results
}
   383  
// distributor receives requests from the schedulers and queues them into a set
// of pending requests, which are assigned to retrievers wanting to fulfil them.
//
// It runs until the session is shut down. On a quit signal it keeps serving
// until every handed-out allocation has been delivered back, unless the kill
// channel fires first, in which case it returns immediately.
func (m *Matcher) distributor(dist chan *request, session *MatcherSession) {
	defer session.pend.Done()

	var (
		requests   = make(map[uint][]uint64) // Per-bit list of section requests, ordered by section number
		unallocs   = make(map[uint]struct{}) // Bits with pending requests but not allocated to any retriever
		retrievers chan chan uint            // Waiting retrievers (toggled to nil if unallocs is empty)
	)
	var (
		allocs   int            // Number of active allocations to handle graceful shutdown requests
		shutdown = session.quit // Shutdown request channel, will gracefully wait for pending requests
	)

	// assign is a helper method to try to assign a pending bit to an actively
	// listening servicer, or schedule it up for later when one arrives.
	assign := func(bit uint) {
		select {
		case fetcher := <-m.retrievers:
			allocs++
			fetcher <- bit
		default:
			// No retrievers active, start listening for new ones
			retrievers = m.retrievers
			unallocs[bit] = struct{}{}
		}
	}

	for {
		select {
		case <-shutdown:
			// Graceful shutdown requested, wait until all pending requests are honoured
			if allocs == 0 {
				return
			}
			// A nil channel is never ready, so this case stops firing from now on
			shutdown = nil

		case <-session.kill:
			// Pending requests not honoured in time, hard terminate
			return

		case req := <-dist:
			// New retrieval request arrived to be distributed to some fetcher process
			// (inserted at its sorted position to keep the queue ordered by section)
			queue := requests[req.bit]
			index := sort.Search(len(queue), func(i int) bool { return queue[i] >= req.section })
			requests[req.bit] = append(queue[:index], append([]uint64{req.section}, queue[index:]...)...)

			// If it's a new bit and we have waiting fetchers, allocate to them
			if len(queue) == 0 {
				assign(req.bit)
			}

		case fetcher := <-retrievers:
			// New retriever arrived, find the lowest section-ed bit to assign
			bit, best := uint(0), uint64(math.MaxUint64)
			for idx := range unallocs {
				if requests[idx][0] < best {
					bit, best = idx, requests[idx][0]
				}
			}
			// Stop tracking this bit (and alloc notifications if no more work is available)
			delete(unallocs, bit)
			if len(unallocs) == 0 {
				retrievers = nil
			}
			allocs++
			fetcher <- bit

		case fetcher := <-m.counters:
			// New task count request arrives, return number of items
			// (the bit in question is read from the same channel the reply is sent on)
			fetcher <- uint(len(requests[<-fetcher]))

		case fetcher := <-m.retrievals:
			// New fetcher waiting for tasks to retrieve, assign
			task := <-fetcher
			if want := len(task.Sections); want >= len(requests[task.Bit]) {
				// The fetcher can take everything that is queued for the bit
				task.Sections = requests[task.Bit]
				delete(requests, task.Bit)
			} else {
				// Hand out the first want sections and keep the remainder queued
				task.Sections = append(task.Sections[:0], requests[task.Bit][:want]...)
				requests[task.Bit] = append(requests[task.Bit][:0], requests[task.Bit][want:]...)
			}
			fetcher <- task

			// If anything was left unallocated, try to assign to someone else
			if len(requests[task.Bit]) > 0 {
				assign(task.Bit)
			}

		case result := <-m.deliveries:
			// New retrieval task response from fetcher, split out missing sections and
			// deliver complete ones
			var (
				sections = make([]uint64, 0, len(result.Sections))
				bitsets  = make([][]byte, 0, len(result.Bitsets))
				missing  = make([]uint64, 0, len(result.Sections))
			)
			for i, bitset := range result.Bitsets {
				// An empty bitset marks a section the fetcher could not provide
				if len(bitset) == 0 {
					missing = append(missing, result.Sections[i])
					continue
				}
				sections = append(sections, result.Sections[i])
				bitsets = append(bitsets, bitset)
			}
			m.schedulers[result.Bit].deliver(sections, bitsets)
			allocs--

			// Reschedule missing sections and allocate bit if newly available
			if len(missing) > 0 {
				queue := requests[result.Bit]
				for _, section := range missing {
					index := sort.Search(len(queue), func(i int) bool { return queue[i] >= section })
					queue = append(queue[:index], append([]uint64{section}, queue[index:]...)...)
				}
				requests[result.Bit] = queue

				// Equal lengths mean the queue was empty before the re-insertion
				if len(queue) == len(missing) {
					assign(result.Bit)
				}
			}
			// If we're in the process of shutting down, terminate
			if allocs == 0 && shutdown == nil {
				return
			}
		}
	}
}
   513  
   514  // MatcherSession is returned by a started matcher to be used as a terminator
   515  // for the actively running matching operation.
   516  type MatcherSession struct {
   517  	matcher *Matcher
   518  
   519  	closer sync.Once     // Sync object to ensure we only ever close once
   520  	quit   chan struct{} // Quit channel to request pipeline termination
   521  	kill   chan struct{} // Term channel to signal non-graceful forced shutdown
   522  
   523  	ctx context.Context // Context used by the light client to abort filtering
   524  	err atomic.Value    // Global error to track retrieval failures deep in the chain
   525  
   526  	pend sync.WaitGroup
   527  }
   528  
   529  // Close stops the matching process and waits for all subprocesses to terminate
   530  // before returning. The timeout may be used for graceful shutdown, allowing the
   531  // currently running retrievals to complete before this time.
   532  func (s *MatcherSession) Close() {
   533  	s.closer.Do(func() {
   534  		// Signal termination and wait for all goroutines to tear down
   535  		close(s.quit)
   536  		time.AfterFunc(time.Second, func() { close(s.kill) })
   537  		s.pend.Wait()
   538  	})
   539  }
   540  
   541  // Error returns any failure encountered during the matching session.
   542  func (s *MatcherSession) Error() error {
   543  	if err := s.err.Load(); err != nil {
   544  		return err.(error)
   545  	}
   546  	return nil
   547  }
   548  
   549  // AllocateRetrieval assigns a bloom bit index to a client process that can either
   550  // immediately request and fetch the section contents assigned to this bit or wait
   551  // a little while for more sections to be requested.
   552  func (s *MatcherSession) AllocateRetrieval() (uint, bool) {
   553  	fetcher := make(chan uint)
   554  
   555  	select {
   556  	case <-s.quit:
   557  		return 0, false
   558  	case s.matcher.retrievers <- fetcher:
   559  		bit, ok := <-fetcher
   560  		return bit, ok
   561  	}
   562  }
   563  
   564  // PendingSections returns the number of pending section retrievals belonging to
   565  // the given bloom bit index.
   566  func (s *MatcherSession) PendingSections(bit uint) int {
   567  	fetcher := make(chan uint)
   568  
   569  	select {
   570  	case <-s.quit:
   571  		return 0
   572  	case s.matcher.counters <- fetcher:
   573  		fetcher <- bit
   574  		return int(<-fetcher)
   575  	}
   576  }
   577  
   578  // AllocateSections assigns all or part of an already allocated bit-task queue
   579  // to the requesting process.
   580  func (s *MatcherSession) AllocateSections(bit uint, count int) []uint64 {
   581  	fetcher := make(chan *Retrieval)
   582  
   583  	select {
   584  	case <-s.quit:
   585  		return nil
   586  	case s.matcher.retrievals <- fetcher:
   587  		task := &Retrieval{
   588  			Bit:      bit,
   589  			Sections: make([]uint64, count),
   590  		}
   591  		fetcher <- task
   592  		return (<-fetcher).Sections
   593  	}
   594  }
   595  
   596  // DeliverSections delivers a batch of section bit-vectors for a specific bloom
   597  // bit index to be injected into the processing pipeline.
   598  func (s *MatcherSession) DeliverSections(bit uint, sections []uint64, bitsets [][]byte) {
   599  	select {
   600  	case <-s.kill:
   601  		return
   602  	case s.matcher.deliveries <- &Retrieval{Bit: bit, Sections: sections, Bitsets: bitsets}:
   603  	}
   604  }
   605  
   606  // Multiplex polls the matcher session for retrieval tasks and multiplexes it into
   607  // the requested retrieval queue to be serviced together with other sessions.
   608  //
   609  // This method will block for the lifetime of the session. Even after termination
   610  // of the session, any request in-flight need to be responded to! Empty responses
   611  // are fine though in that case.
   612  func (s *MatcherSession) Multiplex(batch int, wait time.Duration, mux chan chan *Retrieval) {
   613  	for {
   614  		// Allocate a new bloom bit index to retrieve data for, stopping when done
   615  		bit, ok := s.AllocateRetrieval()
   616  		if !ok {
   617  			return
   618  		}
   619  		// Bit allocated, throttle a bit if we're below our batch limit
   620  		if s.PendingSections(bit) < batch {
   621  			select {
   622  			case <-s.quit:
   623  				// Session terminating, we can't meaningfully service, abort
   624  				s.AllocateSections(bit, 0)
   625  				s.DeliverSections(bit, []uint64{}, [][]byte{})
   626  				return
   627  
   628  			case <-time.After(wait):
   629  				// Throttling up, fetch whatever's available
   630  			}
   631  		}
   632  		// Allocate as much as we can handle and request servicing
   633  		sections := s.AllocateSections(bit, batch)
   634  		request := make(chan *Retrieval)
   635  
   636  		select {
   637  		case <-s.quit:
   638  			// Session terminating, we can't meaningfully service, abort
   639  			s.DeliverSections(bit, sections, make([][]byte, len(sections)))
   640  			return
   641  
   642  		case mux <- request:
   643  			// Retrieval accepted, something must arrive before we're aborting
   644  			request <- &Retrieval{Bit: bit, Sections: sections, Context: s.ctx}
   645  
   646  			result := <-request
   647  			if result.Error != nil {
   648  				s.err.Store(result.Error)
   649  				s.Close()
   650  			}
   651  			s.DeliverSections(result.Bit, result.Sections, result.Bitsets)
   652  		}
   653  	}
   654  }