github.com/waltonchain/waltonchain_gwtc_src@v1.1.4-0.20201225072101-8a298c95a819/core/bloombits/matcher.go (about)

     1  // Copyright 2017 The go-ethereum Authors
     2  // This file is part of the go-ethereum library.
     3  //
     4  // The go-wtc library is free software: you can redistribute it and/or modify
     5  // it under the terms of the GNU Lesser General Public License as published by
     6  // the Free Software Foundation, either version 3 of the License, or
     7  // (at your option) any later version.
     8  //
     9  // The go-wtc library is distributed in the hope that it will be useful,
    10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    12  // GNU Lesser General Public License for more details.
    13  //
    14  // You should have received a copy of the GNU Lesser General Public License
    15  // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
    16  
    17  package bloombits
    18  
    19  import (
    20  	"bytes"
    21  	"errors"
    22  	"math"
    23  	"sort"
    24  	"sync"
    25  	"sync/atomic"
    26  	"time"
    27  
    28  	"github.com/wtc/go-wtc/common/bitutil"
    29  	"github.com/wtc/go-wtc/crypto"
    30  )
    31  
// bloomIndexes represents the bit indexes inside the bloom filter that belong
// to some key. Every key maps to exactly three bit positions, which are derived
// from the key by calcBloomIndexes.
type bloomIndexes [3]uint
    35  
    36  // calcBloomIndexes returns the bloom filter bit indexes belonging to the given key.
    37  func calcBloomIndexes(b []byte) bloomIndexes {
    38  	b = crypto.Keccak256(b)
    39  
    40  	var idxs bloomIndexes
    41  	for i := 0; i < len(idxs); i++ {
    42  		idxs[i] = (uint(b[2*i])<<8)&2047 + uint(b[2*i+1])
    43  	}
    44  	return idxs
    45  }
    46  
// partialMatches represents a section together with the bitset of potential
// matches that the preceding sub-matchers have found in it so far. The bitset
// is read one bit per block, most significant bit first. Subsequent sub-matchers
// binary AND their own matches with this bitset. A nil bitset places no
// restriction on the section: the sub-matcher then forwards its own matches
// without AND-ing.
type partialMatches struct {
	section uint64 // Index of the section the bitset belongs to
	bitset  []byte // Potential matches found so far inside the section
}
    55  
// Retrieval represents a request for retrieval task assignments for a given
// bit with the given number of fetch elements, or a response for such a request.
// It can also have the actual results set to be used as a delivery data struct.
type Retrieval struct {
	Bit      uint     // Bloom bit index the retrieval belongs to
	Sections []uint64 // Section indexes being requested, allocated or delivered
	Bitsets  [][]byte // Retrieved bit-vectors, Bitsets[i] belongs to Sections[i]; an empty entry marks the section as missing
}
    64  
// Matcher is a pipelined system of schedulers and logic matchers which perform
// binary AND/OR operations on the bit-streams, creating a stream of potential
// blocks to inspect for data content.
type Matcher struct {
	sectionSize uint64 // Size of the data batches to filter on

	filters    [][]bloomIndexes    // Filter the system is matching for
	schedulers map[uint]*scheduler // Retrieval schedulers for loading bloom bits, keyed by bit index

	// The channels below are unbuffered rendezvous points between the session
	// methods called by retriever processes and the distributor goroutine.
	retrievers chan chan uint       // Retriever processes waiting for bit allocations
	counters   chan chan uint       // Retriever processes waiting for task count reports
	retrievals chan chan *Retrieval // Retriever processes waiting for task allocations
	deliveries chan *Retrieval      // Retriever processes waiting for task response deliveries

	running uint32 // Atomic flag set while Start is executing; it is cleared again when Start returns
}
    81  
    82  // NewMatcher creates a new pipeline for retrieving bloom bit streams and doing
    83  // address and topic filtering on them. Setting a filter component to `nil` is
    84  // allowed and will result in that filter rule being skipped (OR 0x11...1).
    85  func NewMatcher(sectionSize uint64, filters [][][]byte) *Matcher {
    86  	// Create the matcher instance
    87  	m := &Matcher{
    88  		sectionSize: sectionSize,
    89  		schedulers:  make(map[uint]*scheduler),
    90  		retrievers:  make(chan chan uint),
    91  		counters:    make(chan chan uint),
    92  		retrievals:  make(chan chan *Retrieval),
    93  		deliveries:  make(chan *Retrieval),
    94  	}
    95  	// Calculate the bloom bit indexes for the groups we're interested in
    96  	m.filters = nil
    97  
    98  	for _, filter := range filters {
    99  		// Gather the bit indexes of the filter rule, special casing the nil filter
   100  		if len(filter) == 0 {
   101  			continue
   102  		}
   103  		bloomBits := make([]bloomIndexes, len(filter))
   104  		for i, clause := range filter {
   105  			if clause == nil {
   106  				bloomBits = nil
   107  				break
   108  			}
   109  			bloomBits[i] = calcBloomIndexes(clause)
   110  		}
   111  		// Accumulate the filter rules if no nil rule was within
   112  		if bloomBits != nil {
   113  			m.filters = append(m.filters, bloomBits)
   114  		}
   115  	}
   116  	// For every bit, create a scheduler to load/download the bit vectors
   117  	for _, bloomIndexLists := range m.filters {
   118  		for _, bloomIndexList := range bloomIndexLists {
   119  			for _, bloomIndex := range bloomIndexList {
   120  				m.addScheduler(bloomIndex)
   121  			}
   122  		}
   123  	}
   124  	return m
   125  }
   126  
   127  // addScheduler adds a bit stream retrieval scheduler for the given bit index if
   128  // it has not existed before. If the bit is already selected for filtering, the
   129  // existing scheduler can be used.
   130  func (m *Matcher) addScheduler(idx uint) {
   131  	if _, ok := m.schedulers[idx]; ok {
   132  		return
   133  	}
   134  	m.schedulers[idx] = newScheduler(idx)
   135  }
   136  
   137  // Start starts the matching process and returns a stream of bloom matches in
   138  // a given range of blocks. If there are no more matches in the range, the result
   139  // channel is closed.
   140  func (m *Matcher) Start(begin, end uint64, results chan uint64) (*MatcherSession, error) {
   141  	// Make sure we're not creating concurrent sessions
   142  	if atomic.SwapUint32(&m.running, 1) == 1 {
   143  		return nil, errors.New("matcher already running")
   144  	}
   145  	defer atomic.StoreUint32(&m.running, 0)
   146  
   147  	// Initiate a new matching round
   148  	session := &MatcherSession{
   149  		matcher: m,
   150  		quit:    make(chan struct{}),
   151  		kill:    make(chan struct{}),
   152  	}
   153  	for _, scheduler := range m.schedulers {
   154  		scheduler.reset()
   155  	}
   156  	sink := m.run(begin, end, cap(results), session)
   157  
   158  	// Read the output from the result sink and deliver to the user
   159  	session.pend.Add(1)
   160  	go func() {
   161  		defer session.pend.Done()
   162  		defer close(results)
   163  
   164  		for {
   165  			select {
   166  			case <-session.quit:
   167  				return
   168  
   169  			case res, ok := <-sink:
   170  				// New match result found
   171  				if !ok {
   172  					return
   173  				}
   174  				// Calculate the first and last blocks of the section
   175  				sectionStart := res.section * m.sectionSize
   176  
   177  				first := sectionStart
   178  				if begin > first {
   179  					first = begin
   180  				}
   181  				last := sectionStart + m.sectionSize - 1
   182  				if end < last {
   183  					last = end
   184  				}
   185  				// Iterate over all the blocks in the section and return the matching ones
   186  				for i := first; i <= last; i++ {
   187  					// Skip the entire byte if no matches are found inside
   188  					next := res.bitset[(i-sectionStart)/8]
   189  					if next == 0 {
   190  						i += 7
   191  						continue
   192  					}
   193  					// Some bit it set, do the actual submatching
   194  					if bit := 7 - i%8; next&(1<<bit) != 0 {
   195  						select {
   196  						case <-session.quit:
   197  							return
   198  						case results <- i:
   199  						}
   200  					}
   201  				}
   202  			}
   203  		}
   204  	}()
   205  	return session, nil
   206  }
   207  
   208  // run creates a daisy-chain of sub-matchers, one for the address set and one
   209  // for each topic set, each sub-matcher receiving a section only if the previous
   210  // ones have all found a potential match in one of the blocks of the section,
   211  // then binary AND-ing its own matches and forwaring the result to the next one.
   212  //
   213  // The method starts feeding the section indexes into the first sub-matcher on a
   214  // new goroutine and returns a sink channel receiving the results.
   215  func (m *Matcher) run(begin, end uint64, buffer int, session *MatcherSession) chan *partialMatches {
   216  	// Create the source channel and feed section indexes into
   217  	source := make(chan *partialMatches, buffer)
   218  
   219  	session.pend.Add(1)
   220  	go func() {
   221  		defer session.pend.Done()
   222  		defer close(source)
   223  
   224  		for i := begin / m.sectionSize; i <= end/m.sectionSize; i++ {
   225  			select {
   226  			case <-session.quit:
   227  				return
   228  			case source <- &partialMatches{i, bytes.Repeat([]byte{0xff}, int(m.sectionSize/8))}:
   229  			}
   230  		}
   231  	}()
   232  	// Assemble the daisy-chained filtering pipeline
   233  	next := source
   234  	dist := make(chan *request, buffer)
   235  
   236  	for _, bloom := range m.filters {
   237  		next = m.subMatch(next, dist, bloom, session)
   238  	}
   239  	// Start the request distribution
   240  	session.pend.Add(1)
   241  	go m.distributor(dist, session)
   242  
   243  	return next
   244  }
   245  
// subMatch creates a sub-matcher that filters for a set of addresses or topics,
// binary OR-s those matches, then binary AND-s the result to the daisy-chain
// input (source) and forwards it to the daisy-chain output.
//
// The matches of each address/topic are calculated by fetching the given
// sections of the three bloom bit indexes belonging to that address/topic, and
// binary AND-ing those vectors together.
//
// Two goroutines are started, both tracked by session.pend and both aborting
// when session.quit is closed: the first multiplexes incoming sections to the
// bit-schedulers, the second collects the scheduler outputs and merges them.
// The returned channel is closed when the second goroutine exits.
func (m *Matcher) subMatch(source chan *partialMatches, dist chan *request, bloom []bloomIndexes, session *MatcherSession) chan *partialMatches {
	// Start the concurrent schedulers for each bit required by the bloom filter.
	// Every (clause, bit) pair gets its own section input and bitset output
	// channel, buffered like the source channel.
	sectionSources := make([][3]chan uint64, len(bloom))
	sectionSinks := make([][3]chan []byte, len(bloom))
	for i, bits := range bloom {
		for j, bit := range bits {
			sectionSources[i][j] = make(chan uint64, cap(source))
			sectionSinks[i][j] = make(chan []byte, cap(source))

			m.schedulers[bit].run(sectionSources[i][j], dist, sectionSinks[i][j], session.quit, &session.pend)
		}
	}

	process := make(chan *partialMatches, cap(source)) // entries from source are forwarded here after fetches have been initiated
	results := make(chan *partialMatches, cap(source))

	session.pend.Add(2)
	go func() {
		// Tear down the goroutine and terminate all source channels
		defer session.pend.Done()
		defer close(process)

		defer func() {
			for _, bloomSources := range sectionSources {
				for _, bitSource := range bloomSources {
					close(bitSource)
				}
			}
		}()
		// Read sections from the source channel and multiplex into all bit-schedulers
		for {
			select {
			case <-session.quit:
				return

			case subres, ok := <-source:
				// New subresult from previous link
				if !ok {
					return
				}
				// Multiplex the section index to all bit-schedulers
				for _, bloomSources := range sectionSources {
					for _, bitSource := range bloomSources {
						select {
						case <-session.quit:
							return
						case bitSource <- subres.section:
						}
					}
				}
				// Notify the processor that this section will become available
				select {
				case <-session.quit:
					return
				case process <- subres:
				}
			}
		}
	}()

	go func() {
		// Tear down the goroutine and terminate the final sink channel
		defer session.pend.Done()
		defer close(results)

		// Read the source notifications and collect the delivered results
		for {
			select {
			case <-session.quit:
				return

			case subres, ok := <-process:
				// Notified of a section being retrieved
				if !ok {
					return
				}
				// Gather all the sub-results and merge them together: the three
				// bit vectors of a clause are AND-ed, the clauses are OR-ed
				var orVector []byte
				for _, bloomSinks := range sectionSinks {
					var andVector []byte
					for _, bitSink := range bloomSinks {
						var data []byte
						select {
						case <-session.quit:
							return
						case data = <-bitSink:
						}
						if andVector == nil {
							// First bit of the clause, copy it so the received
							// vector is never modified in place
							andVector = make([]byte, int(m.sectionSize/8))
							copy(andVector, data)
						} else {
							bitutil.ANDBytes(andVector, andVector, data)
						}
					}
					if orVector == nil {
						orVector = andVector
					} else {
						bitutil.ORBytes(orVector, orVector, andVector)
					}
				}

				// No clauses at all leaves an all-zero vector, i.e. no matches
				if orVector == nil {
					orVector = make([]byte, int(m.sectionSize/8))
				}
				// Restrict to the matches found by the previous links, if any
				if subres.bitset != nil {
					bitutil.ANDBytes(orVector, orVector, subres.bitset)
				}
				// Only forward the section if some potential match survived
				if bitutil.TestBytes(orVector) {
					select {
					case <-session.quit:
						return
					case results <- &partialMatches{subres.section, orVector}:
					}
				}
			}
		}
	}()
	return results
}
   369  
// distributor receives requests from the schedulers and queues them into a set
// of pending requests, which are assigned to retrievers wanting to fulfil them.
//
// The loop runs until the session is shut down: on session.quit it returns
// once no allocated bit is awaiting delivery any more, on session.kill it
// returns immediately.
func (m *Matcher) distributor(dist chan *request, session *MatcherSession) {
	defer session.pend.Done()

	var (
		requests   = make(map[uint][]uint64) // Per-bit list of section requests, ordered by section number
		unallocs   = make(map[uint]struct{}) // Bits with pending requests but not allocated to any retriever
		retrievers chan chan uint            // Waiting retrievers (toggled to nil if unallocs is empty)
	)
	var (
		allocs   int            // Number of active allocations to handle graceful shutdown requests
		shutdown = session.quit // Shutdown request channel, will gracefully wait for pending requests
	)

	// assign is a helper method to try to assign a pending bit to an actively
	// listening servicer, or schedule it up for later when one arrives.
	assign := func(bit uint) {
		select {
		case fetcher := <-m.retrievers:
			allocs++
			fetcher <- bit
		default:
			// No retrievers active, start listening for new ones
			retrievers = m.retrievers
			unallocs[bit] = struct{}{}
		}
	}

	for {
		select {
		case <-shutdown:
			// Graceful shutdown requested, wait until all pending requests are honoured
			if allocs == 0 {
				return
			}
			// Disable this case (a nil channel never becomes ready) so the
			// closed quit channel does not fire over and over again
			shutdown = nil

		case <-session.kill:
			// Pending requests not honoured in time, hard terminate
			return

		case req := <-dist:
			// New retrieval request arrived to be distributed to some fetcher process.
			// Insert the section at its sorted position in the bit's queue.
			queue := requests[req.bit]
			index := sort.Search(len(queue), func(i int) bool { return queue[i] >= req.section })
			requests[req.bit] = append(queue[:index], append([]uint64{req.section}, queue[index:]...)...)

			// If it's a new bit and we have waiting fetchers, allocate to them
			// (queue still refers to the list before the insertion)
			if len(queue) == 0 {
				assign(req.bit)
			}

		case fetcher := <-retrievers:
			// New retriever arrived, find the lowest section-ed bit to assign
			bit, best := uint(0), uint64(math.MaxUint64)
			for idx := range unallocs {
				if requests[idx][0] < best {
					bit, best = idx, requests[idx][0]
				}
			}
			// Stop tracking this bit (and alloc notifications if no more work is available)
			delete(unallocs, bit)
			if len(unallocs) == 0 {
				retrievers = nil
			}
			allocs++
			fetcher <- bit

		case fetcher := <-m.counters:
			// New task count request arrives, return number of items. The bit
			// is read from the same channel the count is replied on.
			fetcher <- uint(len(requests[<-fetcher]))

		case fetcher := <-m.retrievals:
			// New fetcher waiting for tasks to retrieve, assign. The length of
			// task.Sections is the number of sections the fetcher asks for.
			task := <-fetcher
			if want := len(task.Sections); want >= len(requests[task.Bit]) {
				// Everything queued fits, hand over the entire queue
				task.Sections = requests[task.Bit]
				delete(requests, task.Bit)
			} else {
				// Hand over the first want sections and keep the rest queued
				task.Sections = append(task.Sections[:0], requests[task.Bit][:want]...)
				requests[task.Bit] = append(requests[task.Bit][:0], requests[task.Bit][want:]...)
			}
			fetcher <- task

			// If anything was left unallocated, try to assign to someone else
			if len(requests[task.Bit]) > 0 {
				assign(task.Bit)
			}

		case result := <-m.deliveries:
			// New retrieval task response from fetcher, split out missing sections and
			// deliver complete ones
			var (
				sections = make([]uint64, 0, len(result.Sections))
				bitsets  = make([][]byte, 0, len(result.Bitsets))
				missing  = make([]uint64, 0, len(result.Sections))
			)
			for i, bitset := range result.Bitsets {
				// An empty bitset marks a section that was not retrieved
				if len(bitset) == 0 {
					missing = append(missing, result.Sections[i])
					continue
				}
				sections = append(sections, result.Sections[i])
				bitsets = append(bitsets, bitset)
			}
			m.schedulers[result.Bit].deliver(sections, bitsets)
			allocs--

			// Reschedule missing sections and allocate bit if newly available
			if len(missing) > 0 {
				queue := requests[result.Bit]
				for _, section := range missing {
					index := sort.Search(len(queue), func(i int) bool { return queue[i] >= section })
					queue = append(queue[:index], append([]uint64{section}, queue[index:]...)...)
				}
				requests[result.Bit] = queue

				// The queue was empty before the re-insertion, so the bit has
				// no pending allocation yet
				if len(queue) == len(missing) {
					assign(result.Bit)
				}
			}
			// If we're in the process of shutting down, terminate
			if allocs == 0 && shutdown == nil {
				return
			}
		}
	}
}
   499  
// MatcherSession is returned by a started matcher to be used as a terminator
// for the actively running matching operation.
type MatcherSession struct {
	matcher *Matcher // Matcher whose retrieval channels the session methods talk to

	quit chan struct{}  // Quit channel to request pipeline termination
	kill chan struct{}  // Term channel to signal non-graceful forced shutdown
	pend sync.WaitGroup // Tracks the goroutines of the session, waited on by Close
}
   509  
   510  // Close stops the matching process and waits for all subprocesses to terminate
   511  // before returning. The timeout may be used for graceful shutdown, allowing the
   512  // currently running retrievals to complete before this time.
   513  func (s *MatcherSession) Close(timeout time.Duration) {
   514  	// Bail out if the matcher is not running
   515  	select {
   516  	case <-s.quit:
   517  		return
   518  	default:
   519  	}
   520  	// Signal termination and wait for all goroutines to tear down
   521  	close(s.quit)
   522  	time.AfterFunc(timeout, func() { close(s.kill) })
   523  	s.pend.Wait()
   524  }
   525  
   526  // AllocateRetrieval assigns a bloom bit index to a client process that can either
   527  // immediately reuest and fetch the section contents assigned to this bit or wait
   528  // a little while for more sections to be requested.
   529  func (s *MatcherSession) AllocateRetrieval() (uint, bool) {
   530  	fetcher := make(chan uint)
   531  
   532  	select {
   533  	case <-s.quit:
   534  		return 0, false
   535  	case s.matcher.retrievers <- fetcher:
   536  		bit, ok := <-fetcher
   537  		return bit, ok
   538  	}
   539  }
   540  
   541  // PendingSections returns the number of pending section retrievals belonging to
   542  // the given bloom bit index.
   543  func (s *MatcherSession) PendingSections(bit uint) int {
   544  	fetcher := make(chan uint)
   545  
   546  	select {
   547  	case <-s.quit:
   548  		return 0
   549  	case s.matcher.counters <- fetcher:
   550  		fetcher <- bit
   551  		return int(<-fetcher)
   552  	}
   553  }
   554  
   555  // AllocateSections assigns all or part of an already allocated bit-task queue
   556  // to the requesting process.
   557  func (s *MatcherSession) AllocateSections(bit uint, count int) []uint64 {
   558  	fetcher := make(chan *Retrieval)
   559  
   560  	select {
   561  	case <-s.quit:
   562  		return nil
   563  	case s.matcher.retrievals <- fetcher:
   564  		task := &Retrieval{
   565  			Bit:      bit,
   566  			Sections: make([]uint64, count),
   567  		}
   568  		fetcher <- task
   569  		return (<-fetcher).Sections
   570  	}
   571  }
   572  
   573  // DeliverSections delivers a batch of section bit-vectors for a specific bloom
   574  // bit index to be injected into the processing pipeline.
   575  func (s *MatcherSession) DeliverSections(bit uint, sections []uint64, bitsets [][]byte) {
   576  	select {
   577  	case <-s.kill:
   578  		return
   579  	case s.matcher.deliveries <- &Retrieval{Bit: bit, Sections: sections, Bitsets: bitsets}:
   580  	}
   581  }
   582  
   583  // Multiplex polls the matcher session for rerieval tasks and multiplexes it into
   584  // the reuested retrieval queue to be serviced together with other sessions.
   585  //
   586  // This method will block for the lifetime of the session. Even after termination
   587  // of the session, any request in-flight need to be responded to! Empty responses
   588  // are fine though in that case.
   589  func (s *MatcherSession) Multiplex(batch int, wait time.Duration, mux chan chan *Retrieval) {
   590  	for {
   591  		// Allocate a new bloom bit index to retrieve data for, stopping when done
   592  		bit, ok := s.AllocateRetrieval()
   593  		if !ok {
   594  			return
   595  		}
   596  		// Bit allocated, throttle a bit if we're below our batch limit
   597  		if s.PendingSections(bit) < batch {
   598  			select {
   599  			case <-s.quit:
   600  				// Session terminating, we can't meaningfully service, abort
   601  				s.AllocateSections(bit, 0)
   602  				s.DeliverSections(bit, []uint64{}, [][]byte{})
   603  				return
   604  
   605  			case <-time.After(wait):
   606  				// Throttling up, fetch whatever's available
   607  			}
   608  		}
   609  		// Allocate as much as we can handle and request servicing
   610  		sections := s.AllocateSections(bit, batch)
   611  		request := make(chan *Retrieval)
   612  
   613  		select {
   614  		case <-s.quit:
   615  			// Session terminating, we can't meaningfully service, abort
   616  			s.DeliverSections(bit, sections, make([][]byte, len(sections)))
   617  			return
   618  
   619  		case mux <- request:
   620  			// Retrieval accepted, something must arrive before we're aborting
   621  			request <- &Retrieval{Bit: bit, Sections: sections}
   622  
   623  			result := <-request
   624  			s.DeliverSections(result.Bit, result.Sections, result.Bitsets)
   625  		}
   626  	}
   627  }