github.com/vantum/vantum@v0.0.0-20180815184342-fe37d5f7a990/core/bloombits/matcher.go (about)

     1  // Copyright 2017 The go-ethereum Authors
     2  // This file is part of the go-ethereum library.
     3  //
     4  // The go-ethereum library is free software: you can redistribute it and/or modify
     5  // it under the terms of the GNU Lesser General Public License as published by
     6  // the Free Software Foundation, either version 3 of the License, or
     7  // (at your option) any later version.
     8  //
     9  // The go-ethereum library is distributed in the hope that it will be useful,
    10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    12  // GNU Lesser General Public License for more details.
    13  //
    14  // You should have received a copy of the GNU Lesser General Public License
    15  // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
    16  
    17  package bloombits
    18  
    19  import (
    20  	"bytes"
    21  	"context"
    22  	"errors"
    23  	"math"
    24  	"sort"
    25  	"sync"
    26  	"sync/atomic"
    27  	"time"
    28  
    29  	"github.com/vantum/vantum/common/bitutil"
    30  	"github.com/vantum/vantum/crypto"
    31  )
    32  
// bloomIndexes represents the bit indexes inside the bloom filter that belong
// to some key. Every key maps to exactly three bit indexes.
type bloomIndexes [3]uint
    36  
    37  // calcBloomIndexes returns the bloom filter bit indexes belonging to the given key.
    38  func calcBloomIndexes(b []byte) bloomIndexes {
    39  	b = crypto.Keccak256(b)
    40  
    41  	var idxs bloomIndexes
    42  	for i := 0; i < len(idxs); i++ {
    43  		idxs[i] = (uint(b[2*i])<<8)&2047 + uint(b[2*i+1])
    44  	}
    45  	return idxs
    46  }
    47  
// partialMatches carries the intermediate match state of a single section as
// it travels along the sub-matcher pipeline. A non-nil bitset represents a
// section in which some sub-matchers have already found potential matches;
// subsequent sub-matchers binary AND their own matches with this bitset. If the
// bitset is nil, the sub-matcher does not restrict its own matches with it.
type partialMatches struct {
	section uint64 // Index of the section the bitset belongs to
	bitset  []byte // Potential matches inside the section, one bit per block
}
    56  
// Retrieval represents a request for retrieval task assignments for a given
// bit with the given number of fetch elements, or a response for such a request.
// It can also have the actual results set to be used as a delivery data struct.
//
// The context and error fields are used by the light client to terminate matching
// early if an error is encountered on some path of the pipeline.
type Retrieval struct {
	Bit      uint     // Bloom bit index the retrieval belongs to
	Sections []uint64 // Section indexes requested for (or delivered with) the bit
	Bitsets  [][]byte // Bit vectors paired with Sections by position; an empty entry marks a missing section

	Context context.Context // Context of the session issuing the retrieval
	Error   error           // Failure reported by the retriever; a non-nil value closes the session
}
    71  
// Matcher is a pipelined system of schedulers and logic matchers which perform
// binary AND/OR operations on the bit-streams, creating a stream of potential
// blocks to inspect for data content.
type Matcher struct {
	sectionSize uint64 // Size of the data batches to filter on

	filters    [][]bloomIndexes    // Filter the system is matching for
	schedulers map[uint]*scheduler // Retrieval schedulers for loading bloom bits, keyed by bit index

	// The channels below are unbuffered rendezvous points between the session
	// methods used by retrievers and the distributor goroutine.
	retrievers chan chan uint       // Retriever processes waiting for bit allocations
	counters   chan chan uint       // Retriever processes waiting for task count reports
	retrievals chan chan *Retrieval // Retriever processes waiting for task allocations
	deliveries chan *Retrieval      // Retriever processes waiting for task response deliveries

	running uint32 // Atomic flag whether a session is live or not
}
    88  
    89  // NewMatcher creates a new pipeline for retrieving bloom bit streams and doing
    90  // address and topic filtering on them. Setting a filter component to `nil` is
    91  // allowed and will result in that filter rule being skipped (OR 0x11...1).
    92  func NewMatcher(sectionSize uint64, filters [][][]byte) *Matcher {
    93  	// Create the matcher instance
    94  	m := &Matcher{
    95  		sectionSize: sectionSize,
    96  		schedulers:  make(map[uint]*scheduler),
    97  		retrievers:  make(chan chan uint),
    98  		counters:    make(chan chan uint),
    99  		retrievals:  make(chan chan *Retrieval),
   100  		deliveries:  make(chan *Retrieval),
   101  	}
   102  	// Calculate the bloom bit indexes for the groups we're interested in
   103  	m.filters = nil
   104  
   105  	for _, filter := range filters {
   106  		// Gather the bit indexes of the filter rule, special casing the nil filter
   107  		if len(filter) == 0 {
   108  			continue
   109  		}
   110  		bloomBits := make([]bloomIndexes, len(filter))
   111  		for i, clause := range filter {
   112  			if clause == nil {
   113  				bloomBits = nil
   114  				break
   115  			}
   116  			bloomBits[i] = calcBloomIndexes(clause)
   117  		}
   118  		// Accumulate the filter rules if no nil rule was within
   119  		if bloomBits != nil {
   120  			m.filters = append(m.filters, bloomBits)
   121  		}
   122  	}
   123  	// For every bit, create a scheduler to load/download the bit vectors
   124  	for _, bloomIndexLists := range m.filters {
   125  		for _, bloomIndexList := range bloomIndexLists {
   126  			for _, bloomIndex := range bloomIndexList {
   127  				m.addScheduler(bloomIndex)
   128  			}
   129  		}
   130  	}
   131  	return m
   132  }
   133  
   134  // addScheduler adds a bit stream retrieval scheduler for the given bit index if
   135  // it has not existed before. If the bit is already selected for filtering, the
   136  // existing scheduler can be used.
   137  func (m *Matcher) addScheduler(idx uint) {
   138  	if _, ok := m.schedulers[idx]; ok {
   139  		return
   140  	}
   141  	m.schedulers[idx] = newScheduler(idx)
   142  }
   143  
// Start starts the matching process and returns a stream of bloom matches in
// a given range of blocks. If there are no more matches in the range, the result
// channel is closed.
//
// The block range [begin, end] is inclusive. Matching block numbers are sent on
// results, whose capacity is also used to size the internal pipeline buffers.
// The returned session is used to terminate the matching; an error is returned
// only if another Start call is in progress on the same matcher.
func (m *Matcher) Start(ctx context.Context, begin, end uint64, results chan uint64) (*MatcherSession, error) {
	// Make sure we're not creating concurrent sessions
	if atomic.SwapUint32(&m.running, 1) == 1 {
		return nil, errors.New("matcher already running")
	}
	// NOTE(review): the flag is cleared as soon as Start returns, so it only
	// rejects Start calls overlapping in time, not a second session started
	// while an earlier one is still live - confirm this is intended.
	defer atomic.StoreUint32(&m.running, 0)

	// Initiate a new matching round
	session := &MatcherSession{
		matcher: m,
		quit:    make(chan struct{}),
		kill:    make(chan struct{}),
		ctx:     ctx,
	}
	for _, scheduler := range m.schedulers {
		scheduler.reset()
	}
	sink := m.run(begin, end, cap(results), session)

	// Read the output from the result sink and deliver to the user
	session.pend.Add(1)
	go func() {
		defer session.pend.Done()
		defer close(results)

		for {
			select {
			case <-session.quit:
				return

			case res, ok := <-sink:
				// New match result found
				if !ok {
					return
				}
				// Calculate the first and last blocks of the section, clamped to
				// the requested [begin, end] range
				sectionStart := res.section * m.sectionSize

				first := sectionStart
				if begin > first {
					first = begin
				}
				last := sectionStart + m.sectionSize - 1
				if end < last {
					last = end
				}
				// Iterate over all the blocks in the section and return the matching ones
				for i := first; i <= last; i++ {
					// Skip the entire byte if no matches are found inside (and we're processing an entire byte!)
					// NOTE(review): i%8 is used as the position inside the byte, which
					// assumes sectionStart is a multiple of 8 - confirm.
					next := res.bitset[(i-sectionStart)/8]
					if next == 0 {
						if i%8 == 0 {
							// Together with the loop increment this jumps to the next byte
							i += 7
						}
						continue
					}
					// Some bit is set, do the actual submatching (the most
					// significant bit of a byte belongs to its first block)
					if bit := 7 - i%8; next&(1<<bit) != 0 {
						select {
						case <-session.quit:
							return
						case results <- i:
						}
					}
				}
			}
		}
	}()
	return session, nil
}
   217  
   218  // run creates a daisy-chain of sub-matchers, one for the address set and one
   219  // for each topic set, each sub-matcher receiving a section only if the previous
   220  // ones have all found a potential match in one of the blocks of the section,
   221  // then binary AND-ing its own matches and forwaring the result to the next one.
   222  //
   223  // The method starts feeding the section indexes into the first sub-matcher on a
   224  // new goroutine and returns a sink channel receiving the results.
   225  func (m *Matcher) run(begin, end uint64, buffer int, session *MatcherSession) chan *partialMatches {
   226  	// Create the source channel and feed section indexes into
   227  	source := make(chan *partialMatches, buffer)
   228  
   229  	session.pend.Add(1)
   230  	go func() {
   231  		defer session.pend.Done()
   232  		defer close(source)
   233  
   234  		for i := begin / m.sectionSize; i <= end/m.sectionSize; i++ {
   235  			select {
   236  			case <-session.quit:
   237  				return
   238  			case source <- &partialMatches{i, bytes.Repeat([]byte{0xff}, int(m.sectionSize/8))}:
   239  			}
   240  		}
   241  	}()
   242  	// Assemble the daisy-chained filtering pipeline
   243  	next := source
   244  	dist := make(chan *request, buffer)
   245  
   246  	for _, bloom := range m.filters {
   247  		next = m.subMatch(next, dist, bloom, session)
   248  	}
   249  	// Start the request distribution
   250  	session.pend.Add(1)
   251  	go m.distributor(dist, session)
   252  
   253  	return next
   254  }
   255  
// subMatch creates a sub-matcher that filters for a set of addresses or topics, binary OR-s those matches, then
// binary AND-s the result to the daisy-chain input (source) and forwards it to the daisy-chain output.
// The matches of each address/topic are calculated by fetching the given sections of the three bloom bit indexes belonging to
// that address/topic, and binary AND-ing those vectors together.
//
// Two goroutines are started and tracked by the session: the first multiplexes
// every incoming section index to the schedulers of all involved bits, the
// second collects the fetched bit vectors and merges them. Sections without any
// remaining potential match are dropped instead of being forwarded.
func (m *Matcher) subMatch(source chan *partialMatches, dist chan *request, bloom []bloomIndexes, session *MatcherSession) chan *partialMatches {
	// Start the concurrent schedulers for each bit required by the bloom filter
	sectionSources := make([][3]chan uint64, len(bloom))
	sectionSinks := make([][3]chan []byte, len(bloom))
	for i, bits := range bloom {
		for j, bit := range bits {
			sectionSources[i][j] = make(chan uint64, cap(source))
			sectionSinks[i][j] = make(chan []byte, cap(source))

			m.schedulers[bit].run(sectionSources[i][j], dist, sectionSinks[i][j], session.quit, &session.pend)
		}
	}

	process := make(chan *partialMatches, cap(source)) // entries from source are forwarded here after fetches have been initiated
	results := make(chan *partialMatches, cap(source))

	session.pend.Add(2)
	go func() {
		// Tear down the goroutine and terminate all source channels
		defer session.pend.Done()
		defer close(process)

		defer func() {
			for _, bloomSources := range sectionSources {
				for _, bitSource := range bloomSources {
					close(bitSource)
				}
			}
		}()
		// Read sections from the source channel and multiplex into all bit-schedulers
		for {
			select {
			case <-session.quit:
				return

			case subres, ok := <-source:
				// New subresult from previous link
				if !ok {
					return
				}
				// Multiplex the section index to all bit-schedulers
				for _, bloomSources := range sectionSources {
					for _, bitSource := range bloomSources {
						select {
						case <-session.quit:
							return
						case bitSource <- subres.section:
						}
					}
				}
				// Notify the processor that this section will become available
				select {
				case <-session.quit:
					return
				case process <- subres:
				}
			}
		}
	}()

	go func() {
		// Tear down the goroutine and terminate the final sink channel
		defer session.pend.Done()
		defer close(results)

		// Read the source notifications and collect the delivered results
		for {
			select {
			case <-session.quit:
				return

			case subres, ok := <-process:
				// Notified of a section being retrieved
				if !ok {
					return
				}
				// Gather all the sub-results and merge them together: the three
				// bit vectors of one key are AND-ed, the keys are OR-ed
				var orVector []byte
				for _, bloomSinks := range sectionSinks {
					var andVector []byte
					for _, bitSink := range bloomSinks {
						var data []byte
						select {
						case <-session.quit:
							return
						case data = <-bitSink:
						}
						if andVector == nil {
							// Copy the first vector so the merge never modifies delivered data
							andVector = make([]byte, int(m.sectionSize/8))
							copy(andVector, data)
						} else {
							bitutil.ANDBytes(andVector, andVector, data)
						}
					}
					if orVector == nil {
						orVector = andVector
					} else {
						bitutil.ORBytes(orVector, orVector, andVector)
					}
				}

				// No key in this rule at all, nothing can match
				if orVector == nil {
					orVector = make([]byte, int(m.sectionSize/8))
				}
				// Restrict the matches to those surviving the previous links
				if subres.bitset != nil {
					bitutil.ANDBytes(orVector, orVector, subres.bitset)
				}
				// Forward the section only if some potential match is left
				if bitutil.TestBytes(orVector) {
					select {
					case <-session.quit:
						return
					case results <- &partialMatches{subres.section, orVector}:
					}
				}
			}
		}
	}()
	return results
}
   379  
// distributor receives requests from the schedulers and queues them into a set
// of pending requests, which are assigned to retrievers wanting to fulfil them.
//
// It runs until the session is shut down. After a quit request it keeps
// servicing deliveries until every allocated bit has been delivered for, or
// terminates immediately once the session's kill channel fires.
func (m *Matcher) distributor(dist chan *request, session *MatcherSession) {
	defer session.pend.Done()

	var (
		requests   = make(map[uint][]uint64) // Per-bit list of section requests, ordered by section number
		unallocs   = make(map[uint]struct{}) // Bits with pending requests but not allocated to any retriever
		retrievers chan chan uint            // Waiting retrievers (toggled to nil if unallocs is empty)
	)
	var (
		allocs   int            // Number of active allocations to handle graceful shutdown requests
		shutdown = session.quit // Shutdown request channel, will gracefully wait for pending requests
	)

	// assign is a helper method to try to assign a pending bit to an actively
	// listening servicer, or schedule it up for later when one arrives.
	assign := func(bit uint) {
		select {
		case fetcher := <-m.retrievers:
			allocs++
			fetcher <- bit
		default:
			// No retrievers active, start listening for new ones
			retrievers = m.retrievers
			unallocs[bit] = struct{}{}
		}
	}

	for {
		select {
		case <-shutdown:
			// Graceful shutdown requested, wait until all pending requests are honoured
			if allocs == 0 {
				return
			}
			// Disable this case, the delivery handler below finishes the shutdown
			shutdown = nil

		case <-session.kill:
			// Pending requests not honoured in time, hard terminate
			return

		case req := <-dist:
			// New retrieval request arrived to be distributed to some fetcher process;
			// insert it so that the per-bit queue stays ordered by section number
			queue := requests[req.bit]
			index := sort.Search(len(queue), func(i int) bool { return queue[i] >= req.section })
			requests[req.bit] = append(queue[:index], append([]uint64{req.section}, queue[index:]...)...)

			// If it's a new bit and we have waiting fetchers, allocate to them
			if len(queue) == 0 {
				assign(req.bit)
			}

		case fetcher := <-retrievers:
			// New retriever arrived, find the lowest section-ed bit to assign
			bit, best := uint(0), uint64(math.MaxUint64)
			for idx := range unallocs {
				if requests[idx][0] < best {
					bit, best = idx, requests[idx][0]
				}
			}
			// Stop tracking this bit (and alloc notifications if no more work is available)
			delete(unallocs, bit)
			if len(unallocs) == 0 {
				retrievers = nil
			}
			allocs++
			fetcher <- bit

		case fetcher := <-m.counters:
			// New task count request arrives, return number of items; the bit is
			// read from the very channel the count is replied on
			fetcher <- uint(len(requests[<-fetcher]))

		case fetcher := <-m.retrievals:
			// New fetcher waiting for tasks to retrieve, assign; the length of the
			// incoming Sections slice is the number of sections wanted
			task := <-fetcher
			if want := len(task.Sections); want >= len(requests[task.Bit]) {
				task.Sections = requests[task.Bit]
				delete(requests, task.Bit)
			} else {
				task.Sections = append(task.Sections[:0], requests[task.Bit][:want]...)
				requests[task.Bit] = append(requests[task.Bit][:0], requests[task.Bit][want:]...)
			}
			fetcher <- task

			// If anything was left unallocated, try to assign to someone else
			if len(requests[task.Bit]) > 0 {
				assign(task.Bit)
			}

		case result := <-m.deliveries:
			// New retrieval task response from fetcher, split out missing sections and
			// deliver complete ones
			var (
				sections = make([]uint64, 0, len(result.Sections))
				bitsets  = make([][]byte, 0, len(result.Bitsets))
				missing  = make([]uint64, 0, len(result.Sections))
			)
			for i, bitset := range result.Bitsets {
				if len(bitset) == 0 {
					missing = append(missing, result.Sections[i])
					continue
				}
				sections = append(sections, result.Sections[i])
				bitsets = append(bitsets, bitset)
			}
			m.schedulers[result.Bit].deliver(sections, bitsets)
			allocs--

			// Reschedule missing sections and allocate bit if newly available
			if len(missing) > 0 {
				queue := requests[result.Bit]
				for _, section := range missing {
					index := sort.Search(len(queue), func(i int) bool { return queue[i] >= section })
					queue = append(queue[:index], append([]uint64{section}, queue[index:]...)...)
				}
				requests[result.Bit] = queue

				// The queue was empty before, so the bit is not assigned to anyone yet
				if len(queue) == len(missing) {
					assign(result.Bit)
				}
			}
			// If we're in the process of shutting down, terminate
			if allocs == 0 && shutdown == nil {
				return
			}
		}
	}
}
   509  
// MatcherSession is returned by a started matcher to be used as a terminator
// for the actively running matching operation. Retriever processes also use it
// to request work from, and deliver results to, the matcher.
type MatcherSession struct {
	matcher *Matcher // Matcher the session was started on

	closer sync.Once     // Sync object to ensure we only ever close once
	quit   chan struct{} // Quit channel to request pipeline termination
	kill   chan struct{} // Term channel to signal non-graceful forced shutdown

	ctx context.Context // Context used by the light client to abort filtering
	err atomic.Value    // Global error to track retrieval failures deep in the chain

	pend sync.WaitGroup // Tracks the goroutines of the session, waited on by Close
}
   524  
   525  // Close stops the matching process and waits for all subprocesses to terminate
   526  // before returning. The timeout may be used for graceful shutdown, allowing the
   527  // currently running retrievals to complete before this time.
   528  func (s *MatcherSession) Close() {
   529  	s.closer.Do(func() {
   530  		// Signal termination and wait for all goroutines to tear down
   531  		close(s.quit)
   532  		time.AfterFunc(time.Second, func() { close(s.kill) })
   533  		s.pend.Wait()
   534  	})
   535  }
   536  
   537  // Error returns any failure encountered during the matching session.
   538  func (s *MatcherSession) Error() error {
   539  	if err := s.err.Load(); err != nil {
   540  		return err.(error)
   541  	}
   542  	return nil
   543  }
   544  
   545  // AllocateRetrieval assigns a bloom bit index to a client process that can either
   546  // immediately reuest and fetch the section contents assigned to this bit or wait
   547  // a little while for more sections to be requested.
   548  func (s *MatcherSession) AllocateRetrieval() (uint, bool) {
   549  	fetcher := make(chan uint)
   550  
   551  	select {
   552  	case <-s.quit:
   553  		return 0, false
   554  	case s.matcher.retrievers <- fetcher:
   555  		bit, ok := <-fetcher
   556  		return bit, ok
   557  	}
   558  }
   559  
   560  // PendingSections returns the number of pending section retrievals belonging to
   561  // the given bloom bit index.
   562  func (s *MatcherSession) PendingSections(bit uint) int {
   563  	fetcher := make(chan uint)
   564  
   565  	select {
   566  	case <-s.quit:
   567  		return 0
   568  	case s.matcher.counters <- fetcher:
   569  		fetcher <- bit
   570  		return int(<-fetcher)
   571  	}
   572  }
   573  
   574  // AllocateSections assigns all or part of an already allocated bit-task queue
   575  // to the requesting process.
   576  func (s *MatcherSession) AllocateSections(bit uint, count int) []uint64 {
   577  	fetcher := make(chan *Retrieval)
   578  
   579  	select {
   580  	case <-s.quit:
   581  		return nil
   582  	case s.matcher.retrievals <- fetcher:
   583  		task := &Retrieval{
   584  			Bit:      bit,
   585  			Sections: make([]uint64, count),
   586  		}
   587  		fetcher <- task
   588  		return (<-fetcher).Sections
   589  	}
   590  }
   591  
   592  // DeliverSections delivers a batch of section bit-vectors for a specific bloom
   593  // bit index to be injected into the processing pipeline.
   594  func (s *MatcherSession) DeliverSections(bit uint, sections []uint64, bitsets [][]byte) {
   595  	select {
   596  	case <-s.kill:
   597  		return
   598  	case s.matcher.deliveries <- &Retrieval{Bit: bit, Sections: sections, Bitsets: bitsets}:
   599  	}
   600  }
   601  
   602  // Multiplex polls the matcher session for rerieval tasks and multiplexes it into
   603  // the reuested retrieval queue to be serviced together with other sessions.
   604  //
   605  // This method will block for the lifetime of the session. Even after termination
   606  // of the session, any request in-flight need to be responded to! Empty responses
   607  // are fine though in that case.
   608  func (s *MatcherSession) Multiplex(batch int, wait time.Duration, mux chan chan *Retrieval) {
   609  	for {
   610  		// Allocate a new bloom bit index to retrieve data for, stopping when done
   611  		bit, ok := s.AllocateRetrieval()
   612  		if !ok {
   613  			return
   614  		}
   615  		// Bit allocated, throttle a bit if we're below our batch limit
   616  		if s.PendingSections(bit) < batch {
   617  			select {
   618  			case <-s.quit:
   619  				// Session terminating, we can't meaningfully service, abort
   620  				s.AllocateSections(bit, 0)
   621  				s.DeliverSections(bit, []uint64{}, [][]byte{})
   622  				return
   623  
   624  			case <-time.After(wait):
   625  				// Throttling up, fetch whatever's available
   626  			}
   627  		}
   628  		// Allocate as much as we can handle and request servicing
   629  		sections := s.AllocateSections(bit, batch)
   630  		request := make(chan *Retrieval)
   631  
   632  		select {
   633  		case <-s.quit:
   634  			// Session terminating, we can't meaningfully service, abort
   635  			s.DeliverSections(bit, sections, make([][]byte, len(sections)))
   636  			return
   637  
   638  		case mux <- request:
   639  			// Retrieval accepted, something must arrive before we're aborting
   640  			request <- &Retrieval{Bit: bit, Sections: sections, Context: s.ctx}
   641  
   642  			result := <-request
   643  			if result.Error != nil {
   644  				s.err.Store(result.Error)
   645  				s.Close()
   646  			}
   647  			s.DeliverSections(result.Bit, result.Sections, result.Bitsets)
   648  		}
   649  	}
   650  }