github.com/arieschain/arieschain@v0.0.0-20191023063405-37c074544356/core/bloombits/matcher.go (about)

     1  package bloombits
     2  
     3  import (
     4  	"bytes"
     5  	"context"
     6  	"errors"
     7  	"math"
     8  	"sort"
     9  	"sync"
    10  	"sync/atomic"
    11  	"time"
    12  
    13  	"github.com/quickchainproject/quickchain/common/bitutil"
    14  	"github.com/quickchainproject/quickchain/crypto"
    15  )
    16  
// bloomIndexes holds the three bit indexes inside the bloom filter that belong
// to some key (see calcBloomIndexes for how they are derived).
type bloomIndexes [3]uint
    20  
    21  // calcBloomIndexes returns the bloom filter bit indexes belonging to the given key.
    22  func calcBloomIndexes(b []byte) bloomIndexes {
    23  	b = crypto.Keccak256(b)
    24  
    25  	var idxs bloomIndexes
    26  	for i := 0; i < len(idxs); i++ {
    27  		idxs[i] = (uint(b[2*i])<<8)&2047 + uint(b[2*i+1])
    28  	}
    29  	return idxs
    30  }
    31  
// partialMatches is the unit of work travelling through the matcher pipeline:
// a section index together with the bitset of blocks in that section that are
// still potential matches. Every sub-matcher binary ANDs its own matches into
// the bitset before forwarding it. Sub-matchers tolerate a nil bitset (the AND
// step is then skipped), although the pipeline source created by Matcher.run
// always starts from an all-ones bitset.
type partialMatches struct {
	section uint64 // Index of the section the bitset belongs to
	bitset  []byte // Potential matches in the section, one bit per block (most significant bit first)
}
    40  
// Retrieval represents a request for retrieval task assignments for a given
// bit with the given number of fetch elements, or a response for such a request.
// It can also have the actual results set to be used as a delivery data struct.
//
// The context and error fields are used by the light client to terminate matching
// early if an error is encountered on some path of the pipeline.
type Retrieval struct {
	Bit      uint     // Bloom bit index the retrieval is for
	Sections []uint64 // Section indexes requested or delivered for the bit
	Bitsets  [][]byte // Retrieved bit-vectors, Bitsets[i] belongs to Sections[i]; an empty entry marks the section as missing

	Context context.Context // Context of the session issuing the request (filled in by Multiplex)
	Error   error           // Retrieval failure; a non-nil value makes Multiplex close the session
}
    55  
// Matcher is a pipelined system of schedulers and logic matchers which perform
// binary AND/OR operations on the bit-streams, creating a stream of potential
// blocks to inspect for data content.
//
// The four channels are created unbuffered by NewMatcher and act as rendezvous
// points between the distributor goroutine of a running session and the
// MatcherSession methods called by retriever processes.
type Matcher struct {
	sectionSize uint64 // Size of the data batches to filter on

	filters    [][]bloomIndexes    // Filter the system is matching for
	schedulers map[uint]*scheduler // Retrieval schedulers for loading bloom bits

	retrievers chan chan uint       // Retriever processes waiting for bit allocations
	counters   chan chan uint       // Retriever processes waiting for task count reports
	retrievals chan chan *Retrieval // Retriever processes waiting for task allocations
	deliveries chan *Retrieval      // Retriever processes waiting for task response deliveries

	// Atomic flag whether a session is live or not. Note that Start sets it on
	// entry and clears it again when Start itself returns, not when the
	// session is closed.
	running uint32
}
    72  
    73  // NewMatcher creates a new pipeline for retrieving bloom bit streams and doing
    74  // address and topic filtering on them. Setting a filter component to `nil` is
    75  // allowed and will result in that filter rule being skipped (OR 0x11...1).
    76  func NewMatcher(sectionSize uint64, filters [][][]byte) *Matcher {
    77  	// Create the matcher instance
    78  	m := &Matcher{
    79  		sectionSize: sectionSize,
    80  		schedulers:  make(map[uint]*scheduler),
    81  		retrievers:  make(chan chan uint),
    82  		counters:    make(chan chan uint),
    83  		retrievals:  make(chan chan *Retrieval),
    84  		deliveries:  make(chan *Retrieval),
    85  	}
    86  	// Calculate the bloom bit indexes for the groups we're interested in
    87  	m.filters = nil
    88  
    89  	for _, filter := range filters {
    90  		// Gather the bit indexes of the filter rule, special casing the nil filter
    91  		if len(filter) == 0 {
    92  			continue
    93  		}
    94  		bloomBits := make([]bloomIndexes, len(filter))
    95  		for i, clause := range filter {
    96  			if clause == nil {
    97  				bloomBits = nil
    98  				break
    99  			}
   100  			bloomBits[i] = calcBloomIndexes(clause)
   101  		}
   102  		// Accumulate the filter rules if no nil rule was within
   103  		if bloomBits != nil {
   104  			m.filters = append(m.filters, bloomBits)
   105  		}
   106  	}
   107  	// For every bit, create a scheduler to load/download the bit vectors
   108  	for _, bloomIndexLists := range m.filters {
   109  		for _, bloomIndexList := range bloomIndexLists {
   110  			for _, bloomIndex := range bloomIndexList {
   111  				m.addScheduler(bloomIndex)
   112  			}
   113  		}
   114  	}
   115  	return m
   116  }
   117  
   118  // addScheduler adds a bit stream retrieval scheduler for the given bit index if
   119  // it has not existed before. If the bit is already selected for filtering, the
   120  // existing scheduler can be used.
   121  func (m *Matcher) addScheduler(idx uint) {
   122  	if _, ok := m.schedulers[idx]; ok {
   123  		return
   124  	}
   125  	m.schedulers[idx] = newScheduler(idx)
   126  }
   127  
   128  // Start starts the matching process and returns a stream of bloom matches in
   129  // a given range of blocks. If there are no more matches in the range, the result
   130  // channel is closed.
   131  func (m *Matcher) Start(ctx context.Context, begin, end uint64, results chan uint64) (*MatcherSession, error) {
   132  	// Make sure we're not creating concurrent sessions
   133  	if atomic.SwapUint32(&m.running, 1) == 1 {
   134  		return nil, errors.New("matcher already running")
   135  	}
   136  	defer atomic.StoreUint32(&m.running, 0)
   137  
   138  	// Initiate a new matching round
   139  	session := &MatcherSession{
   140  		matcher: m,
   141  		quit:    make(chan struct{}),
   142  		kill:    make(chan struct{}),
   143  		ctx:     ctx,
   144  	}
   145  	for _, scheduler := range m.schedulers {
   146  		scheduler.reset()
   147  	}
   148  	sink := m.run(begin, end, cap(results), session)
   149  
   150  	// Read the output from the result sink and deliver to the user
   151  	session.pend.Add(1)
   152  	go func() {
   153  		defer session.pend.Done()
   154  		defer close(results)
   155  
   156  		for {
   157  			select {
   158  			case <-session.quit:
   159  				return
   160  
   161  			case res, ok := <-sink:
   162  				// New match result found
   163  				if !ok {
   164  					return
   165  				}
   166  				// Calculate the first and last blocks of the section
   167  				sectionStart := res.section * m.sectionSize
   168  
   169  				first := sectionStart
   170  				if begin > first {
   171  					first = begin
   172  				}
   173  				last := sectionStart + m.sectionSize - 1
   174  				if end < last {
   175  					last = end
   176  				}
   177  				// Iterate over all the blocks in the section and return the matching ones
   178  				for i := first; i <= last; i++ {
   179  					// Skip the entire byte if no matches are found inside (and we're processing an entire byte!)
   180  					next := res.bitset[(i-sectionStart)/8]
   181  					if next == 0 {
   182  						if i%8 == 0 {
   183  							i += 7
   184  						}
   185  						continue
   186  					}
   187  					// Some bit it set, do the actual submatching
   188  					if bit := 7 - i%8; next&(1<<bit) != 0 {
   189  						select {
   190  						case <-session.quit:
   191  							return
   192  						case results <- i:
   193  						}
   194  					}
   195  				}
   196  			}
   197  		}
   198  	}()
   199  	return session, nil
   200  }
   201  
   202  // run creates a daisy-chain of sub-matchers, one for the address set and one
   203  // for each topic set, each sub-matcher receiving a section only if the previous
   204  // ones have all found a potential match in one of the blocks of the section,
   205  // then binary AND-ing its own matches and forwaring the result to the next one.
   206  //
   207  // The method starts feeding the section indexes into the first sub-matcher on a
   208  // new goroutine and returns a sink channel receiving the results.
   209  func (m *Matcher) run(begin, end uint64, buffer int, session *MatcherSession) chan *partialMatches {
   210  	// Create the source channel and feed section indexes into
   211  	source := make(chan *partialMatches, buffer)
   212  
   213  	session.pend.Add(1)
   214  	go func() {
   215  		defer session.pend.Done()
   216  		defer close(source)
   217  
   218  		for i := begin / m.sectionSize; i <= end/m.sectionSize; i++ {
   219  			select {
   220  			case <-session.quit:
   221  				return
   222  			case source <- &partialMatches{i, bytes.Repeat([]byte{0xff}, int(m.sectionSize/8))}:
   223  			}
   224  		}
   225  	}()
   226  	// Assemble the daisy-chained filtering pipeline
   227  	next := source
   228  	dist := make(chan *request, buffer)
   229  
   230  	for _, bloom := range m.filters {
   231  		next = m.subMatch(next, dist, bloom, session)
   232  	}
   233  	// Start the request distribution
   234  	session.pend.Add(1)
   235  	go m.distributor(dist, session)
   236  
   237  	return next
   238  }
   239  
// subMatch creates a sub-matcher that filters for a set of addresses or topics, binary OR-s those matches, then
// binary AND-s the result to the daisy-chain input (source) and forwards it to the daisy-chain output.
// The matches of each address/topic are calculated by fetching the given sections of the three bloom bit indexes belonging to
// that address/topic, and binary AND-ing those vectors together.
//
// Two goroutines are started (both tracked by session.pend): the first fans every
// incoming section index out to the schedulers of all involved bits and then hands
// the section over via the process channel; the second collects one bit-vector per
// scheduler for that section, merges them and emits the section on the returned
// channel only if at least one bit is still set. All internal channels inherit the
// capacity of source.
func (m *Matcher) subMatch(source chan *partialMatches, dist chan *request, bloom []bloomIndexes, session *MatcherSession) chan *partialMatches {
	// Start the concurrent schedulers for each bit required by the bloom filter
	sectionSources := make([][3]chan uint64, len(bloom))
	sectionSinks := make([][3]chan []byte, len(bloom))
	for i, bits := range bloom {
		for j, bit := range bits {
			sectionSources[i][j] = make(chan uint64, cap(source))
			sectionSinks[i][j] = make(chan []byte, cap(source))

			m.schedulers[bit].run(sectionSources[i][j], dist, sectionSinks[i][j], session.quit, &session.pend)
		}
	}

	process := make(chan *partialMatches, cap(source)) // entries from source are forwarded here after fetches have been initiated
	results := make(chan *partialMatches, cap(source))

	session.pend.Add(2)
	go func() {
		// Tear down the goroutine and terminate all source channels
		defer session.pend.Done()
		defer close(process)

		defer func() {
			for _, bloomSources := range sectionSources {
				for _, bitSource := range bloomSources {
					close(bitSource)
				}
			}
		}()
		// Read sections from the source channel and multiplex into all bit-schedulers
		for {
			select {
			case <-session.quit:
				return

			case subres, ok := <-source:
				// New subresult from previous link
				if !ok {
					return
				}
				// Multiplex the section index to all bit-schedulers
				for _, bloomSources := range sectionSources {
					for _, bitSource := range bloomSources {
						select {
						case <-session.quit:
							return
						case bitSource <- subres.section:
						}
					}
				}
				// Notify the processor that this section will become available
				select {
				case <-session.quit:
					return
				case process <- subres:
				}
			}
		}
	}()

	go func() {
		// Tear down the goroutine and terminate the final sink channel
		defer session.pend.Done()
		defer close(results)

		// Read the source notifications and collect the delivered results
		for {
			select {
			case <-session.quit:
				return

			case subres, ok := <-process:
				// Notified of a section being retrieved
				if !ok {
					return
				}
				// Gather all the sub-results and merge them together: the three
				// bit-vectors of one clause are AND-ed, the clauses are OR-ed
				var orVector []byte
				for _, bloomSinks := range sectionSinks {
					var andVector []byte
					for _, bitSink := range bloomSinks {
						var data []byte
						select {
						case <-session.quit:
							return
						case data = <-bitSink:
						}
						if andVector == nil {
							// Work on a private copy so the delivered vector is never modified
							andVector = make([]byte, int(m.sectionSize/8))
							copy(andVector, data)
						} else {
							bitutil.ANDBytes(andVector, andVector, data)
						}
					}
					if orVector == nil {
						orVector = andVector
					} else {
						bitutil.ORBytes(orVector, orVector, andVector)
					}
				}

				// Only reachable with an empty bloom list: nothing can match then
				if orVector == nil {
					orVector = make([]byte, int(m.sectionSize/8))
				}
				// Restrict the matches to those still alive from the previous links
				if subres.bitset != nil {
					bitutil.ANDBytes(orVector, orVector, subres.bitset)
				}
				// Forward the section only if some block is still a potential match
				if bitutil.TestBytes(orVector) {
					select {
					case <-session.quit:
						return
					case results <- &partialMatches{subres.section, orVector}:
					}
				}
			}
		}
	}()
	return results
}
   363  
// distributor receives requests from the schedulers and queues them into a set
// of pending requests, which are assigned to retrievers wanting to fulfil them.
//
// It is the single owner of the request queues and serves, in one select loop,
// the scheduler requests arriving on dist as well as the retriever-facing
// channels of the matcher (retrievers, counters, retrievals, deliveries). On
// session.quit it keeps running until every bit handed out to a retriever has
// been delivered back; session.kill terminates it unconditionally.
func (m *Matcher) distributor(dist chan *request, session *MatcherSession) {
	defer session.pend.Done()

	var (
		requests   = make(map[uint][]uint64) // Per-bit list of section requests, ordered by section number
		unallocs   = make(map[uint]struct{}) // Bits with pending requests but not allocated to any retriever
		retrievers chan chan uint            // Waiting retrievers (toggled to nil if unallocs is empty)
	)
	var (
		allocs   int            // Number of active allocations to handle graceful shutdown requests
		shutdown = session.quit // Shutdown request channel, will gracefully wait for pending requests
	)

	// assign is a helper method to try to assign a pending bit to an actively
	// listening servicer, or schedule it up for later when one arrives.
	assign := func(bit uint) {
		select {
		case fetcher := <-m.retrievers:
			allocs++
			fetcher <- bit
		default:
			// No retrievers active, start listening for new ones
			retrievers = m.retrievers
			unallocs[bit] = struct{}{}
		}
	}

	for {
		select {
		case <-shutdown:
			// Graceful shutdown requested, wait until all pending requests are honoured
			if allocs == 0 {
				return
			}
			// Stop selecting on the (closed) quit channel, the delivery branch
			// below takes care of returning once allocs drops to zero
			shutdown = nil

		case <-session.kill:
			// Pending requests not honoured in time, hard terminate
			return

		case req := <-dist:
			// New retrieval request arrived to be distributed to some fetcher process,
			// insert it into the bit's queue keeping the queue sorted by section
			queue := requests[req.bit]
			index := sort.Search(len(queue), func(i int) bool { return queue[i] >= req.section })
			requests[req.bit] = append(queue[:index], append([]uint64{req.section}, queue[index:]...)...)

			// If it's a new bit and we have waiting fetchers, allocate to them
			if len(queue) == 0 {
				assign(req.bit)
			}

		case fetcher := <-retrievers:
			// New retriever arrived, find the lowest section-ed bit to assign
			bit, best := uint(0), uint64(math.MaxUint64)
			for idx := range unallocs {
				if requests[idx][0] < best {
					bit, best = idx, requests[idx][0]
				}
			}
			// Stop tracking this bit (and alloc notifications if no more work is available)
			delete(unallocs, bit)
			if len(unallocs) == 0 {
				retrievers = nil
			}
			allocs++
			fetcher <- bit

		case fetcher := <-m.counters:
			// New task count request arrives, return number of items
			// (the bit is read from, and the count written to, the same channel)
			fetcher <- uint(len(requests[<-fetcher]))

		case fetcher := <-m.retrievals:
			// New fetcher waiting for tasks to retrieve, assign. The length of the
			// incoming Sections slice is the number of sections the fetcher wants
			task := <-fetcher
			if want := len(task.Sections); want >= len(requests[task.Bit]) {
				task.Sections = requests[task.Bit]
				delete(requests, task.Bit)
			} else {
				task.Sections = append(task.Sections[:0], requests[task.Bit][:want]...)
				requests[task.Bit] = append(requests[task.Bit][:0], requests[task.Bit][want:]...)
			}
			fetcher <- task

			// If anything was left unallocated, try to assign to someone else
			if len(requests[task.Bit]) > 0 {
				assign(task.Bit)
			}

		case result := <-m.deliveries:
			// New retrieval task response from fetcher, split out missing sections and
			// deliver complete ones
			var (
				sections = make([]uint64, 0, len(result.Sections))
				bitsets  = make([][]byte, 0, len(result.Bitsets))
				missing  = make([]uint64, 0, len(result.Sections))
			)
			for i, bitset := range result.Bitsets {
				if len(bitset) == 0 {
					missing = append(missing, result.Sections[i])
					continue
				}
				sections = append(sections, result.Sections[i])
				bitsets = append(bitsets, bitset)
			}
			m.schedulers[result.Bit].deliver(sections, bitsets)
			allocs--

			// Reschedule missing sections and allocate bit if newly available
			if len(missing) > 0 {
				queue := requests[result.Bit]
				for _, section := range missing {
					index := sort.Search(len(queue), func(i int) bool { return queue[i] >= section })
					queue = append(queue[:index], append([]uint64{section}, queue[index:]...)...)
				}
				requests[result.Bit] = queue

				// The queue was empty before re-adding, so the bit has no retriever yet
				if len(queue) == len(missing) {
					assign(result.Bit)
				}
			}
			// If we're in the process of shutting down, terminate
			if allocs == 0 && shutdown == nil {
				return
			}
		}
	}
}
   493  
// MatcherSession is returned by a started matcher to be used as a terminator
// for the actively running matching operation.
type MatcherSession struct {
	matcher *Matcher

	closer sync.Once     // Sync object to ensure we only ever close once
	quit   chan struct{} // Quit channel to request pipeline termination
	kill   chan struct{} // Term channel to signal non-graceful forced shutdown

	ctx context.Context // Context used by the light client to abort filtering

	// Global error to track retrieval failures deep in the chain.
	// NOTE(review): atomic.Value.Store panics when values of different concrete
	// types are stored; confirm all retrieval errors share one type or guard this.
	err atomic.Value

	pend sync.WaitGroup // Tracks the pipeline goroutines, waited on by Close
}
   508  
   509  // Close stops the matching process and waits for all subprocesses to terminate
   510  // before returning. The timeout may be used for graceful shutdown, allowing the
   511  // currently running retrievals to complete before this time.
   512  func (s *MatcherSession) Close() {
   513  	s.closer.Do(func() {
   514  		// Signal termination and wait for all goroutines to tear down
   515  		close(s.quit)
   516  		time.AfterFunc(time.Second, func() { close(s.kill) })
   517  		s.pend.Wait()
   518  	})
   519  }
   520  
   521  // Error returns any failure encountered during the matching session.
   522  func (s *MatcherSession) Error() error {
   523  	if err := s.err.Load(); err != nil {
   524  		return err.(error)
   525  	}
   526  	return nil
   527  }
   528  
   529  // AllocateRetrieval assigns a bloom bit index to a client process that can either
   530  // immediately reuest and fetch the section contents assigned to this bit or wait
   531  // a little while for more sections to be requested.
   532  func (s *MatcherSession) AllocateRetrieval() (uint, bool) {
   533  	fetcher := make(chan uint)
   534  
   535  	select {
   536  	case <-s.quit:
   537  		return 0, false
   538  	case s.matcher.retrievers <- fetcher:
   539  		bit, ok := <-fetcher
   540  		return bit, ok
   541  	}
   542  }
   543  
   544  // PendingSections returns the number of pending section retrievals belonging to
   545  // the given bloom bit index.
   546  func (s *MatcherSession) PendingSections(bit uint) int {
   547  	fetcher := make(chan uint)
   548  
   549  	select {
   550  	case <-s.quit:
   551  		return 0
   552  	case s.matcher.counters <- fetcher:
   553  		fetcher <- bit
   554  		return int(<-fetcher)
   555  	}
   556  }
   557  
   558  // AllocateSections assigns all or part of an already allocated bit-task queue
   559  // to the requesting process.
   560  func (s *MatcherSession) AllocateSections(bit uint, count int) []uint64 {
   561  	fetcher := make(chan *Retrieval)
   562  
   563  	select {
   564  	case <-s.quit:
   565  		return nil
   566  	case s.matcher.retrievals <- fetcher:
   567  		task := &Retrieval{
   568  			Bit:      bit,
   569  			Sections: make([]uint64, count),
   570  		}
   571  		fetcher <- task
   572  		return (<-fetcher).Sections
   573  	}
   574  }
   575  
   576  // DeliverSections delivers a batch of section bit-vectors for a specific bloom
   577  // bit index to be injected into the processing pipeline.
   578  func (s *MatcherSession) DeliverSections(bit uint, sections []uint64, bitsets [][]byte) {
   579  	select {
   580  	case <-s.kill:
   581  		return
   582  	case s.matcher.deliveries <- &Retrieval{Bit: bit, Sections: sections, Bitsets: bitsets}:
   583  	}
   584  }
   585  
   586  // Multiplex polls the matcher session for rerieval tasks and multiplexes it into
   587  // the reuested retrieval queue to be serviced together with other sessions.
   588  //
   589  // This method will block for the lifetime of the session. Even after termination
   590  // of the session, any request in-flight need to be responded to! Empty responses
   591  // are fine though in that case.
   592  func (s *MatcherSession) Multiplex(batch int, wait time.Duration, mux chan chan *Retrieval) {
   593  	for {
   594  		// Allocate a new bloom bit index to retrieve data for, stopping when done
   595  		bit, ok := s.AllocateRetrieval()
   596  		if !ok {
   597  			return
   598  		}
   599  		// Bit allocated, throttle a bit if we're below our batch limit
   600  		if s.PendingSections(bit) < batch {
   601  			select {
   602  			case <-s.quit:
   603  				// Session terminating, we can't meaningfully service, abort
   604  				s.AllocateSections(bit, 0)
   605  				s.DeliverSections(bit, []uint64{}, [][]byte{})
   606  				return
   607  
   608  			case <-time.After(wait):
   609  				// Throttling up, fetch whatever's available
   610  			}
   611  		}
   612  		// Allocate as much as we can handle and request servicing
   613  		sections := s.AllocateSections(bit, batch)
   614  		request := make(chan *Retrieval)
   615  
   616  		select {
   617  		case <-s.quit:
   618  			// Session terminating, we can't meaningfully service, abort
   619  			s.DeliverSections(bit, sections, make([][]byte, len(sections)))
   620  			return
   621  
   622  		case mux <- request:
   623  			// Retrieval accepted, something must arrive before we're aborting
   624  			request <- &Retrieval{Bit: bit, Sections: sections, Context: s.ctx}
   625  
   626  			result := <-request
   627  			if result.Error != nil {
   628  				s.err.Store(result.Error)
   629  				s.Close()
   630  			}
   631  			s.DeliverSections(result.Bit, result.Sections, result.Bitsets)
   632  		}
   633  	}
   634  }