github.com/aigarnetwork/aigar@v0.0.0-20191115204914-d59a6eb70f8e/core/bloombits/matcher.go (about)

     1  //  Copyright 2018 The go-ethereum Authors
     2  //  Copyright 2019 The go-aigar Authors
     3  //  This file is part of the go-aigar library.
     4  //
     5  //  The go-aigar library is free software: you can redistribute it and/or modify
     6  //  it under the terms of the GNU Lesser General Public License as published by
     7  //  the Free Software Foundation, either version 3 of the License, or
     8  //  (at your option) any later version.
     9  //
    10  //  The go-aigar library is distributed in the hope that it will be useful,
    11  //  but WITHOUT ANY WARRANTY; without even the implied warranty of
    12  //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    13  //  GNU Lesser General Public License for more details.
    14  //
    15  //  You should have received a copy of the GNU Lesser General Public License
    16  //  along with the go-aigar library. If not, see <http://www.gnu.org/licenses/>.
    17  
    18  package bloombits
    19  
    20  import (
    21  	"bytes"
    22  	"context"
    23  	"errors"
    24  	"math"
    25  	"sort"
    26  	"sync"
    27  	"sync/atomic"
    28  	"time"
    29  
    30  	"github.com/AigarNetwork/aigar/common/bitutil"
    31  	"github.com/AigarNetwork/aigar/crypto"
    32  )
    33  
// bloomIndexes holds the three bit positions inside the bloom filter that
// belong to a single key. The positions are derived from the key's hash by
// calcBloomIndexes.
type bloomIndexes [3]uint
    37  
    38  // calcBloomIndexes returns the bloom filter bit indexes belonging to the given key.
    39  func calcBloomIndexes(b []byte) bloomIndexes {
    40  	b = crypto.Keccak256(b)
    41  
    42  	var idxs bloomIndexes
    43  	for i := 0; i < len(idxs); i++ {
    44  		idxs[i] = (uint(b[2*i])<<8)&2047 + uint(b[2*i+1])
    45  	}
    46  	return idxs
    47  }
    48  
// partialMatches carries the intermediate result for one section as it travels
// down the chain of sub-matchers. The bitset holds the potential matches found
// so far and every subsequent sub-matcher binary ANDs its own matches into it.
// A nil bitset is tolerated by the sub-matchers, in which case their own matches
// are forwarded without being restricted.
type partialMatches struct {
	section uint64 // Index of the section the bitset belongs to
	bitset  []byte // Potential matches inside the section, one bit per block
}
    57  
// Retrieval represents a request for retrieval task assignments for a given
// bit with the given number of fetch elements, or a response for such a request.
// It can also have the actual results set to be used as a delivery data struct.
//
// The context and error fields are used by the light client to terminate matching
// early if an error is encountered on some path of the pipeline.
type Retrieval struct {
	Bit      uint     // Bloom bit index the retrieval is about
	Sections []uint64 // Sections requested for (or delivered with) the bit
	Bitsets  [][]byte // Retrieved bit-vectors, Bitsets[i] belonging to Sections[i]

	Context context.Context // Context of the session that issued the retrieval
	Error   error           // Failure reported by whoever serviced the retrieval
}
    72  
// Matcher is a pipelined system of schedulers and logic matchers which perform
// binary AND/OR operations on the bit-streams, creating a stream of potential
// blocks to inspect for data content.
//
// The four channels below are created once by NewMatcher and are the meeting
// point between the distributor goroutine of a session and the retriever
// processes calling into the MatcherSession methods.
type Matcher struct {
	sectionSize uint64 // Size of the data batches to filter on

	filters    [][]bloomIndexes    // Filter the system is matching for
	schedulers map[uint]*scheduler // Retrieval schedulers for loading bloom bits

	retrievers chan chan uint       // Retriever processes waiting for bit allocations
	counters   chan chan uint       // Retriever processes waiting for task count reports
	retrievals chan chan *Retrieval // Retriever processes waiting for task allocations
	deliveries chan *Retrieval      // Retriever processes waiting for task response deliveries

	// Atomic flag set to 1 when Start begins setting up a session. Note that
	// Start clears it again (via defer) as soon as it returns, not when the
	// session it created is closed.
	running uint32
}
    89  
    90  // NewMatcher creates a new pipeline for retrieving bloom bit streams and doing
    91  // address and topic filtering on them. Setting a filter component to `nil` is
    92  // allowed and will result in that filter rule being skipped (OR 0x11...1).
    93  func NewMatcher(sectionSize uint64, filters [][][]byte) *Matcher {
    94  	// Create the matcher instance
    95  	m := &Matcher{
    96  		sectionSize: sectionSize,
    97  		schedulers:  make(map[uint]*scheduler),
    98  		retrievers:  make(chan chan uint),
    99  		counters:    make(chan chan uint),
   100  		retrievals:  make(chan chan *Retrieval),
   101  		deliveries:  make(chan *Retrieval),
   102  	}
   103  	// Calculate the bloom bit indexes for the groups we're interested in
   104  	m.filters = nil
   105  
   106  	for _, filter := range filters {
   107  		// Gather the bit indexes of the filter rule, special casing the nil filter
   108  		if len(filter) == 0 {
   109  			continue
   110  		}
   111  		bloomBits := make([]bloomIndexes, len(filter))
   112  		for i, clause := range filter {
   113  			if clause == nil {
   114  				bloomBits = nil
   115  				break
   116  			}
   117  			bloomBits[i] = calcBloomIndexes(clause)
   118  		}
   119  		// Accumulate the filter rules if no nil rule was within
   120  		if bloomBits != nil {
   121  			m.filters = append(m.filters, bloomBits)
   122  		}
   123  	}
   124  	// For every bit, create a scheduler to load/download the bit vectors
   125  	for _, bloomIndexLists := range m.filters {
   126  		for _, bloomIndexList := range bloomIndexLists {
   127  			for _, bloomIndex := range bloomIndexList {
   128  				m.addScheduler(bloomIndex)
   129  			}
   130  		}
   131  	}
   132  	return m
   133  }
   134  
   135  // addScheduler adds a bit stream retrieval scheduler for the given bit index if
   136  // it has not existed before. If the bit is already selected for filtering, the
   137  // existing scheduler can be used.
   138  func (m *Matcher) addScheduler(idx uint) {
   139  	if _, ok := m.schedulers[idx]; ok {
   140  		return
   141  	}
   142  	m.schedulers[idx] = newScheduler(idx)
   143  }
   144  
   145  // Start starts the matching process and returns a stream of bloom matches in
   146  // a given range of blocks. If there are no more matches in the range, the result
   147  // channel is closed.
   148  func (m *Matcher) Start(ctx context.Context, begin, end uint64, results chan uint64) (*MatcherSession, error) {
   149  	// Make sure we're not creating concurrent sessions
   150  	if atomic.SwapUint32(&m.running, 1) == 1 {
   151  		return nil, errors.New("matcher already running")
   152  	}
   153  	defer atomic.StoreUint32(&m.running, 0)
   154  
   155  	// Initiate a new matching round
   156  	session := &MatcherSession{
   157  		matcher: m,
   158  		quit:    make(chan struct{}),
   159  		kill:    make(chan struct{}),
   160  		ctx:     ctx,
   161  	}
   162  	for _, scheduler := range m.schedulers {
   163  		scheduler.reset()
   164  	}
   165  	sink := m.run(begin, end, cap(results), session)
   166  
   167  	// Read the output from the result sink and deliver to the user
   168  	session.pend.Add(1)
   169  	go func() {
   170  		defer session.pend.Done()
   171  		defer close(results)
   172  
   173  		for {
   174  			select {
   175  			case <-session.quit:
   176  				return
   177  
   178  			case res, ok := <-sink:
   179  				// New match result found
   180  				if !ok {
   181  					return
   182  				}
   183  				// Calculate the first and last blocks of the section
   184  				sectionStart := res.section * m.sectionSize
   185  
   186  				first := sectionStart
   187  				if begin > first {
   188  					first = begin
   189  				}
   190  				last := sectionStart + m.sectionSize - 1
   191  				if end < last {
   192  					last = end
   193  				}
   194  				// Iterate over all the blocks in the section and return the matching ones
   195  				for i := first; i <= last; i++ {
   196  					// Skip the entire byte if no matches are found inside (and we're processing an entire byte!)
   197  					next := res.bitset[(i-sectionStart)/8]
   198  					if next == 0 {
   199  						if i%8 == 0 {
   200  							i += 7
   201  						}
   202  						continue
   203  					}
   204  					// Some bit it set, do the actual submatching
   205  					if bit := 7 - i%8; next&(1<<bit) != 0 {
   206  						select {
   207  						case <-session.quit:
   208  							return
   209  						case results <- i:
   210  						}
   211  					}
   212  				}
   213  			}
   214  		}
   215  	}()
   216  	return session, nil
   217  }
   218  
   219  // run creates a daisy-chain of sub-matchers, one for the address set and one
   220  // for each topic set, each sub-matcher receiving a section only if the previous
   221  // ones have all found a potential match in one of the blocks of the section,
   222  // then binary AND-ing its own matches and forwarding the result to the next one.
   223  //
   224  // The method starts feeding the section indexes into the first sub-matcher on a
   225  // new goroutine and returns a sink channel receiving the results.
   226  func (m *Matcher) run(begin, end uint64, buffer int, session *MatcherSession) chan *partialMatches {
   227  	// Create the source channel and feed section indexes into
   228  	source := make(chan *partialMatches, buffer)
   229  
   230  	session.pend.Add(1)
   231  	go func() {
   232  		defer session.pend.Done()
   233  		defer close(source)
   234  
   235  		for i := begin / m.sectionSize; i <= end/m.sectionSize; i++ {
   236  			select {
   237  			case <-session.quit:
   238  				return
   239  			case source <- &partialMatches{i, bytes.Repeat([]byte{0xff}, int(m.sectionSize/8))}:
   240  			}
   241  		}
   242  	}()
   243  	// Assemble the daisy-chained filtering pipeline
   244  	next := source
   245  	dist := make(chan *request, buffer)
   246  
   247  	for _, bloom := range m.filters {
   248  		next = m.subMatch(next, dist, bloom, session)
   249  	}
   250  	// Start the request distribution
   251  	session.pend.Add(1)
   252  	go m.distributor(dist, session)
   253  
   254  	return next
   255  }
   256  
// subMatch creates a sub-matcher that filters for a set of addresses or topics, binary OR-s those matches, then
// binary AND-s the result to the daisy-chain input (source) and forwards it to the daisy-chain output.
// The matches of each address/topic are calculated by fetching the given sections of the three bloom bit indexes belonging to
// that address/topic, and binary AND-ing those vectors together.
//
// Two goroutines are started per sub-matcher, both registered on session.pend:
// the first forwards every incoming section index to all the bit schedulers and
// then queues the section on an internal channel; the second picks the sections
// up from there in the same order, collects one bit-vector per scheduler and
// merges them. Sections whose merged vector has no bit left set are dropped.
//
// The returned channel has the same capacity as source and is closed when the
// second goroutine terminates.
func (m *Matcher) subMatch(source chan *partialMatches, dist chan *request, bloom []bloomIndexes, session *MatcherSession) chan *partialMatches {
	// Start the concurrent schedulers for each bit required by the bloom filter.
	// sectionSources[i][j] feeds section indexes to the scheduler of the j-th bit
	// of the i-th clause, sectionSinks[i][j] carries its bit-vectors back.
	// NOTE(review): scheduler.run is defined elsewhere; it receives the quit
	// channel and the wait group, presumably to manage its own goroutines - confirm.
	sectionSources := make([][3]chan uint64, len(bloom))
	sectionSinks := make([][3]chan []byte, len(bloom))
	for i, bits := range bloom {
		for j, bit := range bits {
			sectionSources[i][j] = make(chan uint64, cap(source))
			sectionSinks[i][j] = make(chan []byte, cap(source))

			m.schedulers[bit].run(sectionSources[i][j], dist, sectionSinks[i][j], session.quit, &session.pend)
		}
	}

	process := make(chan *partialMatches, cap(source)) // entries from source are forwarded here after fetches have been initiated
	results := make(chan *partialMatches, cap(source)) // merged sections that still contain at least one potential match

	session.pend.Add(2)
	go func() {
		// Tear down the goroutine and terminate all source channels. The defers
		// run in reverse order: the scheduler inputs are closed first, then the
		// process channel, and only then is the wait group released.
		defer session.pend.Done()
		defer close(process)

		defer func() {
			for _, bloomSources := range sectionSources {
				for _, bitSource := range bloomSources {
					close(bitSource)
				}
			}
		}()
		// Read sections from the source channel and multiplex into all bit-schedulers
		for {
			select {
			case <-session.quit:
				return

			case subres, ok := <-source:
				// New subresult from previous link
				if !ok {
					return
				}
				// Multiplex the section index to all bit-schedulers
				for _, bloomSources := range sectionSources {
					for _, bitSource := range bloomSources {
						select {
						case <-session.quit:
							return
						case bitSource <- subres.section:
						}
					}
				}
				// Notify the processor that this section will become available
				select {
				case <-session.quit:
					return
				case process <- subres:
				}
			}
		}
	}()

	go func() {
		// Tear down the goroutine and terminate the final sink channel
		defer session.pend.Done()
		defer close(results)

		// Read the source notifications and collect the delivered results
		for {
			select {
			case <-session.quit:
				return

			case subres, ok := <-process:
				// Notified of a section being retrieved
				if !ok {
					return
				}
				// Gather all the sub-results and merge them together: the three
				// bit-vectors of a clause are AND-ed, the clauses are OR-ed
				var orVector []byte
				for _, bloomSinks := range sectionSinks {
					var andVector []byte
					for _, bitSink := range bloomSinks {
						var data []byte
						select {
						case <-session.quit:
							return
						case data = <-bitSink:
						}
						if andVector == nil {
							// First bit of the clause, start from a private copy so
							// the delivered vector itself is never modified
							andVector = make([]byte, int(m.sectionSize/8))
							copy(andVector, data)
						} else {
							bitutil.ANDBytes(andVector, andVector, data)
						}
					}
					if orVector == nil {
						orVector = andVector
					} else {
						bitutil.ORBytes(orVector, orVector, andVector)
					}
				}

				// No clauses at all leaves an all-zero vector, i.e. no matches
				if orVector == nil {
					orVector = make([]byte, int(m.sectionSize/8))
				}
				// Restrict the matches to those the previous links agreed on
				if subres.bitset != nil {
					bitutil.ANDBytes(orVector, orVector, subres.bitset)
				}
				// Only forward the section if the merged vector passes the
				// bitutil.TestBytes check
				if bitutil.TestBytes(orVector) {
					select {
					case <-session.quit:
						return
					case results <- &partialMatches{subres.section, orVector}:
					}
				}
			}
		}
	}()
	return results
}
   380  
// distributor receives requests from the schedulers and queues them into a set
// of pending requests, which are assigned to retrievers wanting to fulfil them.
//
// It is the single goroutine owning the request queues of a session and serves
// the matcher's retrievers, counters, retrievals and deliveries channels until
// the session terminates. On a quit request it keeps running until every bit
// handed out to a retriever has been answered by a delivery; closing the kill
// channel terminates it unconditionally.
func (m *Matcher) distributor(dist chan *request, session *MatcherSession) {
	defer session.pend.Done()

	var (
		requests   = make(map[uint][]uint64) // Per-bit list of section requests, ordered by section number
		unallocs   = make(map[uint]struct{}) // Bits with pending requests but not allocated to any retriever
		retrievers chan chan uint            // Waiting retrievers (toggled to nil if unallocs is empty)
	)
	var (
		allocs   int            // Number of active allocations to handle graceful shutdown requests
		shutdown = session.quit // Shutdown request channel, will gracefully wait for pending requests
	)

	// assign is a helper method to try to assign a pending bit to an actively
	// listening servicer, or schedule it up for later when one arrives.
	assign := func(bit uint) {
		select {
		case fetcher := <-m.retrievers:
			allocs++
			fetcher <- bit
		default:
			// No retrievers active, start listening for new ones
			retrievers = m.retrievers
			unallocs[bit] = struct{}{}
		}
	}

	for {
		select {
		case <-shutdown:
			// Graceful shutdown requested, wait until all pending requests are honoured
			if allocs == 0 {
				return
			}
			// Disable this case (a nil channel never becomes ready) and keep
			// serving until the outstanding deliveries have arrived
			shutdown = nil

		case <-session.kill:
			// Pending requests not honoured in time, hard terminate
			return

		case req := <-dist:
			// New retrieval request arrived to be distributed to some fetcher process.
			// The section is inserted at its sorted position within the bit's queue.
			queue := requests[req.bit]
			index := sort.Search(len(queue), func(i int) bool { return queue[i] >= req.section })
			requests[req.bit] = append(queue[:index], append([]uint64{req.section}, queue[index:]...)...)

			// If it's a new bit and we have waiting fetchers, allocate to them
			if len(queue) == 0 {
				assign(req.bit)
			}

		case fetcher := <-retrievers:
			// New retriever arrived, find the lowest section-ed bit to assign
			bit, best := uint(0), uint64(math.MaxUint64)
			for idx := range unallocs {
				if requests[idx][0] < best {
					bit, best = idx, requests[idx][0]
				}
			}
			// Stop tracking this bit (and alloc notifications if no more work is available)
			delete(unallocs, bit)
			if len(unallocs) == 0 {
				retrievers = nil
			}
			allocs++
			fetcher <- bit

		case fetcher := <-m.counters:
			// New task count request arrives, return number of items. The bit
			// in question is read from the very channel the reply is sent on.
			fetcher <- uint(len(requests[<-fetcher]))

		case fetcher := <-m.retrievals:
			// New fetcher waiting for tasks to retrieve, assign. The length of
			// the incoming Sections slice is the number of sections wanted.
			task := <-fetcher
			if want := len(task.Sections); want >= len(requests[task.Bit]) {
				// Everything queued fits, hand over the whole queue
				task.Sections = requests[task.Bit]
				delete(requests, task.Bit)
			} else {
				// Hand over the lowest sections and shift the rest to the front
				task.Sections = append(task.Sections[:0], requests[task.Bit][:want]...)
				requests[task.Bit] = append(requests[task.Bit][:0], requests[task.Bit][want:]...)
			}
			fetcher <- task

			// If anything was left unallocated, try to assign to someone else
			if len(requests[task.Bit]) > 0 {
				assign(task.Bit)
			}

		case result := <-m.deliveries:
			// New retrieval task response from fetcher, split out missing sections and
			// deliver complete ones. A section counts as missing if its bitset is empty.
			var (
				sections = make([]uint64, 0, len(result.Sections))
				bitsets  = make([][]byte, 0, len(result.Bitsets))
				missing  = make([]uint64, 0, len(result.Sections))
			)
			for i, bitset := range result.Bitsets {
				if len(bitset) == 0 {
					missing = append(missing, result.Sections[i])
					continue
				}
				sections = append(sections, result.Sections[i])
				bitsets = append(bitsets, bitset)
			}
			m.schedulers[result.Bit].deliver(sections, bitsets)
			allocs--

			// Reschedule missing sections and allocate bit if newly available
			if len(missing) > 0 {
				queue := requests[result.Bit]
				for _, section := range missing {
					index := sort.Search(len(queue), func(i int) bool { return queue[i] >= section })
					queue = append(queue[:index], append([]uint64{section}, queue[index:]...)...)
				}
				requests[result.Bit] = queue

				// The queue was empty before, so the bit is not tracked anywhere yet
				if len(queue) == len(missing) {
					assign(result.Bit)
				}
			}
			// If we're in the process of shutting down, terminate
			if allocs == 0 && shutdown == nil {
				return
			}
		}
	}
}
   510  
   511  // MatcherSession is returned by a started matcher to be used as a terminator
   512  // for the actively running matching operation.
   513  type MatcherSession struct {
   514  	matcher *Matcher
   515  
   516  	closer sync.Once     // Sync object to ensure we only ever close once
   517  	quit   chan struct{} // Quit channel to request pipeline termination
   518  	kill   chan struct{} // Term channel to signal non-graceful forced shutdown
   519  
   520  	ctx context.Context // Context used by the light client to abort filtering
   521  	err atomic.Value    // Global error to track retrieval failures deep in the chain
   522  
   523  	pend sync.WaitGroup
   524  }
   525  
   526  // Close stops the matching process and waits for all subprocesses to terminate
   527  // before returning. The timeout may be used for graceful shutdown, allowing the
   528  // currently running retrievals to complete before this time.
   529  func (s *MatcherSession) Close() {
   530  	s.closer.Do(func() {
   531  		// Signal termination and wait for all goroutines to tear down
   532  		close(s.quit)
   533  		time.AfterFunc(time.Second, func() { close(s.kill) })
   534  		s.pend.Wait()
   535  	})
   536  }
   537  
   538  // Error returns any failure encountered during the matching session.
   539  func (s *MatcherSession) Error() error {
   540  	if err := s.err.Load(); err != nil {
   541  		return err.(error)
   542  	}
   543  	return nil
   544  }
   545  
   546  // AllocateRetrieval assigns a bloom bit index to a client process that can either
   547  // immediately request and fetch the section contents assigned to this bit or wait
   548  // a little while for more sections to be requested.
   549  func (s *MatcherSession) AllocateRetrieval() (uint, bool) {
   550  	fetcher := make(chan uint)
   551  
   552  	select {
   553  	case <-s.quit:
   554  		return 0, false
   555  	case s.matcher.retrievers <- fetcher:
   556  		bit, ok := <-fetcher
   557  		return bit, ok
   558  	}
   559  }
   560  
   561  // PendingSections returns the number of pending section retrievals belonging to
   562  // the given bloom bit index.
   563  func (s *MatcherSession) PendingSections(bit uint) int {
   564  	fetcher := make(chan uint)
   565  
   566  	select {
   567  	case <-s.quit:
   568  		return 0
   569  	case s.matcher.counters <- fetcher:
   570  		fetcher <- bit
   571  		return int(<-fetcher)
   572  	}
   573  }
   574  
   575  // AllocateSections assigns all or part of an already allocated bit-task queue
   576  // to the requesting process.
   577  func (s *MatcherSession) AllocateSections(bit uint, count int) []uint64 {
   578  	fetcher := make(chan *Retrieval)
   579  
   580  	select {
   581  	case <-s.quit:
   582  		return nil
   583  	case s.matcher.retrievals <- fetcher:
   584  		task := &Retrieval{
   585  			Bit:      bit,
   586  			Sections: make([]uint64, count),
   587  		}
   588  		fetcher <- task
   589  		return (<-fetcher).Sections
   590  	}
   591  }
   592  
   593  // DeliverSections delivers a batch of section bit-vectors for a specific bloom
   594  // bit index to be injected into the processing pipeline.
   595  func (s *MatcherSession) DeliverSections(bit uint, sections []uint64, bitsets [][]byte) {
   596  	select {
   597  	case <-s.kill:
   598  		return
   599  	case s.matcher.deliveries <- &Retrieval{Bit: bit, Sections: sections, Bitsets: bitsets}:
   600  	}
   601  }
   602  
   603  // Multiplex polls the matcher session for retrieval tasks and multiplexes it into
   604  // the requested retrieval queue to be serviced together with other sessions.
   605  //
   606  // This method will block for the lifetime of the session. Even after termination
   607  // of the session, any request in-flight need to be responded to! Empty responses
   608  // are fine though in that case.
   609  func (s *MatcherSession) Multiplex(batch int, wait time.Duration, mux chan chan *Retrieval) {
   610  	for {
   611  		// Allocate a new bloom bit index to retrieve data for, stopping when done
   612  		bit, ok := s.AllocateRetrieval()
   613  		if !ok {
   614  			return
   615  		}
   616  		// Bit allocated, throttle a bit if we're below our batch limit
   617  		if s.PendingSections(bit) < batch {
   618  			select {
   619  			case <-s.quit:
   620  				// Session terminating, we can't meaningfully service, abort
   621  				s.AllocateSections(bit, 0)
   622  				s.DeliverSections(bit, []uint64{}, [][]byte{})
   623  				return
   624  
   625  			case <-time.After(wait):
   626  				// Throttling up, fetch whatever's available
   627  			}
   628  		}
   629  		// Allocate as much as we can handle and request servicing
   630  		sections := s.AllocateSections(bit, batch)
   631  		request := make(chan *Retrieval)
   632  
   633  		select {
   634  		case <-s.quit:
   635  			// Session terminating, we can't meaningfully service, abort
   636  			s.DeliverSections(bit, sections, make([][]byte, len(sections)))
   637  			return
   638  
   639  		case mux <- request:
   640  			// Retrieval accepted, something must arrive before we're aborting
   641  			request <- &Retrieval{Bit: bit, Sections: sections, Context: s.ctx}
   642  
   643  			result := <-request
   644  			if result.Error != nil {
   645  				s.err.Store(result.Error)
   646  				s.Close()
   647  			}
   648  			s.DeliverSections(result.Bit, result.Sections, result.Bitsets)
   649  		}
   650  	}
   651  }