github.com/sberex/go-sberex@v1.8.2-0.20181113200658-ed96ac38f7d7/core/bloombits/matcher.go (about)

     1  // This file is part of the go-sberex library. The go-sberex library is 
     2  // free software: you can redistribute it and/or modify it under the terms 
     3  // of the GNU Lesser General Public License as published by the Free 
     4  // Software Foundation, either version 3 of the License, or (at your option)
     5  // any later version.
     6  //
     7  // The go-sberex library is distributed in the hope that it will be useful, 
     8  // but WITHOUT ANY WARRANTY; without even the implied warranty of
     9  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser 
    10  // General Public License <http://www.gnu.org/licenses/> for more details.
    11  
    12  package bloombits
    13  
    14  import (
    15  	"bytes"
    16  	"context"
    17  	"errors"
    18  	"math"
    19  	"sort"
    20  	"sync"
    21  	"sync/atomic"
    22  	"time"
    23  
    24  	"github.com/Sberex/go-sberex/common/bitutil"
    25  	"github.com/Sberex/go-sberex/crypto"
    26  )
    27  
// bloomIndexes represents the three bit indexes inside the bloom filter that
// belong to some key. All three bits must be set in a section's bloom data for
// the key to be a potential match.
type bloomIndexes [3]uint
    31  
    32  // calcBloomIndexes returns the bloom filter bit indexes belonging to the given key.
    33  func calcBloomIndexes(b []byte) bloomIndexes {
    34  	b = crypto.Keccak256(b)
    35  
    36  	var idxs bloomIndexes
    37  	for i := 0; i < len(idxs); i++ {
    38  		idxs[i] = (uint(b[2*i])<<8)&2047 + uint(b[2*i+1])
    39  	}
    40  	return idxs
    41  }
    42  
// partialMatches with a non-nil vector represents a section in which some sub-
// matchers have already found potential matches. Subsequent sub-matchers will
// binary AND their matches with this vector. If vector is nil, it represents a
// section to be processed by the first sub-matcher.
type partialMatches struct {
	section uint64 // Section number this partial result belongs to
	bitset  []byte // Per-block match bits accumulated by previous sub-matchers
}
    51  
// Retrieval represents a request for retrieval task assignments for a given
// bit with the given number of fetch elements, or a response for such a request.
// It can also have the actual results set to be used as a delivery data struct.
//
// The context and error fields are used by the light client to terminate matching
// early if an error is encountered on some path of the pipeline.
type Retrieval struct {
	Bit      uint     // Bloom bit index whose sections are requested or delivered
	Sections []uint64 // Section numbers to retrieve the bit-vectors for
	Bitsets  [][]byte // Retrieved bit-vectors, one per section (empty = failed)

	Context context.Context // Context to allow aborting in-flight retrievals
	Error   error           // Failure encountered while servicing the retrieval
}
    66  
// Matcher is a pipelined system of schedulers and logic matchers which perform
// binary AND/OR operations on the bit-streams, creating a stream of potential
// blocks to inspect for data content.
type Matcher struct {
	sectionSize uint64 // Size of the data batches to filter on

	filters    [][]bloomIndexes    // Filter the system is matching for
	schedulers map[uint]*scheduler // Retrieval schedulers for loading bloom bits

	retrievers chan chan uint       // Retriever processes waiting for bit allocations
	counters   chan chan uint       // Retriever processes waiting for task count reports
	retrievals chan chan *Retrieval // Retriever processes waiting for task allocations
	deliveries chan *Retrieval      // Retriever processes waiting for task response deliveries

	running uint32 // Atomic flag whether a session is live or not
}
    83  
    84  // NewMatcher creates a new pipeline for retrieving bloom bit streams and doing
    85  // address and topic filtering on them. Setting a filter component to `nil` is
    86  // allowed and will result in that filter rule being skipped (OR 0x11...1).
    87  func NewMatcher(sectionSize uint64, filters [][][]byte) *Matcher {
    88  	// Create the matcher instance
    89  	m := &Matcher{
    90  		sectionSize: sectionSize,
    91  		schedulers:  make(map[uint]*scheduler),
    92  		retrievers:  make(chan chan uint),
    93  		counters:    make(chan chan uint),
    94  		retrievals:  make(chan chan *Retrieval),
    95  		deliveries:  make(chan *Retrieval),
    96  	}
    97  	// Calculate the bloom bit indexes for the groups we're interested in
    98  	m.filters = nil
    99  
   100  	for _, filter := range filters {
   101  		// Gather the bit indexes of the filter rule, special casing the nil filter
   102  		if len(filter) == 0 {
   103  			continue
   104  		}
   105  		bloomBits := make([]bloomIndexes, len(filter))
   106  		for i, clause := range filter {
   107  			if clause == nil {
   108  				bloomBits = nil
   109  				break
   110  			}
   111  			bloomBits[i] = calcBloomIndexes(clause)
   112  		}
   113  		// Accumulate the filter rules if no nil rule was within
   114  		if bloomBits != nil {
   115  			m.filters = append(m.filters, bloomBits)
   116  		}
   117  	}
   118  	// For every bit, create a scheduler to load/download the bit vectors
   119  	for _, bloomIndexLists := range m.filters {
   120  		for _, bloomIndexList := range bloomIndexLists {
   121  			for _, bloomIndex := range bloomIndexList {
   122  				m.addScheduler(bloomIndex)
   123  			}
   124  		}
   125  	}
   126  	return m
   127  }
   128  
   129  // addScheduler adds a bit stream retrieval scheduler for the given bit index if
   130  // it has not existed before. If the bit is already selected for filtering, the
   131  // existing scheduler can be used.
   132  func (m *Matcher) addScheduler(idx uint) {
   133  	if _, ok := m.schedulers[idx]; ok {
   134  		return
   135  	}
   136  	m.schedulers[idx] = newScheduler(idx)
   137  }
   138  
// Start starts the matching process and returns a stream of bloom matches in
// a given range of blocks. If there are no more matches in the range, the result
// channel is closed.
func (m *Matcher) Start(ctx context.Context, begin, end uint64, results chan uint64) (*MatcherSession, error) {
	// Make sure we're not creating concurrent sessions.
	// NOTE(review): the flag is cleared when Start returns (via the defer
	// below), so it only guards overlapping setup, not the session lifetime.
	if atomic.SwapUint32(&m.running, 1) == 1 {
		return nil, errors.New("matcher already running")
	}
	defer atomic.StoreUint32(&m.running, 0)

	// Initiate a new matching round
	session := &MatcherSession{
		matcher: m,
		quit:    make(chan struct{}),
		kill:    make(chan struct{}),
		ctx:     ctx,
	}
	// Reset the schedulers so stale state from a previous session is dropped
	for _, scheduler := range m.schedulers {
		scheduler.reset()
	}
	sink := m.run(begin, end, cap(results), session)

	// Read the output from the result sink and deliver to the user
	session.pend.Add(1)
	go func() {
		defer session.pend.Done()
		defer close(results)

		for {
			select {
			case <-session.quit:
				return

			case res, ok := <-sink:
				// New match result found
				if !ok {
					return
				}
				// Calculate the first and last blocks of the section
				sectionStart := res.section * m.sectionSize

				// Clamp the section's block range to the requested [begin, end]
				first := sectionStart
				if begin > first {
					first = begin
				}
				last := sectionStart + m.sectionSize - 1
				if end < last {
					last = end
				}
				// Iterate over all the blocks in the section and return the matching ones
				for i := first; i <= last; i++ {
					// Skip the entire byte if no matches are found inside (and we're processing an entire byte!)
					next := res.bitset[(i-sectionStart)/8]
					if next == 0 {
						if i%8 == 0 {
							i += 7
						}
						continue
					}
					// Some bit is set, do the actual submatching
					if bit := 7 - i%8; next&(1<<bit) != 0 {
						select {
						case <-session.quit:
							return
						case results <- i:
						}
					}
				}
			}
		}
	}()
	return session, nil
}
   212  
   213  // run creates a daisy-chain of sub-matchers, one for the address set and one
   214  // for each topic set, each sub-matcher receiving a section only if the previous
   215  // ones have all found a potential match in one of the blocks of the section,
   216  // then binary AND-ing its own matches and forwaring the result to the next one.
   217  //
   218  // The method starts feeding the section indexes into the first sub-matcher on a
   219  // new goroutine and returns a sink channel receiving the results.
   220  func (m *Matcher) run(begin, end uint64, buffer int, session *MatcherSession) chan *partialMatches {
   221  	// Create the source channel and feed section indexes into
   222  	source := make(chan *partialMatches, buffer)
   223  
   224  	session.pend.Add(1)
   225  	go func() {
   226  		defer session.pend.Done()
   227  		defer close(source)
   228  
   229  		for i := begin / m.sectionSize; i <= end/m.sectionSize; i++ {
   230  			select {
   231  			case <-session.quit:
   232  				return
   233  			case source <- &partialMatches{i, bytes.Repeat([]byte{0xff}, int(m.sectionSize/8))}:
   234  			}
   235  		}
   236  	}()
   237  	// Assemble the daisy-chained filtering pipeline
   238  	next := source
   239  	dist := make(chan *request, buffer)
   240  
   241  	for _, bloom := range m.filters {
   242  		next = m.subMatch(next, dist, bloom, session)
   243  	}
   244  	// Start the request distribution
   245  	session.pend.Add(1)
   246  	go m.distributor(dist, session)
   247  
   248  	return next
   249  }
   250  
// subMatch creates a sub-matcher that filters for a set of addresses or topics, binary OR-s those matches, then
// binary AND-s the result to the daisy-chain input (source) and forwards it to the daisy-chain output.
// The matches of each address/topic are calculated by fetching the given sections of the three bloom bit indexes belonging to
// that address/topic, and binary AND-ing those vectors together.
func (m *Matcher) subMatch(source chan *partialMatches, dist chan *request, bloom []bloomIndexes, session *MatcherSession) chan *partialMatches {
	// Start the concurrent schedulers for each bit required by the bloom filter
	sectionSources := make([][3]chan uint64, len(bloom))
	sectionSinks := make([][3]chan []byte, len(bloom))
	for i, bits := range bloom {
		for j, bit := range bits {
			// Buffer to the same depth as the source so the pipeline doesn't stall
			sectionSources[i][j] = make(chan uint64, cap(source))
			sectionSinks[i][j] = make(chan []byte, cap(source))

			m.schedulers[bit].run(sectionSources[i][j], dist, sectionSinks[i][j], session.quit, &session.pend)
		}
	}

	process := make(chan *partialMatches, cap(source)) // entries from source are forwarded here after fetches have been initiated
	results := make(chan *partialMatches, cap(source))

	session.pend.Add(2)
	go func() {
		// Tear down the goroutine and terminate all source channels
		defer session.pend.Done()
		defer close(process)

		defer func() {
			for _, bloomSources := range sectionSources {
				for _, bitSource := range bloomSources {
					close(bitSource)
				}
			}
		}()
		// Read sections from the source channel and multiplex into all bit-schedulers
		for {
			select {
			case <-session.quit:
				return

			case subres, ok := <-source:
				// New subresult from previous link
				if !ok {
					return
				}
				// Multiplex the section index to all bit-schedulers
				for _, bloomSources := range sectionSources {
					for _, bitSource := range bloomSources {
						select {
						case <-session.quit:
							return
						case bitSource <- subres.section:
						}
					}
				}
				// Notify the processor that this section will become available
				select {
				case <-session.quit:
					return
				case process <- subres:
				}
			}
		}
	}()

	go func() {
		// Tear down the goroutine and terminate the final sink channel
		defer session.pend.Done()
		defer close(results)

		// Read the source notifications and collect the delivered results
		for {
			select {
			case <-session.quit:
				return

			case subres, ok := <-process:
				// Notified of a section being retrieved
				if !ok {
					return
				}
				// Gather all the sub-results and merge them together:
				// AND the three bit-vectors of each clause, OR across clauses
				var orVector []byte
				for _, bloomSinks := range sectionSinks {
					var andVector []byte
					for _, bitSink := range bloomSinks {
						var data []byte
						select {
						case <-session.quit:
							return
						case data = <-bitSink:
						}
						if andVector == nil {
							// First bit-vector: copy so the scheduler's cached data isn't mutated
							andVector = make([]byte, int(m.sectionSize/8))
							copy(andVector, data)
						} else {
							bitutil.ANDBytes(andVector, andVector, data)
						}
					}
					if orVector == nil {
						orVector = andVector
					} else {
						bitutil.ORBytes(orVector, orVector, andVector)
					}
				}

				if orVector == nil {
					// No clauses in this group, an all-zero vector matches nothing
					orVector = make([]byte, int(m.sectionSize/8))
				}
				if subres.bitset != nil {
					// Narrow the matches with the upstream sub-matcher's results
					bitutil.ANDBytes(orVector, orVector, subres.bitset)
				}
				if bitutil.TestBytes(orVector) {
					// At least one bit survived, forward to the next link
					select {
					case <-session.quit:
						return
					case results <- &partialMatches{subres.section, orVector}:
					}
				}
			}
		}
	}()
	return results
}
   374  
// distributor receives requests from the schedulers and queues them into a set
// of pending requests, which are assigned to retrievers wanting to fulfil them.
func (m *Matcher) distributor(dist chan *request, session *MatcherSession) {
	defer session.pend.Done()

	var (
		requests   = make(map[uint][]uint64) // Per-bit list of section requests, ordered by section number
		unallocs   = make(map[uint]struct{}) // Bits with pending requests but not allocated to any retriever
		retrievers chan chan uint            // Waiting retrievers (toggled to nil if unallocs is empty)
	)
	var (
		allocs   int            // Number of active allocations to handle graceful shutdown requests
		shutdown = session.quit // Shutdown request channel, will gracefully wait for pending requests
	)

	// assign is a helper method to try to assign a pending bit to an actively
	// listening servicer, or schedule it up for later when one arrives.
	assign := func(bit uint) {
		select {
		case fetcher := <-m.retrievers:
			allocs++
			fetcher <- bit
		default:
			// No retrievers active, start listening for new ones
			retrievers = m.retrievers
			unallocs[bit] = struct{}{}
		}
	}

	for {
		select {
		case <-shutdown:
			// Graceful shutdown requested, wait until all pending requests are honoured
			if allocs == 0 {
				return
			}
			shutdown = nil

		case <-session.kill:
			// Pending requests not honoured in time, hard terminate
			return

		case req := <-dist:
			// New retrieval request arrived to be distributed to some fetcher process;
			// insert it into the per-bit queue keeping the section order sorted
			queue := requests[req.bit]
			index := sort.Search(len(queue), func(i int) bool { return queue[i] >= req.section })
			requests[req.bit] = append(queue[:index], append([]uint64{req.section}, queue[index:]...)...)

			// If it's a new bit and we have waiting fetchers, allocate to them
			if len(queue) == 0 {
				assign(req.bit)
			}

		case fetcher := <-retrievers:
			// New retriever arrived, find the lowest section-ed bit to assign
			bit, best := uint(0), uint64(math.MaxUint64)
			for idx := range unallocs {
				if requests[idx][0] < best {
					bit, best = idx, requests[idx][0]
				}
			}
			// Stop tracking this bit (and alloc notifications if no more work is available)
			delete(unallocs, bit)
			if len(unallocs) == 0 {
				retrievers = nil
			}
			allocs++
			fetcher <- bit

		case fetcher := <-m.counters:
			// New task count request arrives, return number of items
			fetcher <- uint(len(requests[<-fetcher]))

		case fetcher := <-m.retrievals:
			// New fetcher waiting for tasks to retrieve, hand over as many queued
			// sections as it asked for (len(task.Sections) carries the request size)
			task := <-fetcher
			if want := len(task.Sections); want >= len(requests[task.Bit]) {
				task.Sections = requests[task.Bit]
				delete(requests, task.Bit)
			} else {
				task.Sections = append(task.Sections[:0], requests[task.Bit][:want]...)
				requests[task.Bit] = append(requests[task.Bit][:0], requests[task.Bit][want:]...)
			}
			fetcher <- task

			// If anything was left unallocated, try to assign to someone else
			if len(requests[task.Bit]) > 0 {
				assign(task.Bit)
			}

		case result := <-m.deliveries:
			// New retrieval task response from fetcher, split out missing sections and
			// deliver complete ones
			var (
				sections = make([]uint64, 0, len(result.Sections))
				bitsets  = make([][]byte, 0, len(result.Bitsets))
				missing  = make([]uint64, 0, len(result.Sections))
			)
			for i, bitset := range result.Bitsets {
				if len(bitset) == 0 {
					// Empty bitset marks a section that couldn't be retrieved
					missing = append(missing, result.Sections[i])
					continue
				}
				sections = append(sections, result.Sections[i])
				bitsets = append(bitsets, bitset)
			}
			m.schedulers[result.Bit].deliver(sections, bitsets)
			allocs--

			// Reschedule missing sections and allocate bit if newly available
			if len(missing) > 0 {
				queue := requests[result.Bit]
				for _, section := range missing {
					index := sort.Search(len(queue), func(i int) bool { return queue[i] >= section })
					queue = append(queue[:index], append([]uint64{section}, queue[index:]...)...)
				}
				requests[result.Bit] = queue

				// The queue was empty before the reinsertion, so the bit needs a new assignment
				if len(queue) == len(missing) {
					assign(result.Bit)
				}
			}
			// If we're in the process of shutting down, terminate
			if allocs == 0 && shutdown == nil {
				return
			}
		}
	}
}
   504  
// MatcherSession is returned by a started matcher to be used as a terminator
// for the actively running matching operation.
type MatcherSession struct {
	matcher *Matcher

	closer sync.Once     // Sync object to ensure we only ever close once
	quit   chan struct{} // Quit channel to request pipeline termination
	kill   chan struct{} // Term channel to signal non-graceful forced shutdown

	ctx context.Context // Context used by the light client to abort filtering
	err atomic.Value    // Global error to track retrieval failures deep in the chain

	pend sync.WaitGroup // Running goroutines belonging to this session
}
   519  
// Close stops the matching process and waits for all subprocesses to terminate
// before returning. A one second grace period is allowed for currently running
// retrievals to complete before the pipeline is force-killed.
func (s *MatcherSession) Close() {
	s.closer.Do(func() {
		// Signal termination and wait for all goroutines to tear down
		close(s.quit)
		// Force-kill anything still pending after the grace period expires
		time.AfterFunc(time.Second, func() { close(s.kill) })
		s.pend.Wait()
	})
}
   531  
   532  // Error returns any failure encountered during the matching session.
   533  func (s *MatcherSession) Error() error {
   534  	if err := s.err.Load(); err != nil {
   535  		return err.(error)
   536  	}
   537  	return nil
   538  }
   539  
// AllocateRetrieval assigns a bloom bit index to a client process that can either
// immediately request and fetch the section contents assigned to this bit or wait
// a little while for more sections to be requested.
func (s *MatcherSession) AllocateRetrieval() (uint, bool) {
	fetcher := make(chan uint)

	select {
	case <-s.quit:
		// Session terminating, no bits to allocate
		return 0, false
	case s.matcher.retrievers <- fetcher:
		// Registered with the distributor, wait for the bit assignment
		bit, ok := <-fetcher
		return bit, ok
	}
}
   554  
   555  // PendingSections returns the number of pending section retrievals belonging to
   556  // the given bloom bit index.
   557  func (s *MatcherSession) PendingSections(bit uint) int {
   558  	fetcher := make(chan uint)
   559  
   560  	select {
   561  	case <-s.quit:
   562  		return 0
   563  	case s.matcher.counters <- fetcher:
   564  		fetcher <- bit
   565  		return int(<-fetcher)
   566  	}
   567  }
   568  
// AllocateSections assigns all or part of an already allocated bit-task queue
// to the requesting process.
func (s *MatcherSession) AllocateSections(bit uint, count int) []uint64 {
	fetcher := make(chan *Retrieval)

	select {
	case <-s.quit:
		// Session terminating, no sections to allocate
		return nil
	case s.matcher.retrievals <- fetcher:
		// The length of Sections communicates the requested count to the distributor
		task := &Retrieval{
			Bit:      bit,
			Sections: make([]uint64, count),
		}
		fetcher <- task
		return (<-fetcher).Sections
	}
}
   586  
   587  // DeliverSections delivers a batch of section bit-vectors for a specific bloom
   588  // bit index to be injected into the processing pipeline.
   589  func (s *MatcherSession) DeliverSections(bit uint, sections []uint64, bitsets [][]byte) {
   590  	select {
   591  	case <-s.kill:
   592  		return
   593  	case s.matcher.deliveries <- &Retrieval{Bit: bit, Sections: sections, Bitsets: bitsets}:
   594  	}
   595  }
   596  
// Multiplex polls the matcher session for retrieval tasks and multiplexes it into
// the requested retrieval queue to be serviced together with other sessions.
//
// This method will block for the lifetime of the session. Even after termination
// of the session, any request in-flight need to be responded to! Empty responses
// are fine though in that case.
func (s *MatcherSession) Multiplex(batch int, wait time.Duration, mux chan chan *Retrieval) {
	for {
		// Allocate a new bloom bit index to retrieve data for, stopping when done
		bit, ok := s.AllocateRetrieval()
		if !ok {
			return
		}
		// Bit allocated, throttle a bit if we're below our batch limit
		if s.PendingSections(bit) < batch {
			select {
			case <-s.quit:
				// Session terminating, we can't meaningfully service, abort
				s.AllocateSections(bit, 0)
				s.DeliverSections(bit, []uint64{}, [][]byte{})
				return

			case <-time.After(wait):
				// Throttling up, fetch whatever's available
			}
		}
		// Allocate as much as we can handle and request servicing
		sections := s.AllocateSections(bit, batch)
		request := make(chan *Retrieval)

		select {
		case <-s.quit:
			// Session terminating, we can't meaningfully service, abort.
			// Deliver empty bitsets so the distributor reschedules the sections.
			s.DeliverSections(bit, sections, make([][]byte, len(sections)))
			return

		case mux <- request:
			// Retrieval accepted, something must arrive before we're aborting
			request <- &Retrieval{Bit: bit, Sections: sections, Context: s.ctx}

			result := <-request
			if result.Error != nil {
				// Servicing failed, record the error and shut the session down
				s.err.Store(result.Error)
				s.Close()
			}
			s.DeliverSections(result.Bit, result.Sections, result.Bitsets)
		}
	}
}