github.com/etherbanking/go-etherbanking@v1.7.1-0.20181009210156-cf649bca5aba/core/bloombits/matcher.go (about)

     1  // Copyright 2017 The go-ethereum Authors
     2  // This file is part of the go-ethereum library.
     3  //
     4  // The go-ethereum library is free software: you can redistribute it and/or modify
     5  // it under the terms of the GNU Lesser General Public License as published by
     6  // the Free Software Foundation, either version 3 of the License, or
     7  // (at your option) any later version.
     8  //
     9  // The go-ethereum library is distributed in the hope that it will be useful,
    10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    12  // GNU Lesser General Public License for more details.
    13  //
    14  // You should have received a copy of the GNU Lesser General Public License
    15  // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
    16  
    17  package bloombits
    18  
    19  import (
    20  	"bytes"
    21  	"errors"
    22  	"math"
    23  	"sort"
    24  	"sync"
    25  	"sync/atomic"
    26  	"time"
    27  
    28  	"github.com/etherbanking/go-etherbanking/common/bitutil"
    29  	"github.com/etherbanking/go-etherbanking/crypto"
    30  )
    31  
// bloomIndexes represents the bit indexes inside the bloom filter that belong
// to some key. A key always maps to exactly three bit positions, one per array
// slot; the values produced by calcBloomIndexes lie in the range [0, 2047].
type bloomIndexes [3]uint
    35  
    36  // calcBloomIndexes returns the bloom filter bit indexes belonging to the given key.
    37  func calcBloomIndexes(b []byte) bloomIndexes {
    38  	b = crypto.Keccak256(b)
    39  
    40  	var idxs bloomIndexes
    41  	for i := 0; i < len(idxs); i++ {
    42  		idxs[i] = (uint(b[2*i])<<8)&2047 + uint(b[2*i+1])
    43  	}
    44  	return idxs
    45  }
    46  
// partialMatches represents the intermediate match state of a single section
// while it travels through the sub-matcher pipeline. The bitset holds one bit
// per block of the section (most significant bit first within each byte) and
// marks the blocks that are still potential matches. The pipeline is seeded
// with bitsets that have every bit set, and each sub-matcher binary ANDs its
// own matches into the bitset before forwarding it.
type partialMatches struct {
	section uint64 // Index of the section the bitset belongs to
	bitset  []byte // Potential matches found so far within the section
}
    55  
// Retrieval represents a request for retrieval task assignments for a given
// bit with the given number of fetch elements, or a response for such a request.
// It can also have the actual results set to be used as a delivery data struct.
type Retrieval struct {
	Bit      uint     // Bloom bit index the retrieval refers to
	Sections []uint64 // Section indexes requested, assigned or delivered
	Bitsets  [][]byte // Delivered bit vectors, index-aligned with Sections; an empty entry marks the section as missing
}
    64  
// Matcher is a pipelined system of schedulers and logic matchers which perform
// binary AND/OR operations on the bit-streams, creating a stream of potential
// blocks to inspect for data content.
//
// The channel fields are the rendezvous points between the retriever facing
// MatcherSession methods (senders) and the distributor goroutine (receiver).
type Matcher struct {
	sectionSize uint64 // Size of the data batches to filter on

	filters    [][]bloomIndexes    // Filter the system is matching for
	schedulers map[uint]*scheduler // Retrieval schedulers for loading bloom bits, keyed by bloom bit index

	retrievers chan chan uint       // Retriever processes waiting for bit allocations
	counters   chan chan uint       // Retriever processes waiting for task count reports
	retrievals chan chan *Retrieval // Retriever processes waiting for task allocations
	deliveries chan *Retrieval      // Retriever processes waiting for task response deliveries

	running uint32 // Atomic flag whether a session is live or not (accessed via sync/atomic only)
}
    81  
    82  // NewMatcher creates a new pipeline for retrieving bloom bit streams and doing
    83  // address and topic filtering on them.
    84  func NewMatcher(sectionSize uint64, filters [][][]byte) *Matcher {
    85  	// Create the matcher instance
    86  	m := &Matcher{
    87  		sectionSize: sectionSize,
    88  		schedulers:  make(map[uint]*scheduler),
    89  		retrievers:  make(chan chan uint),
    90  		counters:    make(chan chan uint),
    91  		retrievals:  make(chan chan *Retrieval),
    92  		deliveries:  make(chan *Retrieval),
    93  	}
    94  	// Calculate the bloom bit indexes for the groups we're interested in
    95  	m.filters = nil
    96  
    97  	for _, filter := range filters {
    98  		bloomBits := make([]bloomIndexes, len(filter))
    99  		for i, clause := range filter {
   100  			bloomBits[i] = calcBloomIndexes(clause)
   101  		}
   102  		m.filters = append(m.filters, bloomBits)
   103  	}
   104  	// For every bit, create a scheduler to load/download the bit vectors
   105  	for _, bloomIndexLists := range m.filters {
   106  		for _, bloomIndexList := range bloomIndexLists {
   107  			for _, bloomIndex := range bloomIndexList {
   108  				m.addScheduler(bloomIndex)
   109  			}
   110  		}
   111  	}
   112  	return m
   113  }
   114  
   115  // addScheduler adds a bit stream retrieval scheduler for the given bit index if
   116  // it has not existed before. If the bit is already selected for filtering, the
   117  // existing scheduler can be used.
   118  func (m *Matcher) addScheduler(idx uint) {
   119  	if _, ok := m.schedulers[idx]; ok {
   120  		return
   121  	}
   122  	m.schedulers[idx] = newScheduler(idx)
   123  }
   124  
   125  // Start starts the matching process and returns a stream of bloom matches in
   126  // a given range of blocks. If there are no more matches in the range, the result
   127  // channel is closed.
   128  func (m *Matcher) Start(begin, end uint64, results chan uint64) (*MatcherSession, error) {
   129  	// Make sure we're not creating concurrent sessions
   130  	if atomic.SwapUint32(&m.running, 1) == 1 {
   131  		return nil, errors.New("matcher already running")
   132  	}
   133  	defer atomic.StoreUint32(&m.running, 0)
   134  
   135  	// Initiate a new matching round
   136  	session := &MatcherSession{
   137  		matcher: m,
   138  		quit:    make(chan struct{}),
   139  		kill:    make(chan struct{}),
   140  	}
   141  	for _, scheduler := range m.schedulers {
   142  		scheduler.reset()
   143  	}
   144  	sink := m.run(begin, end, cap(results), session)
   145  
   146  	// Read the output from the result sink and deliver to the user
   147  	session.pend.Add(1)
   148  	go func() {
   149  		defer session.pend.Done()
   150  		defer close(results)
   151  
   152  		for {
   153  			select {
   154  			case <-session.quit:
   155  				return
   156  
   157  			case res, ok := <-sink:
   158  				// New match result found
   159  				if !ok {
   160  					return
   161  				}
   162  				// Calculate the first and last blocks of the section
   163  				sectionStart := res.section * m.sectionSize
   164  
   165  				first := sectionStart
   166  				if begin > first {
   167  					first = begin
   168  				}
   169  				last := sectionStart + m.sectionSize - 1
   170  				if end < last {
   171  					last = end
   172  				}
   173  				// Iterate over all the blocks in the section and return the matching ones
   174  				for i := first; i <= last; i++ {
   175  					// Skip the entire byte if no matches are found inside
   176  					next := res.bitset[(i-sectionStart)/8]
   177  					if next == 0 {
   178  						i += 7
   179  						continue
   180  					}
   181  					// Some bit it set, do the actual submatching
   182  					if bit := 7 - i%8; next&(1<<bit) != 0 {
   183  						select {
   184  						case <-session.quit:
   185  							return
   186  						case results <- i:
   187  						}
   188  					}
   189  				}
   190  			}
   191  		}
   192  	}()
   193  	return session, nil
   194  }
   195  
   196  // run creates a daisy-chain of sub-matchers, one for the address set and one
   197  // for each topic set, each sub-matcher receiving a section only if the previous
   198  // ones have all found a potential match in one of the blocks of the section,
   199  // then binary AND-ing its own matches and forwaring the result to the next one.
   200  //
   201  // The method starts feeding the section indexes into the first sub-matcher on a
   202  // new goroutine and returns a sink channel receiving the results.
   203  func (m *Matcher) run(begin, end uint64, buffer int, session *MatcherSession) chan *partialMatches {
   204  	// Create the source channel and feed section indexes into
   205  	source := make(chan *partialMatches, buffer)
   206  
   207  	session.pend.Add(1)
   208  	go func() {
   209  		defer session.pend.Done()
   210  		defer close(source)
   211  
   212  		for i := begin / m.sectionSize; i <= end/m.sectionSize; i++ {
   213  			select {
   214  			case <-session.quit:
   215  				return
   216  			case source <- &partialMatches{i, bytes.Repeat([]byte{0xff}, int(m.sectionSize/8))}:
   217  			}
   218  		}
   219  	}()
   220  	// Assemble the daisy-chained filtering pipeline
   221  	next := source
   222  	dist := make(chan *request, buffer)
   223  
   224  	for _, bloom := range m.filters {
   225  		next = m.subMatch(next, dist, bloom, session)
   226  	}
   227  	// Start the request distribution
   228  	session.pend.Add(1)
   229  	go m.distributor(dist, session)
   230  
   231  	return next
   232  }
   233  
   234  // subMatch creates a sub-matcher that filters for a set of addresses or topics, binary OR-s those matches, then
   235  // binary AND-s the result to the daisy-chain input (source) and forwards it to the daisy-chain output.
   236  // The matches of each address/topic are calculated by fetching the given sections of the three bloom bit indexes belonging to
   237  // that address/topic, and binary AND-ing those vectors together.
   238  func (m *Matcher) subMatch(source chan *partialMatches, dist chan *request, bloom []bloomIndexes, session *MatcherSession) chan *partialMatches {
   239  	// Start the concurrent schedulers for each bit required by the bloom filter
   240  	sectionSources := make([][3]chan uint64, len(bloom))
   241  	sectionSinks := make([][3]chan []byte, len(bloom))
   242  	for i, bits := range bloom {
   243  		for j, bit := range bits {
   244  			sectionSources[i][j] = make(chan uint64, cap(source))
   245  			sectionSinks[i][j] = make(chan []byte, cap(source))
   246  
   247  			m.schedulers[bit].run(sectionSources[i][j], dist, sectionSinks[i][j], session.quit, &session.pend)
   248  		}
   249  	}
   250  
   251  	process := make(chan *partialMatches, cap(source)) // entries from source are forwarded here after fetches have been initiated
   252  	results := make(chan *partialMatches, cap(source))
   253  
   254  	session.pend.Add(2)
   255  	go func() {
   256  		// Tear down the goroutine and terminate all source channels
   257  		defer session.pend.Done()
   258  		defer close(process)
   259  
   260  		defer func() {
   261  			for _, bloomSources := range sectionSources {
   262  				for _, bitSource := range bloomSources {
   263  					close(bitSource)
   264  				}
   265  			}
   266  		}()
   267  		// Read sections from the source channel and multiplex into all bit-schedulers
   268  		for {
   269  			select {
   270  			case <-session.quit:
   271  				return
   272  
   273  			case subres, ok := <-source:
   274  				// New subresult from previous link
   275  				if !ok {
   276  					return
   277  				}
   278  				// Multiplex the section index to all bit-schedulers
   279  				for _, bloomSources := range sectionSources {
   280  					for _, bitSource := range bloomSources {
   281  						select {
   282  						case <-session.quit:
   283  							return
   284  						case bitSource <- subres.section:
   285  						}
   286  					}
   287  				}
   288  				// Notify the processor that this section will become available
   289  				select {
   290  				case <-session.quit:
   291  					return
   292  				case process <- subres:
   293  				}
   294  			}
   295  		}
   296  	}()
   297  
   298  	go func() {
   299  		// Tear down the goroutine and terminate the final sink channel
   300  		defer session.pend.Done()
   301  		defer close(results)
   302  
   303  		// Read the source notifications and collect the delivered results
   304  		for {
   305  			select {
   306  			case <-session.quit:
   307  				return
   308  
   309  			case subres, ok := <-process:
   310  				// Notified of a section being retrieved
   311  				if !ok {
   312  					return
   313  				}
   314  				// Gather all the sub-results and merge them together
   315  				var orVector []byte
   316  				for _, bloomSinks := range sectionSinks {
   317  					var andVector []byte
   318  					for _, bitSink := range bloomSinks {
   319  						var data []byte
   320  						select {
   321  						case <-session.quit:
   322  							return
   323  						case data = <-bitSink:
   324  						}
   325  						if andVector == nil {
   326  							andVector = make([]byte, int(m.sectionSize/8))
   327  							copy(andVector, data)
   328  						} else {
   329  							bitutil.ANDBytes(andVector, andVector, data)
   330  						}
   331  					}
   332  					if orVector == nil {
   333  						orVector = andVector
   334  					} else {
   335  						bitutil.ORBytes(orVector, orVector, andVector)
   336  					}
   337  				}
   338  
   339  				if orVector == nil {
   340  					orVector = make([]byte, int(m.sectionSize/8))
   341  				}
   342  				if subres.bitset != nil {
   343  					bitutil.ANDBytes(orVector, orVector, subres.bitset)
   344  				}
   345  				if bitutil.TestBytes(orVector) {
   346  					select {
   347  					case <-session.quit:
   348  						return
   349  					case results <- &partialMatches{subres.section, orVector}:
   350  					}
   351  				}
   352  			}
   353  		}
   354  	}()
   355  	return results
   356  }
   357  
   358  // distributor receives requests from the schedulers and queues them into a set
   359  // of pending requests, which are assigned to retrievers wanting to fulfil them.
   360  func (m *Matcher) distributor(dist chan *request, session *MatcherSession) {
   361  	defer session.pend.Done()
   362  
   363  	var (
   364  		requests   = make(map[uint][]uint64) // Per-bit list of section requests, ordered by section number
   365  		unallocs   = make(map[uint]struct{}) // Bits with pending requests but not allocated to any retriever
   366  		retrievers chan chan uint            // Waiting retrievers (toggled to nil if unallocs is empty)
   367  	)
   368  	var (
   369  		allocs   int            // Number of active allocations to handle graceful shutdown requests
   370  		shutdown = session.quit // Shutdown request channel, will gracefully wait for pending requests
   371  	)
   372  
   373  	// assign is a helper method fo try to assign a pending bit an an actively
   374  	// listening servicer, or schedule it up for later when one arrives.
   375  	assign := func(bit uint) {
   376  		select {
   377  		case fetcher := <-m.retrievers:
   378  			allocs++
   379  			fetcher <- bit
   380  		default:
   381  			// No retrievers active, start listening for new ones
   382  			retrievers = m.retrievers
   383  			unallocs[bit] = struct{}{}
   384  		}
   385  	}
   386  
   387  	for {
   388  		select {
   389  		case <-shutdown:
   390  			// Graceful shutdown requested, wait until all pending requests are honoured
   391  			if allocs == 0 {
   392  				return
   393  			}
   394  			shutdown = nil
   395  
   396  		case <-session.kill:
   397  			// Pending requests not honoured in time, hard terminate
   398  			return
   399  
   400  		case req := <-dist:
   401  			// New retrieval request arrived to be distributed to some fetcher process
   402  			queue := requests[req.bit]
   403  			index := sort.Search(len(queue), func(i int) bool { return queue[i] >= req.section })
   404  			requests[req.bit] = append(queue[:index], append([]uint64{req.section}, queue[index:]...)...)
   405  
   406  			// If it's a new bit and we have waiting fetchers, allocate to them
   407  			if len(queue) == 0 {
   408  				assign(req.bit)
   409  			}
   410  
   411  		case fetcher := <-retrievers:
   412  			// New retriever arrived, find the lowest section-ed bit to assign
   413  			bit, best := uint(0), uint64(math.MaxUint64)
   414  			for idx := range unallocs {
   415  				if requests[idx][0] < best {
   416  					bit, best = idx, requests[idx][0]
   417  				}
   418  			}
   419  			// Stop tracking this bit (and alloc notifications if no more work is available)
   420  			delete(unallocs, bit)
   421  			if len(unallocs) == 0 {
   422  				retrievers = nil
   423  			}
   424  			allocs++
   425  			fetcher <- bit
   426  
   427  		case fetcher := <-m.counters:
   428  			// New task count request arrives, return number of items
   429  			fetcher <- uint(len(requests[<-fetcher]))
   430  
   431  		case fetcher := <-m.retrievals:
   432  			// New fetcher waiting for tasks to retrieve, assign
   433  			task := <-fetcher
   434  			if want := len(task.Sections); want >= len(requests[task.Bit]) {
   435  				task.Sections = requests[task.Bit]
   436  				delete(requests, task.Bit)
   437  			} else {
   438  				task.Sections = append(task.Sections[:0], requests[task.Bit][:want]...)
   439  				requests[task.Bit] = append(requests[task.Bit][:0], requests[task.Bit][want:]...)
   440  			}
   441  			fetcher <- task
   442  
   443  			// If anything was left unallocated, try to assign to someone else
   444  			if len(requests[task.Bit]) > 0 {
   445  				assign(task.Bit)
   446  			}
   447  
   448  		case result := <-m.deliveries:
   449  			// New retrieval task response from fetcher, split out missing sections and
   450  			// deliver complete ones
   451  			var (
   452  				sections = make([]uint64, 0, len(result.Sections))
   453  				bitsets  = make([][]byte, 0, len(result.Bitsets))
   454  				missing  = make([]uint64, 0, len(result.Sections))
   455  			)
   456  			for i, bitset := range result.Bitsets {
   457  				if len(bitset) == 0 {
   458  					missing = append(missing, result.Sections[i])
   459  					continue
   460  				}
   461  				sections = append(sections, result.Sections[i])
   462  				bitsets = append(bitsets, bitset)
   463  			}
   464  			m.schedulers[result.Bit].deliver(sections, bitsets)
   465  			allocs--
   466  
   467  			// Reschedule missing sections and allocate bit if newly available
   468  			if len(missing) > 0 {
   469  				queue := requests[result.Bit]
   470  				for _, section := range missing {
   471  					index := sort.Search(len(queue), func(i int) bool { return queue[i] >= section })
   472  					queue = append(queue[:index], append([]uint64{section}, queue[index:]...)...)
   473  				}
   474  				requests[result.Bit] = queue
   475  
   476  				if len(queue) == len(missing) {
   477  					assign(result.Bit)
   478  				}
   479  			}
   480  			// If we're in the process of shutting down, terminate
   481  			if allocs == 0 && shutdown == nil {
   482  				return
   483  			}
   484  		}
   485  	}
   486  }
   487  
// MatcherSession is returned by a started matcher to be used as a terminator
// for the actively running matching operation. It is also the handle through
// which retriever processes request work from, and deliver results to, the
// matcher's distributor.
type MatcherSession struct {
	matcher *Matcher // Matcher the session belongs to

	quit chan struct{} // Quit channel to request pipeline termination
	kill chan struct{} // Term channel to signal non-graceful forced shutdown
	pend sync.WaitGroup // Tracks the session's pipeline goroutines, waited on by Close
}
   497  
   498  // Close stops the matching process and waits for all subprocesses to terminate
   499  // before returning. The timeout may be used for graceful shutdown, allowing the
   500  // currently running retrievals to complete before this time.
   501  func (s *MatcherSession) Close(timeout time.Duration) {
   502  	// Bail out if the matcher is not running
   503  	select {
   504  	case <-s.quit:
   505  		return
   506  	default:
   507  	}
   508  	// Signal termination and wait for all goroutines to tear down
   509  	close(s.quit)
   510  	time.AfterFunc(timeout, func() { close(s.kill) })
   511  	s.pend.Wait()
   512  }
   513  
   514  // AllocateRetrieval assigns a bloom bit index to a client process that can either
   515  // immediately reuest and fetch the section contents assigned to this bit or wait
   516  // a little while for more sections to be requested.
   517  func (s *MatcherSession) AllocateRetrieval() (uint, bool) {
   518  	fetcher := make(chan uint)
   519  
   520  	select {
   521  	case <-s.quit:
   522  		return 0, false
   523  	case s.matcher.retrievers <- fetcher:
   524  		bit, ok := <-fetcher
   525  		return bit, ok
   526  	}
   527  }
   528  
   529  // PendingSections returns the number of pending section retrievals belonging to
   530  // the given bloom bit index.
   531  func (s *MatcherSession) PendingSections(bit uint) int {
   532  	fetcher := make(chan uint)
   533  
   534  	select {
   535  	case <-s.quit:
   536  		return 0
   537  	case s.matcher.counters <- fetcher:
   538  		fetcher <- bit
   539  		return int(<-fetcher)
   540  	}
   541  }
   542  
   543  // AllocateSections assigns all or part of an already allocated bit-task queue
   544  // to the requesting process.
   545  func (s *MatcherSession) AllocateSections(bit uint, count int) []uint64 {
   546  	fetcher := make(chan *Retrieval)
   547  
   548  	select {
   549  	case <-s.quit:
   550  		return nil
   551  	case s.matcher.retrievals <- fetcher:
   552  		task := &Retrieval{
   553  			Bit:      bit,
   554  			Sections: make([]uint64, count),
   555  		}
   556  		fetcher <- task
   557  		return (<-fetcher).Sections
   558  	}
   559  }
   560  
   561  // DeliverSections delivers a batch of section bit-vectors for a specific bloom
   562  // bit index to be injected into the processing pipeline.
   563  func (s *MatcherSession) DeliverSections(bit uint, sections []uint64, bitsets [][]byte) {
   564  	select {
   565  	case <-s.kill:
   566  		return
   567  	case s.matcher.deliveries <- &Retrieval{Bit: bit, Sections: sections, Bitsets: bitsets}:
   568  	}
   569  }
   570  
   571  // Multiplex polls the matcher session for rerieval tasks and multiplexes it into
   572  // the reuested retrieval queue to be serviced together with other sessions.
   573  //
   574  // This method will block for the lifetime of the session. Even after termination
   575  // of the session, any request in-flight need to be responded to! Empty responses
   576  // are fine though in that case.
   577  func (s *MatcherSession) Multiplex(batch int, wait time.Duration, mux chan chan *Retrieval) {
   578  	for {
   579  		// Allocate a new bloom bit index to retrieve data for, stopping when done
   580  		bit, ok := s.AllocateRetrieval()
   581  		if !ok {
   582  			return
   583  		}
   584  		// Bit allocated, throttle a bit if we're below our batch limit
   585  		if s.PendingSections(bit) < batch {
   586  			select {
   587  			case <-s.quit:
   588  				// Session terminating, we can't meaningfully service, abort
   589  				s.AllocateSections(bit, 0)
   590  				s.DeliverSections(bit, []uint64{}, [][]byte{})
   591  				return
   592  
   593  			case <-time.After(wait):
   594  				// Throttling up, fetch whatever's available
   595  			}
   596  		}
   597  		// Allocate as much as we can handle and request servicing
   598  		sections := s.AllocateSections(bit, batch)
   599  		request := make(chan *Retrieval)
   600  
   601  		select {
   602  		case <-s.quit:
   603  			// Session terminating, we can't meaningfully service, abort
   604  			s.DeliverSections(bit, sections, make([][]byte, len(sections)))
   605  			return
   606  
   607  		case mux <- request:
   608  			// Retrieval accepted, something must arrive before we're aborting
   609  			request <- &Retrieval{Bit: bit, Sections: sections}
   610  
   611  			result := <-request
   612  			s.DeliverSections(result.Bit, result.Sections, result.Bitsets)
   613  		}
   614  	}
   615  }