// (c) 2019-2020, Ava Labs, Inc.
//
// This file is a derived work, based on the go-ethereum library whose original
// notices appear below.
//
// It is distributed under a license compatible with the licensing terms of the
// original code from which it is derived.
//
// Much love to the original authors for their work.
// **********
// Copyright 2017 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package bloombits

import (
	"bytes"
	"context"
	"errors"
	"math"
	"sort"
	"sync"
	"sync/atomic"
	"time"

	"github.com/ethereum/go-ethereum/common/bitutil"
	"github.com/ethereum/go-ethereum/crypto"
)

// bloomIndexes represents the bit indexes inside the bloom filter that belong
// to some key.
type bloomIndexes [3]uint

// calcBloomIndexes returns the bloom filter bit indexes belonging to the given key.
func calcBloomIndexes(b []byte) bloomIndexes {
	b = crypto.Keccak256(b)

	var idxs bloomIndexes
	for i := 0; i < len(idxs); i++ {
		// Each index is an 11-bit value in [0, 2047], built from a pair of hash
		// bytes: the top 3 bits come from the even byte (masked by &2047 after
		// the shift), the low 8 bits from the odd byte.
		idxs[i] = (uint(b[2*i])<<8)&2047 + uint(b[2*i+1])
	}
	return idxs
}

// partialMatches with a non-nil vector represents a section in which some sub-
// matchers have already found potential matches. Subsequent sub-matchers will
// binary AND their matches with this vector. If vector is nil, it represents a
// section to be processed by the first sub-matcher.
type partialMatches struct {
	section uint64
	bitset  []byte
}

// Retrieval represents a request for retrieval task assignments for a given
// bit with the given number of fetch elements, or a response for such a request.
// It can also have the actual results set to be used as a delivery data struct.
//
// The context and error fields are used by the light client to terminate matching
// early if an error is encountered on some path of the pipeline.
type Retrieval struct {
	Bit      uint
	Sections []uint64
	Bitsets  [][]byte

	Context context.Context
	Error   error
}

// Matcher is a pipelined system of schedulers and logic matchers which perform
// binary AND/OR operations on the bit-streams, creating a stream of potential
// blocks to inspect for data content.
type Matcher struct {
	sectionSize uint64 // Size of the data batches to filter on

	filters    [][]bloomIndexes    // Filter the system is matching for
	schedulers map[uint]*scheduler // Retrieval schedulers for loading bloom bits

	retrievers chan chan uint       // Retriever processes waiting for bit allocations
	counters   chan chan uint       // Retriever processes waiting for task count reports
	retrievals chan chan *Retrieval // Retriever processes waiting for task allocations
	deliveries chan *Retrieval      // Retriever processes waiting for task response deliveries

	running uint32 // Atomic flag whether a session is live or not
}

// NewMatcher creates a new pipeline for retrieving bloom bit streams and doing
// address and topic filtering on them. Setting a filter component to `nil` is
// allowed and will result in that filter rule being skipped (OR 0x11...1).
func NewMatcher(sectionSize uint64, filters [][][]byte) *Matcher {
	// Create the matcher instance
	m := &Matcher{
		sectionSize: sectionSize,
		schedulers:  make(map[uint]*scheduler),
		retrievers:  make(chan chan uint),
		counters:    make(chan chan uint),
		retrievals:  make(chan chan *Retrieval),
		deliveries:  make(chan *Retrieval),
	}
	// Calculate the bloom bit indexes for the groups we're interested in
	m.filters = nil

	for _, filter := range filters {
		// Gather the bit indexes of the filter rule, special casing the nil filter
		if len(filter) == 0 {
			continue
		}
		bloomBits := make([]bloomIndexes, len(filter))
		for i, clause := range filter {
			// A nil clause matches everything, so the whole OR-group is a no-op
			// and can be dropped entirely.
			if clause == nil {
				bloomBits = nil
				break
			}
			bloomBits[i] = calcBloomIndexes(clause)
		}
		// Accumulate the filter rules if no nil rule was within
		if bloomBits != nil {
			m.filters = append(m.filters, bloomBits)
		}
	}
	// For every bit, create a scheduler to load/download the bit vectors
	for _, bloomIndexLists := range m.filters {
		for _, bloomIndexList := range bloomIndexLists {
			for _, bloomIndex := range bloomIndexList {
				m.addScheduler(bloomIndex)
			}
		}
	}
	return m
}

// addScheduler adds a bit stream retrieval scheduler for the given bit index if
// it has not existed before. If the bit is already selected for filtering, the
// existing scheduler can be used.
func (m *Matcher) addScheduler(idx uint) {
	if _, ok := m.schedulers[idx]; ok {
		return
	}
	m.schedulers[idx] = newScheduler(idx)
}

// Start starts the matching process and returns a stream of bloom matches in
// a given range of blocks. If there are no more matches in the range, the result
// channel is closed.
func (m *Matcher) Start(ctx context.Context, begin, end uint64, results chan uint64) (*MatcherSession, error) {
	// Make sure we're not creating concurrent sessions
	if atomic.SwapUint32(&m.running, 1) == 1 {
		return nil, errors.New("matcher already running")
	}
	defer atomic.StoreUint32(&m.running, 0)

	// Initiate a new matching round
	session := &MatcherSession{
		matcher: m,
		quit:    make(chan struct{}),
		ctx:     ctx,
	}
	for _, scheduler := range m.schedulers {
		scheduler.reset()
	}
	// The results channel's capacity sizes every internal pipeline buffer
	sink := m.run(begin, end, cap(results), session)

	// Read the output from the result sink and deliver to the user
	session.pend.Add(1)
	go func() {
		defer session.pend.Done()
		defer close(results)

		for {
			select {
			case <-session.quit:
				return

			case res, ok := <-sink:
				// New match result found
				if !ok {
					return
				}
				// Calculate the first and last blocks of the section, clamped
				// to the requested [begin, end] range
				sectionStart := res.section * m.sectionSize

				first := sectionStart
				if begin > first {
					first = begin
				}
				last := sectionStart + m.sectionSize - 1
				if end < last {
					last = end
				}
				// Iterate over all the blocks in the section and return the matching ones
				for i := first; i <= last; i++ {
					// Skip the entire byte if no matches are found inside (and we're processing an entire byte!)
					next := res.bitset[(i-sectionStart)/8]
					if next == 0 {
						if i%8 == 0 {
							i += 7
						}
						continue
					}
					// Some bit is set, do the actual submatching
					if bit := 7 - i%8; next&(1<<bit) != 0 {
						select {
						case <-session.quit:
							return
						case results <- i:
						}
					}
				}
			}
		}
	}()
	return session, nil
}

// run creates a daisy-chain of sub-matchers, one for the address set and one
// for each topic set, each sub-matcher receiving a section only if the previous
// ones have all found a potential match in one of the blocks of the section,
// then binary AND-ing its own matches and forwarding the result to the next one.
//
// The method starts feeding the section indexes into the first sub-matcher on a
// new goroutine and returns a sink channel receiving the results.
func (m *Matcher) run(begin, end uint64, buffer int, session *MatcherSession) chan *partialMatches {
	// Create the source channel and feed section indexes into
	source := make(chan *partialMatches, buffer)

	session.pend.Add(1)
	go func() {
		defer session.pend.Done()
		defer close(source)

		// Seed every section in range with an all-ones bitset; the first
		// sub-matcher ANDs its own matches against it.
		for i := begin / m.sectionSize; i <= end/m.sectionSize; i++ {
			select {
			case <-session.quit:
				return
			case source <- &partialMatches{i, bytes.Repeat([]byte{0xff}, int(m.sectionSize/8))}:
			}
		}
	}()
	// Assemble the daisy-chained filtering pipeline
	next := source
	dist := make(chan *request, buffer)

	for _, bloom := range m.filters {
		next = m.subMatch(next, dist, bloom, session)
	}
	// Start the request distribution
	session.pend.Add(1)
	go m.distributor(dist, session)

	return next
}

// subMatch creates a sub-matcher that filters for a set of addresses or topics,
// binary OR-s those matches, then binary AND-s the result to the daisy-chain
// input (source) and forwards it to the daisy-chain output.
// The matches of each address/topic are calculated by fetching the given sections
// of the three bloom bit indexes belonging to that address/topic, and binary
// AND-ing those vectors together.
func (m *Matcher) subMatch(source chan *partialMatches, dist chan *request, bloom []bloomIndexes, session *MatcherSession) chan *partialMatches {
	// Start the concurrent schedulers for each bit required by the bloom filter
	sectionSources := make([][3]chan uint64, len(bloom))
	sectionSinks := make([][3]chan []byte, len(bloom))
	for i, bits := range bloom {
		for j, bit := range bits {
			sectionSources[i][j] = make(chan uint64, cap(source))
			sectionSinks[i][j] = make(chan []byte, cap(source))

			m.schedulers[bit].run(sectionSources[i][j], dist, sectionSinks[i][j], session.quit, &session.pend)
		}
	}

	process := make(chan *partialMatches, cap(source)) // entries from source are forwarded here after fetches have been initiated
	results := make(chan *partialMatches, cap(source))

	session.pend.Add(2)
	go func() {
		// Tear down the goroutine and terminate all source channels
		defer session.pend.Done()
		defer close(process)

		defer func() {
			for _, bloomSources := range sectionSources {
				for _, bitSource := range bloomSources {
					close(bitSource)
				}
			}
		}()
		// Read sections from the source channel and multiplex into all bit-schedulers
		for {
			select {
			case <-session.quit:
				return

			case subres, ok := <-source:
				// New subresult from previous link
				if !ok {
					return
				}
				// Multiplex the section index to all bit-schedulers
				for _, bloomSources := range sectionSources {
					for _, bitSource := range bloomSources {
						select {
						case <-session.quit:
							return
						case bitSource <- subres.section:
						}
					}
				}
				// Notify the processor that this section will become available
				select {
				case <-session.quit:
					return
				case process <- subres:
				}
			}
		}
	}()

	go func() {
		// Tear down the goroutine and terminate the final sink channel
		defer session.pend.Done()
		defer close(results)

		// Read the source notifications and collect the delivered results
		for {
			select {
			case <-session.quit:
				return

			case subres, ok := <-process:
				// Notified of a section being retrieved
				if !ok {
					return
				}
				// Gather all the sub-results and merge them together:
				// AND across the three bits of each clause, OR across clauses.
				var orVector []byte
				for _, bloomSinks := range sectionSinks {
					var andVector []byte
					for _, bitSink := range bloomSinks {
						var data []byte
						select {
						case <-session.quit:
							return
						case data = <-bitSink:
						}
						if andVector == nil {
							// Copy so the scheduler-owned vector is never mutated
							andVector = make([]byte, int(m.sectionSize/8))
							copy(andVector, data)
						} else {
							bitutil.ANDBytes(andVector, andVector, data)
						}
					}
					if orVector == nil {
						orVector = andVector
					} else {
						bitutil.ORBytes(orVector, orVector, andVector)
					}
				}

				if orVector == nil {
					// Degenerate case (no clauses): match nothing
					orVector = make([]byte, int(m.sectionSize/8))
				}
				if subres.bitset != nil {
					bitutil.ANDBytes(orVector, orVector, subres.bitset)
				}
				// Only forward sections that still have at least one match
				if bitutil.TestBytes(orVector) {
					select {
					case <-session.quit:
						return
					case results <- &partialMatches{subres.section, orVector}:
					}
				}
			}
		}
	}()
	return results
}

// distributor receives requests from the schedulers and queues them into a set
// of pending requests, which are assigned to retrievers wanting to fulfil them.
func (m *Matcher) distributor(dist chan *request, session *MatcherSession) {
	defer session.pend.Done()

	var (
		requests   = make(map[uint][]uint64) // Per-bit list of section requests, ordered by section number
		unallocs   = make(map[uint]struct{}) // Bits with pending requests but not allocated to any retriever
		retrievers chan chan uint            // Waiting retrievers (toggled to nil if unallocs is empty)
		allocs     int                       // Number of active allocations to handle graceful shutdown requests
		shutdown   = session.quit            // Shutdown request channel, will gracefully wait for pending requests
	)

	// assign is a helper method to try to assign a pending bit an actively
	// listening servicer, or schedule it up for later when one arrives.
	assign := func(bit uint) {
		select {
		case fetcher := <-m.retrievers:
			allocs++
			fetcher <- bit
		default:
			// No retrievers active, start listening for new ones
			retrievers = m.retrievers
			unallocs[bit] = struct{}{}
		}
	}

	for {
		select {
		case <-shutdown:
			// Shutdown requested. No more retrievers can be allocated,
			// but we still need to wait until all pending requests have returned.
			// Setting shutdown to nil disables this case on later iterations.
			shutdown = nil
			if allocs == 0 {
				return
			}

		case req := <-dist:
			// New retrieval request arrived to be distributed to some fetcher
			// process; insert it into the per-bit queue in sorted order.
			queue := requests[req.bit]
			index := sort.Search(len(queue), func(i int) bool { return queue[i] >= req.section })
			requests[req.bit] = append(queue[:index], append([]uint64{req.section}, queue[index:]...)...)

			// If it's a new bit and we have waiting fetchers, allocate to them
			if len(queue) == 0 {
				assign(req.bit)
			}

		case fetcher := <-retrievers:
			// New retriever arrived, find the lowest section-ed bit to assign
			bit, best := uint(0), uint64(math.MaxUint64)
			for idx := range unallocs {
				if requests[idx][0] < best {
					bit, best = idx, requests[idx][0]
				}
			}
			// Stop tracking this bit (and alloc notifications if no more work is available)
			delete(unallocs, bit)
			if len(unallocs) == 0 {
				retrievers = nil
			}
			allocs++
			fetcher <- bit

		case fetcher := <-m.counters:
			// New task count request arrives, return number of items queued
			// for the bit the counter sends back over its own channel.
			fetcher <- uint(len(requests[<-fetcher]))

		case fetcher := <-m.retrievals:
			// New fetcher waiting for tasks to retrieve, assign as many queued
			// sections for its bit as it asked for (or all of them if fewer).
			task := <-fetcher
			if want := len(task.Sections); want >= len(requests[task.Bit]) {
				task.Sections = requests[task.Bit]
				delete(requests, task.Bit)
			} else {
				task.Sections = append(task.Sections[:0], requests[task.Bit][:want]...)
				requests[task.Bit] = append(requests[task.Bit][:0], requests[task.Bit][want:]...)
			}
			fetcher <- task

			// If anything was left unallocated, try to assign to someone else
			if len(requests[task.Bit]) > 0 {
				assign(task.Bit)
			}

		case result := <-m.deliveries:
			// New retrieval task response from fetcher, split out missing sections and
			// deliver complete ones
			var (
				sections = make([]uint64, 0, len(result.Sections))
				bitsets  = make([][]byte, 0, len(result.Bitsets))
				missing  = make([]uint64, 0, len(result.Sections))
			)
			for i, bitset := range result.Bitsets {
				// An empty bitset marks a section the fetcher failed to retrieve
				if len(bitset) == 0 {
					missing = append(missing, result.Sections[i])
					continue
				}
				sections = append(sections, result.Sections[i])
				bitsets = append(bitsets, bitset)
			}
			m.schedulers[result.Bit].deliver(sections, bitsets)
			allocs--

			// Reschedule missing sections and allocate bit if newly available
			if len(missing) > 0 {
				queue := requests[result.Bit]
				for _, section := range missing {
					index := sort.Search(len(queue), func(i int) bool { return queue[i] >= section })
					queue = append(queue[:index], append([]uint64{section}, queue[index:]...)...)
				}
				requests[result.Bit] = queue

				// The queue was empty before the reschedule iff it now holds
				// exactly the missing sections
				if len(queue) == len(missing) {
					assign(result.Bit)
				}
			}

			// End the session when all pending deliveries have arrived.
			if shutdown == nil && allocs == 0 {
				return
			}
		}
	}
}

// MatcherSession is returned by a started matcher to be used as a terminator
// for the actively running matching operation.
517 type MatcherSession struct { 518 matcher *Matcher 519 520 closer sync.Once // Sync object to ensure we only ever close once 521 quit chan struct{} // Quit channel to request pipeline termination 522 523 ctx context.Context // Context used by the light client to abort filtering 524 err error // Global error to track retrieval failures deep in the chain 525 errLock sync.Mutex 526 527 pend sync.WaitGroup 528 } 529 530 // Close stops the matching process and waits for all subprocesses to terminate 531 // before returning. The timeout may be used for graceful shutdown, allowing the 532 // currently running retrievals to complete before this time. 533 func (s *MatcherSession) Close() { 534 s.closer.Do(func() { 535 // Signal termination and wait for all goroutines to tear down 536 close(s.quit) 537 s.pend.Wait() 538 }) 539 } 540 541 // Error returns any failure encountered during the matching session. 542 func (s *MatcherSession) Error() error { 543 s.errLock.Lock() 544 defer s.errLock.Unlock() 545 546 return s.err 547 } 548 549 // allocateRetrieval assigns a bloom bit index to a client process that can either 550 // immediately request and fetch the section contents assigned to this bit or wait 551 // a little while for more sections to be requested. 552 func (s *MatcherSession) allocateRetrieval() (uint, bool) { 553 fetcher := make(chan uint) 554 555 select { 556 case <-s.quit: 557 return 0, false 558 case s.matcher.retrievers <- fetcher: 559 bit, ok := <-fetcher 560 return bit, ok 561 } 562 } 563 564 // pendingSections returns the number of pending section retrievals belonging to 565 // the given bloom bit index. 566 func (s *MatcherSession) pendingSections(bit uint) int { 567 fetcher := make(chan uint) 568 569 select { 570 case <-s.quit: 571 return 0 572 case s.matcher.counters <- fetcher: 573 fetcher <- bit 574 return int(<-fetcher) 575 } 576 } 577 578 // allocateSections assigns all or part of an already allocated bit-task queue 579 // to the requesting process. 
580 func (s *MatcherSession) allocateSections(bit uint, count int) []uint64 { 581 fetcher := make(chan *Retrieval) 582 583 select { 584 case <-s.quit: 585 return nil 586 case s.matcher.retrievals <- fetcher: 587 task := &Retrieval{ 588 Bit: bit, 589 Sections: make([]uint64, count), 590 } 591 fetcher <- task 592 return (<-fetcher).Sections 593 } 594 } 595 596 // deliverSections delivers a batch of section bit-vectors for a specific bloom 597 // bit index to be injected into the processing pipeline. 598 func (s *MatcherSession) deliverSections(bit uint, sections []uint64, bitsets [][]byte) { 599 s.matcher.deliveries <- &Retrieval{Bit: bit, Sections: sections, Bitsets: bitsets} 600 } 601 602 // Multiplex polls the matcher session for retrieval tasks and multiplexes it into 603 // the requested retrieval queue to be serviced together with other sessions. 604 // 605 // This method will block for the lifetime of the session. Even after termination 606 // of the session, any request in-flight need to be responded to! Empty responses 607 // are fine though in that case. 
608 func (s *MatcherSession) Multiplex(batch int, wait time.Duration, mux chan chan *Retrieval) { 609 for { 610 // Allocate a new bloom bit index to retrieve data for, stopping when done 611 bit, ok := s.allocateRetrieval() 612 if !ok { 613 return 614 } 615 // Bit allocated, throttle a bit if we're below our batch limit 616 if s.pendingSections(bit) < batch { 617 select { 618 case <-s.quit: 619 // Session terminating, we can't meaningfully service, abort 620 s.allocateSections(bit, 0) 621 s.deliverSections(bit, []uint64{}, [][]byte{}) 622 return 623 624 case <-time.After(wait): 625 // Throttling up, fetch whatever's available 626 } 627 } 628 // Allocate as much as we can handle and request servicing 629 sections := s.allocateSections(bit, batch) 630 request := make(chan *Retrieval) 631 632 select { 633 case <-s.quit: 634 // Session terminating, we can't meaningfully service, abort 635 s.deliverSections(bit, sections, make([][]byte, len(sections))) 636 return 637 638 case mux <- request: 639 // Retrieval accepted, something must arrive before we're aborting 640 request <- &Retrieval{Bit: bit, Sections: sections, Context: s.ctx} 641 642 result := <-request 643 if result.Error != nil { 644 s.errLock.Lock() 645 s.err = result.Error 646 s.errLock.Unlock() 647 s.Close() 648 } 649 s.deliverSections(result.Bit, result.Sections, result.Bitsets) 650 } 651 } 652 }