github.com/fff-chain/go-fff@v0.0.0-20220726032732-1c84420b8a99/core/bloombits/matcher.go

// Copyright 2017 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package bloombits

import (
	"bytes"
	"context"
	"errors"
	"math"
	"sort"
	"sync"
	"sync/atomic"
	"time"

	"github.com/fff-chain/go-fff/common/bitutil"
	"github.com/fff-chain/go-fff/common/gopool"
	"github.com/fff-chain/go-fff/crypto"
)

// bloomIndexes represents the bit indexes inside the bloom filter that belong
// to some key.
type bloomIndexes [3]uint

// calcBloomIndexes returns the bloom filter bit indexes belonging to the given key.
func calcBloomIndexes(b []byte) bloomIndexes {
	b = crypto.Keccak256(b)

	var idxs bloomIndexes
	for i := 0; i < len(idxs); i++ {
		idxs[i] = (uint(b[2*i])<<8)&2047 + uint(b[2*i+1])
	}
	return idxs
}

// partialMatches with a non-nil vector represents a section in which some sub-
// matchers have already found potential matches. Subsequent sub-matchers will
// binary AND their matches with this vector. If vector is nil, it represents a
// section to be processed by the first sub-matcher.
type partialMatches struct {
	section uint64
	bitset  []byte
}

// Retrieval represents a request for retrieval task assignments for a given
// bit with the given number of fetch elements, or a response for such a request.
// It can also have the actual results set to be used as a delivery data struct.
//
// The context and error fields are used by the light client to terminate matching
// early if an error is encountered on some path of the pipeline.
type Retrieval struct {
	Bit      uint
	Sections []uint64
	Bitsets  [][]byte

	Context context.Context
	Error   error
}
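
// exampleFilterToBloomIndexes is an editorial sketch (not part of the upstream
// file) illustrating how a single filter clause maps to the three bloom filter
// bit positions the matcher will later request. The helper name and the sample
// key are hypothetical.
func exampleFilterToBloomIndexes() {
	// A clause is normally a 20-byte address or a 32-byte topic hash; any byte
	// slice works here, since calcBloomIndexes hashes it with Keccak256 first.
	clause := []byte("hypothetical-address-or-topic")

	// Byte pairs (0,1), (2,3) and (4,5) of the hash each yield one index in
	// [0, 2048), i.e. one bit of the 2048-bit Ethereum log bloom filter.
	idxs := calcBloomIndexes(clause)
	for _, idx := range idxs {
		_ = idx // a section's bit-vector for this index is what the schedulers fetch
	}
}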

// Matcher is a pipelined system of schedulers and logic matchers which perform
// binary AND/OR operations on the bit-streams, creating a stream of potential
// blocks to inspect for data content.
type Matcher struct {
	sectionSize uint64 // Size of the data batches to filter on

	filters    [][]bloomIndexes    // Filter the system is matching for
	schedulers map[uint]*scheduler // Retrieval schedulers for loading bloom bits

	retrievers chan chan uint       // Retriever processes waiting for bit allocations
	counters   chan chan uint       // Retriever processes waiting for task count reports
	retrievals chan chan *Retrieval // Retriever processes waiting for task allocations
	deliveries chan *Retrieval      // Retriever processes waiting for task response deliveries

	running uint32 // Atomic flag whether a session is live or not
}

// NewMatcher creates a new pipeline for retrieving bloom bit streams and doing
// address and topic filtering on them. Setting a filter component to `nil` is
// allowed and will result in that filter rule being skipped (matching everything,
// i.e. an all-ones bitset).
func NewMatcher(sectionSize uint64, filters [][][]byte) *Matcher {
	// Create the matcher instance
	m := &Matcher{
		sectionSize: sectionSize,
		schedulers:  make(map[uint]*scheduler),
		retrievers:  make(chan chan uint),
		counters:    make(chan chan uint),
		retrievals:  make(chan chan *Retrieval),
		deliveries:  make(chan *Retrieval),
	}
	// Calculate the bloom bit indexes for the groups we're interested in
	m.filters = nil

	for _, filter := range filters {
		// Gather the bit indexes of the filter rule, special casing the nil filter
		if len(filter) == 0 {
			continue
		}
		bloomBits := make([]bloomIndexes, len(filter))
		for i, clause := range filter {
			if clause == nil {
				bloomBits = nil
				break
			}
			bloomBits[i] = calcBloomIndexes(clause)
		}
		// Accumulate the filter rules if no nil rule was within
		if bloomBits != nil {
			m.filters = append(m.filters, bloomBits)
		}
	}
	// For every bit, create a scheduler to load/download the bit vectors
	for _, bloomIndexLists := range m.filters {
		for _, bloomIndexList := range bloomIndexLists {
			for _, bloomIndex := range bloomIndexList {
				m.addScheduler(bloomIndex)
			}
		}
	}
	return m
}

// addScheduler adds a bit stream retrieval scheduler for the given bit index if
// one does not exist yet. If the bit is already selected for filtering, the
// existing scheduler is reused.
func (m *Matcher) addScheduler(idx uint) {
	if _, ok := m.schedulers[idx]; ok {
		return
	}
	m.schedulers[idx] = newScheduler(idx)
}
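
// exampleNewMatcher is an editorial sketch (not part of the upstream file)
// showing how the [][][]byte filter argument of NewMatcher is shaped: the outer
// slice is AND-ed across positions (address rule, then one rule per topic
// position), while each inner slice is OR-ed (alternatives for that position).
// All keys and the section size are hypothetical values, not mandated here.
func exampleNewMatcher() *Matcher {
	var (
		addrA  = []byte("hypothetical-address-A")
		addrB  = []byte("hypothetical-address-B")
		topic0 = []byte("hypothetical-topic-0")
	)
	filters := [][][]byte{
		{addrA, addrB}, // match logs emitted by addrA OR addrB ...
		{topic0},       // ... AND whose first topic is topic0
		nil,            // nil / empty position acts as a wildcard and is skipped
	}
	// 4096 blocks per section mirrors the usual bloom bits index layout; it is
	// an assumption of this sketch only.
	return NewMatcher(4096, filters)
}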

// Start starts the matching process and returns a stream of bloom matches in
// a given range of blocks. If there are no more matches in the range, the result
// channel is closed.
func (m *Matcher) Start(ctx context.Context, begin, end uint64, results chan uint64) (*MatcherSession, error) {
	// Make sure we're not creating concurrent sessions
	if atomic.SwapUint32(&m.running, 1) == 1 {
		return nil, errors.New("matcher already running")
	}
	defer atomic.StoreUint32(&m.running, 0)

	// Initiate a new matching round
	session := &MatcherSession{
		matcher: m,
		quit:    make(chan struct{}),
		ctx:     ctx,
	}
	for _, scheduler := range m.schedulers {
		scheduler.reset()
	}
	sink := m.run(begin, end, cap(results), session)

	// Read the output from the result sink and deliver to the user
	session.pend.Add(1)
	gopool.Submit(func() {
		defer session.pend.Done()
		defer close(results)

		for {
			select {
			case <-session.quit:
				return

			case res, ok := <-sink:
				// New match result found
				if !ok {
					return
				}
				// Calculate the first and last blocks of the section
				sectionStart := res.section * m.sectionSize

				first := sectionStart
				if begin > first {
					first = begin
				}
				last := sectionStart + m.sectionSize - 1
				if end < last {
					last = end
				}
				// Iterate over all the blocks in the section and return the matching ones
				for i := first; i <= last; i++ {
					// Skip the entire byte if no matches are found inside (and we're processing an entire byte!)
					next := res.bitset[(i-sectionStart)/8]
					if next == 0 {
						if i%8 == 0 {
							i += 7
						}
						continue
					}
					// Some bit is set, do the actual submatching
					if bit := 7 - i%8; next&(1<<bit) != 0 {
						select {
						case <-session.quit:
							return
						case results <- i:
						}
					}
				}
			}
		}
	})
	return session, nil
}

// run creates a daisy-chain of sub-matchers, one for the address set and one
// for each topic set, each sub-matcher receiving a section only if the previous
// ones have all found a potential match in one of the blocks of the section,
// then binary AND-ing its own matches and forwarding the result to the next one.
//
// The method starts feeding the section indexes into the first sub-matcher on a
// new goroutine and returns a sink channel receiving the results.
func (m *Matcher) run(begin, end uint64, buffer int, session *MatcherSession) chan *partialMatches {
	// Create the source channel and feed section indexes into it
	source := make(chan *partialMatches, buffer)

	session.pend.Add(1)
	gopool.Submit(func() {
		defer session.pend.Done()
		defer close(source)

		for i := begin / m.sectionSize; i <= end/m.sectionSize; i++ {
			select {
			case <-session.quit:
				return
			case source <- &partialMatches{i, bytes.Repeat([]byte{0xff}, int(m.sectionSize/8))}:
			}
		}
	})
	// Assemble the daisy-chained filtering pipeline
	next := source
	dist := make(chan *request, buffer)

	for _, bloom := range m.filters {
		next = m.subMatch(next, dist, bloom, session)
	}
	// Start the request distribution
	session.pend.Add(1)
	gopool.Submit(func() {
		m.distributor(dist, session)
	})

	return next
}
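
// exampleStartAndDrain is an editorial sketch (not part of the upstream file)
// showing the caller side of Start: the results channel should be buffered
// (its capacity sizes the internal pipeline buffers via cap(results)) and must
// be drained until it is closed. The block range is hypothetical, and a real
// run also needs retriever goroutines servicing the session; see the Multiplex
// sketch at the end of this file.
func exampleStartAndDrain(ctx context.Context, m *Matcher) ([]uint64, error) {
	results := make(chan uint64, 64) // buffer size is illustrative only
	session, err := m.Start(ctx, 0, 99999, results)
	if err != nil {
		return nil, err
	}
	defer session.Close()

	var matches []uint64
	for number := range results { // closed by Start's sink goroutine once the range is exhausted
		matches = append(matches, number)
	}
	return matches, session.Error()
}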

// subMatch creates a sub-matcher that filters for a set of addresses or topics, binary OR-s those matches, then
// binary AND-s the result to the daisy-chain input (source) and forwards it to the daisy-chain output.
// The matches of each address/topic are calculated by fetching the given sections of the three bloom bit indexes belonging to
// that address/topic, and binary AND-ing those vectors together.
func (m *Matcher) subMatch(source chan *partialMatches, dist chan *request, bloom []bloomIndexes, session *MatcherSession) chan *partialMatches {
	// Start the concurrent schedulers for each bit required by the bloom filter
	sectionSources := make([][3]chan uint64, len(bloom))
	sectionSinks := make([][3]chan []byte, len(bloom))
	for i, bits := range bloom {
		for j, bit := range bits {
			sectionSources[i][j] = make(chan uint64, cap(source))
			sectionSinks[i][j] = make(chan []byte, cap(source))

			m.schedulers[bit].run(sectionSources[i][j], dist, sectionSinks[i][j], session.quit, &session.pend)
		}
	}

	process := make(chan *partialMatches, cap(source)) // entries from source are forwarded here after fetches have been initiated
	results := make(chan *partialMatches, cap(source))

	session.pend.Add(2)
	gopool.Submit(func() {
		// Tear down the goroutine and terminate all source channels
		defer session.pend.Done()
		defer close(process)

		defer func() {
			for _, bloomSources := range sectionSources {
				for _, bitSource := range bloomSources {
					close(bitSource)
				}
			}
		}()
		// Read sections from the source channel and multiplex into all bit-schedulers
		for {
			select {
			case <-session.quit:
				return

			case subres, ok := <-source:
				// New subresult from previous link
				if !ok {
					return
				}
				// Multiplex the section index to all bit-schedulers
				for _, bloomSources := range sectionSources {
					for _, bitSource := range bloomSources {
						select {
						case <-session.quit:
							return
						case bitSource <- subres.section:
						}
					}
				}
				// Notify the processor that this section will become available
				select {
				case <-session.quit:
					return
				case process <- subres:
				}
			}
		}
	})

	gopool.Submit(func() {
		// Tear down the goroutine and terminate the final sink channel
		defer session.pend.Done()
		defer close(results)

		// Read the source notifications and collect the delivered results
		for {
			select {
			case <-session.quit:
				return

			case subres, ok := <-process:
				// Notified of a section being retrieved
				if !ok {
					return
				}
				// Gather all the sub-results and merge them together
				var orVector []byte
				for _, bloomSinks := range sectionSinks {
					var andVector []byte
					for _, bitSink := range bloomSinks {
						var data []byte
						select {
						case <-session.quit:
							return
						case data = <-bitSink:
						}
						if andVector == nil {
							andVector = make([]byte, int(m.sectionSize/8))
							copy(andVector, data)
						} else {
							bitutil.ANDBytes(andVector, andVector, data)
						}
					}
					if orVector == nil {
						orVector = andVector
					} else {
						bitutil.ORBytes(orVector, orVector, andVector)
					}
				}

				if orVector == nil {
					orVector = make([]byte, int(m.sectionSize/8))
				}
				if subres.bitset != nil {
					bitutil.ANDBytes(orVector, orVector, subres.bitset)
				}
				if bitutil.TestBytes(orVector) {
					select {
					case <-session.quit:
						return
					case results <- &partialMatches{subres.section, orVector}:
					}
				}
			}
		}
	})
	return results
}
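
// exampleVectorMerge is an editorial sketch (not part of the upstream file)
// restating the per-section merge rule subMatch applies: the three bit-vectors
// of one clause are AND-ed together (all three bloom bits must be set), the
// alternative clauses are OR-ed, and the result is AND-ed with the partial
// matches handed down the daisy-chain. The inputs are hypothetical stand-ins
// for real sectionSize/8-byte fetches and are assumed to all have equal length.
func exampleVectorMerge(clauseBits [][3][]byte, prior []byte) []byte {
	orVector := make([]byte, len(prior))
	for _, bits := range clauseBits {
		andVector := make([]byte, len(prior))
		copy(andVector, bits[0])
		bitutil.ANDBytes(andVector, andVector, bits[1])
		bitutil.ANDBytes(andVector, andVector, bits[2])
		bitutil.ORBytes(orVector, orVector, andVector)
	}
	// Respect matches already found by upstream sub-matchers; subMatch only
	// forwards the section if bitutil.TestBytes reports any bit still set.
	bitutil.ANDBytes(orVector, orVector, prior)
	return orVector
}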

// distributor receives requests from the schedulers and queues them into a set
// of pending requests, which are assigned to retrievers wanting to fulfil them.
func (m *Matcher) distributor(dist chan *request, session *MatcherSession) {
	defer session.pend.Done()

	var (
		requests   = make(map[uint][]uint64) // Per-bit list of section requests, ordered by section number
		unallocs   = make(map[uint]struct{}) // Bits with pending requests but not allocated to any retriever
		retrievers chan chan uint            // Waiting retrievers (toggled to nil if unallocs is empty)
		allocs     int                       // Number of active allocations to handle graceful shutdown requests
		shutdown   = session.quit            // Shutdown request channel, will gracefully wait for pending requests
	)

	// assign is a helper method to try to assign a pending bit to an actively
	// listening servicer, or schedule it up for later when one arrives.
	assign := func(bit uint) {
		select {
		case fetcher := <-m.retrievers:
			allocs++
			fetcher <- bit
		default:
			// No retrievers active, start listening for new ones
			retrievers = m.retrievers
			unallocs[bit] = struct{}{}
		}
	}

	for {
		select {
		case <-shutdown:
			// Shutdown requested. No more retrievers can be allocated,
			// but we still need to wait until all pending requests have returned.
			shutdown = nil
			if allocs == 0 {
				return
			}

		case req := <-dist:
			// New retrieval request arrived to be distributed to some fetcher process
			queue := requests[req.bit]
			index := sort.Search(len(queue), func(i int) bool { return queue[i] >= req.section })
			requests[req.bit] = append(queue[:index], append([]uint64{req.section}, queue[index:]...)...)

			// If it's a new bit and we have waiting fetchers, allocate it to them
			if len(queue) == 0 {
				assign(req.bit)
			}

		case fetcher := <-retrievers:
			// New retriever arrived, find the bit with the lowest pending section to assign
			bit, best := uint(0), uint64(math.MaxUint64)
			for idx := range unallocs {
				if requests[idx][0] < best {
					bit, best = idx, requests[idx][0]
				}
			}
			// Stop tracking this bit (and alloc notifications if no more work is available)
			delete(unallocs, bit)
			if len(unallocs) == 0 {
				retrievers = nil
			}
			allocs++
			fetcher <- bit

		case fetcher := <-m.counters:
			// New task count request arrived, return the number of pending items
			fetcher <- uint(len(requests[<-fetcher]))

		case fetcher := <-m.retrievals:
			// New fetcher waiting for tasks to retrieve, assign
			task := <-fetcher
			if want := len(task.Sections); want >= len(requests[task.Bit]) {
				task.Sections = requests[task.Bit]
				delete(requests, task.Bit)
			} else {
				task.Sections = append(task.Sections[:0], requests[task.Bit][:want]...)
				requests[task.Bit] = append(requests[task.Bit][:0], requests[task.Bit][want:]...)
			}
			fetcher <- task

			// If anything was left unallocated, try to assign it to someone else
			if len(requests[task.Bit]) > 0 {
				assign(task.Bit)
			}

		case result := <-m.deliveries:
			// New retrieval task response from fetcher, split out missing sections and
			// deliver complete ones
			var (
				sections = make([]uint64, 0, len(result.Sections))
				bitsets  = make([][]byte, 0, len(result.Bitsets))
				missing  = make([]uint64, 0, len(result.Sections))
			)
			for i, bitset := range result.Bitsets {
				if len(bitset) == 0 {
					missing = append(missing, result.Sections[i])
					continue
				}
				sections = append(sections, result.Sections[i])
				bitsets = append(bitsets, bitset)
			}
			m.schedulers[result.Bit].deliver(sections, bitsets)
			allocs--

			// Reschedule missing sections and allocate bit if newly available
			if len(missing) > 0 {
				queue := requests[result.Bit]
				for _, section := range missing {
					index := sort.Search(len(queue), func(i int) bool { return queue[i] >= section })
					queue = append(queue[:index], append([]uint64{section}, queue[index:]...)...)
				}
				requests[result.Bit] = queue

				if len(queue) == len(missing) {
					assign(result.Bit)
				}
			}

			// End the session when all pending deliveries have arrived.
			if shutdown == nil && allocs == 0 {
				return
			}
		}
	}
}

// MatcherSession is returned by a started matcher to be used as a terminator
// for the actively running matching operation.
type MatcherSession struct {
	matcher *Matcher

	closer sync.Once     // Sync object to ensure we only ever close once
	quit   chan struct{} // Quit channel to request pipeline termination

	ctx context.Context // Context used by the light client to abort filtering
	err atomic.Value    // Global error to track retrieval failures deep in the chain

	pend sync.WaitGroup
}

// Close stops the matching process and waits for all subprocesses to terminate
// before returning. In-flight retrievals are allowed to complete: retrievers
// must still respond to their pending requests, though empty responses are fine.
func (s *MatcherSession) Close() {
	s.closer.Do(func() {
		// Signal termination and wait for all goroutines to tear down
		close(s.quit)
		s.pend.Wait()
	})
}

// Error returns any failure encountered during the matching session.
func (s *MatcherSession) Error() error {
	if err := s.err.Load(); err != nil {
		return err.(error)
	}
	return nil
}

// allocateRetrieval assigns a bloom bit index to a client process that can either
// immediately request and fetch the section contents assigned to this bit or wait
// a little while for more sections to be requested.
func (s *MatcherSession) allocateRetrieval() (uint, bool) {
	fetcher := make(chan uint)

	select {
	case <-s.quit:
		return 0, false
	case s.matcher.retrievers <- fetcher:
		bit, ok := <-fetcher
		return bit, ok
	}
}

// pendingSections returns the number of pending section retrievals belonging to
// the given bloom bit index.
func (s *MatcherSession) pendingSections(bit uint) int {
	fetcher := make(chan uint)

	select {
	case <-s.quit:
		return 0
	case s.matcher.counters <- fetcher:
		fetcher <- bit
		return int(<-fetcher)
	}
}
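
// exampleSessionShutdown is an editorial sketch (not part of the upstream file)
// showing the intended shutdown and error-checking pattern: Close is idempotent
// (guarded by sync.Once) and may be called from any goroutine, and Error should
// be consulted after the results channel has been drained, since retrieval
// failures are stored asynchronously via Retrieval.Error.
func exampleSessionShutdown(session *MatcherSession, results chan uint64) error {
	defer session.Close() // unblocks all pipeline goroutines via the quit channel

	for range results {
		// consume matches; omitted in this sketch
	}
	return session.Error() // non-nil if a retriever reported a failure
}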

// allocateSections assigns all or part of an already allocated bit-task queue
// to the requesting process.
func (s *MatcherSession) allocateSections(bit uint, count int) []uint64 {
	fetcher := make(chan *Retrieval)

	select {
	case <-s.quit:
		return nil
	case s.matcher.retrievals <- fetcher:
		task := &Retrieval{
			Bit:      bit,
			Sections: make([]uint64, count),
		}
		fetcher <- task
		return (<-fetcher).Sections
	}
}

// deliverSections delivers a batch of section bit-vectors for a specific bloom
// bit index to be injected into the processing pipeline.
func (s *MatcherSession) deliverSections(bit uint, sections []uint64, bitsets [][]byte) {
	s.matcher.deliveries <- &Retrieval{Bit: bit, Sections: sections, Bitsets: bitsets}
}

// Multiplex polls the matcher session for retrieval tasks and multiplexes them into
// the requested retrieval queue to be serviced together with other sessions.
//
// This method will block for the lifetime of the session. Even after termination
// of the session, any in-flight requests need to be responded to! Empty responses
// are fine though in that case.
func (s *MatcherSession) Multiplex(batch int, wait time.Duration, mux chan chan *Retrieval) {
	for {
		// Allocate a new bloom bit index to retrieve data for, stopping when done
		bit, ok := s.allocateRetrieval()
		if !ok {
			return
		}
		// Bit allocated, throttle a bit if we're below our batch limit
		if s.pendingSections(bit) < batch {
			select {
			case <-s.quit:
				// Session terminating, we can't meaningfully service, abort
				s.allocateSections(bit, 0)
				s.deliverSections(bit, []uint64{}, [][]byte{})
				return

			case <-time.After(wait):
				// Throttling up, fetch whatever's available
			}
		}
		// Allocate as much as we can handle and request servicing
		sections := s.allocateSections(bit, batch)
		request := make(chan *Retrieval)

		select {
		case <-s.quit:
			// Session terminating, we can't meaningfully service, abort
			s.deliverSections(bit, sections, make([][]byte, len(sections)))
			return

		case mux <- request:
			// Retrieval accepted, something must arrive before we're aborting
			request <- &Retrieval{Bit: bit, Sections: sections, Context: s.ctx}

			result := <-request
			if result.Error != nil {
				s.err.Store(result.Error)
				s.Close()
			}
			s.deliverSections(result.Bit, result.Sections, result.Bitsets)
		}
	}
}
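
// exampleServeMatcher is an editorial sketch (not part of the upstream file)
// showing how a retrieval backend is typically wired to a session: a Multiplex
// goroutine pushes request channels into a shared mux, and a service loop
// answers each Retrieval by filling in Bitsets (or Error) and sending the same
// struct back. fetchBloomBits stands in for a hypothetical database read; the
// batch size and wait interval are illustrative only.
func exampleServeMatcher(session *MatcherSession, fetchBloomBits func(bit uint, sections []uint64) ([][]byte, error)) {
	mux := make(chan chan *Retrieval)

	// Batch up to 16 sections per request, waiting up to 100ms for more work.
	go session.Multiplex(16, 100*time.Millisecond, mux)

	go func() {
		for request := range mux {
			task := <-request // the task to service; must always be answered
			task.Bitsets, task.Error = fetchBloomBits(task.Bit, task.Sections)
			request <- task // even an all-empty response is acceptable after shutdown
		}
	}()
}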